polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery and MySQL quote
//!   identifiers with backticks, while TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile`](Dialect::transpile) to another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic; // Always compiled
23
24#[cfg(feature = "dialect-athena")]
25mod athena;
26#[cfg(feature = "dialect-bigquery")]
27mod bigquery;
28#[cfg(feature = "dialect-clickhouse")]
29mod clickhouse;
30#[cfg(feature = "dialect-cockroachdb")]
31mod cockroachdb;
32#[cfg(feature = "dialect-databricks")]
33mod databricks;
34#[cfg(feature = "dialect-datafusion")]
35mod datafusion;
36#[cfg(feature = "dialect-doris")]
37mod doris;
38#[cfg(feature = "dialect-dremio")]
39mod dremio;
40#[cfg(feature = "dialect-drill")]
41mod drill;
42#[cfg(feature = "dialect-druid")]
43mod druid;
44#[cfg(feature = "dialect-duckdb")]
45mod duckdb;
46#[cfg(feature = "dialect-dune")]
47mod dune;
48#[cfg(feature = "dialect-exasol")]
49mod exasol;
50#[cfg(feature = "dialect-fabric")]
51mod fabric;
52#[cfg(feature = "dialect-hive")]
53mod hive;
54#[cfg(feature = "dialect-materialize")]
55mod materialize;
56#[cfg(feature = "dialect-mysql")]
57mod mysql;
58#[cfg(feature = "dialect-oracle")]
59mod oracle;
60#[cfg(feature = "dialect-postgresql")]
61mod postgres;
62#[cfg(feature = "dialect-presto")]
63mod presto;
64#[cfg(feature = "dialect-redshift")]
65mod redshift;
66#[cfg(feature = "dialect-risingwave")]
67mod risingwave;
68#[cfg(feature = "dialect-singlestore")]
69mod singlestore;
70#[cfg(feature = "dialect-snowflake")]
71mod snowflake;
72#[cfg(feature = "dialect-solr")]
73mod solr;
74#[cfg(feature = "dialect-spark")]
75mod spark;
76#[cfg(feature = "dialect-sqlite")]
77mod sqlite;
78#[cfg(feature = "dialect-starrocks")]
79mod starrocks;
80#[cfg(feature = "dialect-tableau")]
81mod tableau;
82#[cfg(feature = "dialect-teradata")]
83mod teradata;
84#[cfg(feature = "dialect-tidb")]
85mod tidb;
86#[cfg(feature = "dialect-trino")]
87mod trino;
88#[cfg(feature = "dialect-tsql")]
89mod tsql;
90
91pub use generic::GenericDialect; // Always available
92
93#[cfg(feature = "dialect-athena")]
94pub use athena::AthenaDialect;
95#[cfg(feature = "dialect-bigquery")]
96pub use bigquery::BigQueryDialect;
97#[cfg(feature = "dialect-clickhouse")]
98pub use clickhouse::ClickHouseDialect;
99#[cfg(feature = "dialect-cockroachdb")]
100pub use cockroachdb::CockroachDBDialect;
101#[cfg(feature = "dialect-databricks")]
102pub use databricks::DatabricksDialect;
103#[cfg(feature = "dialect-datafusion")]
104pub use datafusion::DataFusionDialect;
105#[cfg(feature = "dialect-doris")]
106pub use doris::DorisDialect;
107#[cfg(feature = "dialect-dremio")]
108pub use dremio::DremioDialect;
109#[cfg(feature = "dialect-drill")]
110pub use drill::DrillDialect;
111#[cfg(feature = "dialect-druid")]
112pub use druid::DruidDialect;
113#[cfg(feature = "dialect-duckdb")]
114pub use duckdb::DuckDBDialect;
115#[cfg(feature = "dialect-dune")]
116pub use dune::DuneDialect;
117#[cfg(feature = "dialect-exasol")]
118pub use exasol::ExasolDialect;
119#[cfg(feature = "dialect-fabric")]
120pub use fabric::FabricDialect;
121#[cfg(feature = "dialect-hive")]
122pub use hive::HiveDialect;
123#[cfg(feature = "dialect-materialize")]
124pub use materialize::MaterializeDialect;
125#[cfg(feature = "dialect-mysql")]
126pub use mysql::MySQLDialect;
127#[cfg(feature = "dialect-oracle")]
128pub use oracle::OracleDialect;
129#[cfg(feature = "dialect-postgresql")]
130pub use postgres::PostgresDialect;
131#[cfg(feature = "dialect-presto")]
132pub use presto::PrestoDialect;
133#[cfg(feature = "dialect-redshift")]
134pub use redshift::RedshiftDialect;
135#[cfg(feature = "dialect-risingwave")]
136pub use risingwave::RisingWaveDialect;
137#[cfg(feature = "dialect-singlestore")]
138pub use singlestore::SingleStoreDialect;
139#[cfg(feature = "dialect-snowflake")]
140pub use snowflake::SnowflakeDialect;
141#[cfg(feature = "dialect-solr")]
142pub use solr::SolrDialect;
143#[cfg(feature = "dialect-spark")]
144pub use spark::SparkDialect;
145#[cfg(feature = "dialect-sqlite")]
146pub use sqlite::SQLiteDialect;
147#[cfg(feature = "dialect-starrocks")]
148pub use starrocks::StarRocksDialect;
149#[cfg(feature = "dialect-tableau")]
150pub use tableau::TableauDialect;
151#[cfg(feature = "dialect-teradata")]
152pub use teradata::TeradataDialect;
153#[cfg(feature = "dialect-tidb")]
154pub use tidb::TiDBDialect;
155#[cfg(feature = "dialect-trino")]
156pub use trino::TrinoDialect;
157#[cfg(feature = "dialect-tsql")]
158pub use tsql::TSQLDialect;
159
160use crate::error::Result;
161use crate::expressions::{Expression, FunctionBody, Null};
162use crate::generator::{Generator, GeneratorConfig};
163use crate::parser::Parser;
164use crate::tokens::{Token, Tokenizer, TokenizerConfig};
165use serde::{Deserialize, Serialize};
166use std::collections::HashMap;
167use std::sync::{Arc, LazyLock, RwLock};
168
/// Enumeration of all supported SQL dialects.
///
/// Each variant corresponds to a specific SQL database engine or query language.
/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
/// and is used as the default when no dialect is specified.
///
/// Dialect names are case-insensitive when parsed from strings via [`FromStr`].
/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DialectType {
    /// Standard SQL with no dialect-specific behavior (default).
    Generic,
    /// PostgreSQL -- advanced open-source relational database (also accepts "postgres").
    PostgreSQL,
    /// MySQL -- widely-used open-source relational database.
    MySQL,
    /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
    BigQuery,
    /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
    Snowflake,
    /// DuckDB -- in-process analytical database with modern SQL extensions.
    DuckDB,
    /// SQLite -- lightweight embedded relational database.
    SQLite,
    /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
    Hive,
    /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
    Spark,
    /// Trino -- distributed SQL query engine (formerly PrestoSQL).
    Trino,
    /// PrestoDB -- distributed SQL query engine for big data.
    Presto,
    /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
    Redshift,
    /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
    TSQL,
    /// Oracle Database -- commercial relational database with PL/SQL extensions.
    Oracle,
    /// ClickHouse -- column-oriented OLAP database for real-time analytics.
    ClickHouse,
    /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
    Databricks,
    /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
    Athena,
    /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
    Teradata,
    /// Apache Doris -- real-time analytical database (MySQL-compatible).
    Doris,
    /// StarRocks -- sub-second OLAP database (MySQL-compatible).
    StarRocks,
    /// Materialize -- streaming SQL database built on differential dataflow.
    Materialize,
    /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
    RisingWave,
    /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
    SingleStore,
    /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
    CockroachDB,
    /// TiDB -- distributed HTAP database with MySQL compatibility.
    TiDB,
    /// Apache Druid -- real-time analytics database.
    Druid,
    /// Apache Solr -- search platform with SQL interface.
    Solr,
    /// Tableau -- data visualization platform with its own SQL dialect.
    Tableau,
    /// Dune Analytics -- blockchain analytics SQL engine.
    Dune,
    /// Microsoft Fabric -- unified analytics platform (T-SQL based).
    Fabric,
    /// Apache Drill -- schema-free SQL query engine for big data.
    Drill,
    /// Dremio -- data lakehouse platform with Arrow-based query engine.
    Dremio,
    /// Exasol -- in-memory analytic database.
    Exasol,
    /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions
    /// (also accepts "arrow-datafusion", "arrow_datafusion").
    DataFusion,
}
249
250impl Default for DialectType {
251 fn default() -> Self {
252 DialectType::Generic
253 }
254}
255
256impl std::fmt::Display for DialectType {
257 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258 match self {
259 DialectType::Generic => write!(f, "generic"),
260 DialectType::PostgreSQL => write!(f, "postgresql"),
261 DialectType::MySQL => write!(f, "mysql"),
262 DialectType::BigQuery => write!(f, "bigquery"),
263 DialectType::Snowflake => write!(f, "snowflake"),
264 DialectType::DuckDB => write!(f, "duckdb"),
265 DialectType::SQLite => write!(f, "sqlite"),
266 DialectType::Hive => write!(f, "hive"),
267 DialectType::Spark => write!(f, "spark"),
268 DialectType::Trino => write!(f, "trino"),
269 DialectType::Presto => write!(f, "presto"),
270 DialectType::Redshift => write!(f, "redshift"),
271 DialectType::TSQL => write!(f, "tsql"),
272 DialectType::Oracle => write!(f, "oracle"),
273 DialectType::ClickHouse => write!(f, "clickhouse"),
274 DialectType::Databricks => write!(f, "databricks"),
275 DialectType::Athena => write!(f, "athena"),
276 DialectType::Teradata => write!(f, "teradata"),
277 DialectType::Doris => write!(f, "doris"),
278 DialectType::StarRocks => write!(f, "starrocks"),
279 DialectType::Materialize => write!(f, "materialize"),
280 DialectType::RisingWave => write!(f, "risingwave"),
281 DialectType::SingleStore => write!(f, "singlestore"),
282 DialectType::CockroachDB => write!(f, "cockroachdb"),
283 DialectType::TiDB => write!(f, "tidb"),
284 DialectType::Druid => write!(f, "druid"),
285 DialectType::Solr => write!(f, "solr"),
286 DialectType::Tableau => write!(f, "tableau"),
287 DialectType::Dune => write!(f, "dune"),
288 DialectType::Fabric => write!(f, "fabric"),
289 DialectType::Drill => write!(f, "drill"),
290 DialectType::Dremio => write!(f, "dremio"),
291 DialectType::Exasol => write!(f, "exasol"),
292 DialectType::DataFusion => write!(f, "datafusion"),
293 }
294 }
295}
296
297impl std::str::FromStr for DialectType {
298 type Err = crate::error::Error;
299
300 fn from_str(s: &str) -> Result<Self> {
301 match s.to_ascii_lowercase().as_str() {
302 "generic" | "" => Ok(DialectType::Generic),
303 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
304 "mysql" => Ok(DialectType::MySQL),
305 "bigquery" => Ok(DialectType::BigQuery),
306 "snowflake" => Ok(DialectType::Snowflake),
307 "duckdb" => Ok(DialectType::DuckDB),
308 "sqlite" => Ok(DialectType::SQLite),
309 "hive" => Ok(DialectType::Hive),
310 "spark" | "spark2" => Ok(DialectType::Spark),
311 "trino" => Ok(DialectType::Trino),
312 "presto" => Ok(DialectType::Presto),
313 "redshift" => Ok(DialectType::Redshift),
314 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
315 "oracle" => Ok(DialectType::Oracle),
316 "clickhouse" => Ok(DialectType::ClickHouse),
317 "databricks" => Ok(DialectType::Databricks),
318 "athena" => Ok(DialectType::Athena),
319 "teradata" => Ok(DialectType::Teradata),
320 "doris" => Ok(DialectType::Doris),
321 "starrocks" => Ok(DialectType::StarRocks),
322 "materialize" => Ok(DialectType::Materialize),
323 "risingwave" => Ok(DialectType::RisingWave),
324 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
325 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
326 "tidb" => Ok(DialectType::TiDB),
327 "druid" => Ok(DialectType::Druid),
328 "solr" => Ok(DialectType::Solr),
329 "tableau" => Ok(DialectType::Tableau),
330 "dune" => Ok(DialectType::Dune),
331 "fabric" => Ok(DialectType::Fabric),
332 "drill" => Ok(DialectType::Drill),
333 "dremio" => Ok(DialectType::Dremio),
334 "exasol" => Ok(DialectType::Exasol),
335 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
336 _ => Err(crate::error::Error::parse(
337 format!("Unknown dialect: {}", s),
338 0,
339 0,
340 0,
341 0,
342 )),
343 }
344 }
345}
346
/// Trait that each concrete SQL dialect must implement.
///
/// `DialectImpl` provides the configuration hooks and per-expression transform logic
/// that distinguish one dialect from another. Implementors supply:
///
/// - A [`DialectType`] identifier.
/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
///   that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
///   rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
///
/// The default implementations are no-ops, so a minimal dialect only needs to provide
/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
/// standard SQL.
pub trait DialectImpl {
    /// Returns the [`DialectType`] that identifies this dialect.
    fn dialect_type(&self) -> DialectType;

    /// Returns the tokenizer configuration for this dialect.
    ///
    /// Override to customize identifier quoting characters, string escape rules,
    /// comment styles, and other lexing behavior. The default is generic SQL
    /// tokenization ([`TokenizerConfig::default`]).
    fn tokenizer_config(&self) -> TokenizerConfig {
        TokenizerConfig::default()
    }

    /// Returns the generator configuration for this dialect.
    ///
    /// Override to customize identifier quoting style, function name casing,
    /// keyword casing, and other SQL generation behavior. The default is generic
    /// SQL generation ([`GeneratorConfig::default`]).
    fn generator_config(&self) -> GeneratorConfig {
        GeneratorConfig::default()
    }

    /// Returns a generator configuration tailored to a specific expression.
    ///
    /// Override this for hybrid dialects like Athena that route to different SQL engines
    /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
    /// The default delegates to [`generator_config`](DialectImpl::generator_config),
    /// i.e. the same configuration is used for every expression.
    fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
        self.generator_config()
    }

    /// Transforms a single expression node for this dialect, without recursing into children.
    ///
    /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
    /// expression unchanged if no dialect-specific rewrite is needed. Transformations
    /// typically include function renaming, operator substitution, and type mapping.
    ///
    /// # Errors
    ///
    /// Implementations may return an error when a node cannot be rewritten for this
    /// dialect; the default implementation is an infallible pass-through.
    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }

    /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
    ///
    /// Override this to apply structural rewrites that must see the entire tree at once,
    /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
    /// `explode_projection_to_unnest`. The default is a no-op pass-through.
    ///
    /// # Errors
    ///
    /// Implementations may return an error if a structural rewrite fails; the
    /// default implementation never fails.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }
}
409
410/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
411/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
412///
413/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
414/// and then nested element/field types are recursed into. This ensures that dialect-level
415/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
416fn transform_data_type_recursive<F>(
417 dt: crate::expressions::DataType,
418 transform_fn: &F,
419) -> Result<crate::expressions::DataType>
420where
421 F: Fn(Expression) -> Result<Expression>,
422{
423 use crate::expressions::DataType;
424 // First, transform the outermost type through the expression system
425 let dt_expr = transform_fn(Expression::DataType(dt))?;
426 let dt = match dt_expr {
427 Expression::DataType(d) => d,
428 _ => {
429 return Ok(match dt_expr {
430 _ => DataType::Custom {
431 name: "UNKNOWN".to_string(),
432 },
433 })
434 }
435 };
436 // Then recurse into nested types
437 match dt {
438 DataType::Array {
439 element_type,
440 dimension,
441 } => {
442 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
443 Ok(DataType::Array {
444 element_type: Box::new(inner),
445 dimension,
446 })
447 }
448 DataType::List { element_type } => {
449 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
450 Ok(DataType::List {
451 element_type: Box::new(inner),
452 })
453 }
454 DataType::Struct { fields, nested } => {
455 let mut new_fields = Vec::new();
456 for mut field in fields {
457 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
458 new_fields.push(field);
459 }
460 Ok(DataType::Struct {
461 fields: new_fields,
462 nested,
463 })
464 }
465 DataType::Map {
466 key_type,
467 value_type,
468 } => {
469 let k = transform_data_type_recursive(*key_type, transform_fn)?;
470 let v = transform_data_type_recursive(*value_type, transform_fn)?;
471 Ok(DataType::Map {
472 key_type: Box::new(k),
473 value_type: Box::new(v),
474 })
475 }
476 other => Ok(other),
477 }
478}
479
/// Convert DuckDB C-style format strings to Presto C-style format strings.
/// DuckDB and Presto both use C-style % directives but with different specifiers for some cases.
#[cfg(feature = "transpile")]
fn duckdb_to_presto_format(fmt: &str) -> String {
    // (DuckDB pattern, temporary sentinel, Presto replacement).
    // Multi-character patterns are listed here so they can be shielded behind
    // sentinels before the single-specifier rewrites below run; order matters,
    // longer/composite patterns first to avoid partial replacements.
    const PROTECTED: [(&str, &str, &str); 6] = [
        ("%-m", "\x01NOPADM\x01", "%c"),
        ("%-d", "\x01NOPADD\x01", "%e"),
        ("%-I", "\x01NOPADI\x01", "%l"),
        ("%-H", "\x01NOPADH\x01", "%k"),
        ("%H:%M:%S", "\x01HMS\x01", "%T"),
        ("%Y-%m-%d", "\x01YMD\x01", "%Y-%m-%d"),
    ];

    // Pass 1: hide the multi-char patterns behind sentinels.
    let mut out = fmt.to_string();
    for (pattern, sentinel, _) in PROTECTED {
        out = out.replace(pattern, sentinel);
    }

    // Pass 2: convert the individual specifiers that differ between dialects.
    out = out.replace("%M", "%i");
    out = out.replace("%S", "%s");

    // Pass 3: swap each sentinel for its Presto equivalent.
    for (_, sentinel, presto) in PROTECTED {
        out = out.replace(sentinel, presto);
    }
    out
}
505
/// Convert DuckDB C-style format strings to BigQuery format strings.
/// BigQuery uses a mix of strftime-like directives.
#[cfg(feature = "transpile")]
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    // Replacements are applied in order; the composite datetime pattern is
    // rewritten before its date/time sub-patterns to avoid partial matches.
    [
        ("%-d", "%e"),
        ("%Y-%m-%d %H:%M:%S", "%F %T"),
        ("%Y-%m-%d", "%F"),
        ("%H:%M:%S", "%T"),
    ]
    .iter()
    .fold(fmt.to_string(), |acc, (from, to)| acc.replace(from, to))
}
518
519/// Applies a transform function bottom-up through an entire expression tree.
520///
521/// This is the core tree-rewriting engine used by the dialect system. It performs
522/// a post-order (children-first) traversal: for each node, all children are recursively
523/// transformed before the node itself is passed to `transform_fn`. This bottom-up
524/// strategy means that when `transform_fn` sees a node, its children have already
525/// been rewritten, which simplifies pattern matching on sub-expressions.
526///
527/// The function handles all expression variants including SELECT clauses (FROM, WHERE,
528/// GROUP BY, HAVING, ORDER BY, QUALIFY, WITH/CTEs, WINDOW), binary operators,
529/// function calls, CASE expressions, date/time functions, and more.
530///
531/// # Arguments
532///
533/// * `expr` - The root expression to transform (consumed).
534/// * `transform_fn` - A closure that receives each expression node (after its children
535/// have been transformed) and returns a possibly-rewritten expression.
536///
537/// # Errors
538///
539/// Returns an error if `transform_fn` returns an error for any node.
540pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
541where
542 F: Fn(Expression) -> Result<Expression>,
543{
544 use crate::expressions::BinaryOp;
545
546 // Helper macro to recurse into AggFunc-based expressions (this, filter, order_by, having_max, limit).
547 macro_rules! recurse_agg {
548 ($variant:ident, $f:expr) => {{
549 let mut f = $f;
550 f.this = transform_recursive(f.this, transform_fn)?;
551 if let Some(filter) = f.filter.take() {
552 f.filter = Some(transform_recursive(filter, transform_fn)?);
553 }
554 for ord in &mut f.order_by {
555 ord.this = transform_recursive(
556 std::mem::replace(&mut ord.this, Expression::Null(crate::expressions::Null)),
557 transform_fn,
558 )?;
559 }
560 if let Some((ref mut expr, _)) = f.having_max {
561 *expr = Box::new(transform_recursive(
562 std::mem::replace(expr.as_mut(), Expression::Null(crate::expressions::Null)),
563 transform_fn,
564 )?);
565 }
566 if let Some(limit) = f.limit.take() {
567 f.limit = Some(Box::new(transform_recursive(*limit, transform_fn)?));
568 }
569 Expression::$variant(f)
570 }};
571 }
572
573 // Helper macro to transform binary ops with Box<BinaryOp>
574 macro_rules! transform_binary {
575 ($variant:ident, $op:expr) => {{
576 let left = transform_recursive($op.left, transform_fn)?;
577 let right = transform_recursive($op.right, transform_fn)?;
578 Expression::$variant(Box::new(BinaryOp {
579 left,
580 right,
581 left_comments: $op.left_comments,
582 operator_comments: $op.operator_comments,
583 trailing_comments: $op.trailing_comments,
584 inferred_type: $op.inferred_type,
585 }))
586 }};
587 }
588
589 // Fast path: leaf nodes never need child traversal, apply transform directly
590 if matches!(
591 &expr,
592 Expression::Literal(_)
593 | Expression::Boolean(_)
594 | Expression::Null(_)
595 | Expression::Identifier(_)
596 | Expression::Star(_)
597 | Expression::Parameter(_)
598 | Expression::Placeholder(_)
599 | Expression::SessionParameter(_)
600 ) {
601 return transform_fn(expr);
602 }
603
604 // First recursively transform children, then apply the transform function
605 let expr = match expr {
606 Expression::Select(mut select) => {
607 select.expressions = select
608 .expressions
609 .into_iter()
610 .map(|e| transform_recursive(e, transform_fn))
611 .collect::<Result<Vec<_>>>()?;
612
613 // Transform FROM clause
614 if let Some(mut from) = select.from.take() {
615 from.expressions = from
616 .expressions
617 .into_iter()
618 .map(|e| transform_recursive(e, transform_fn))
619 .collect::<Result<Vec<_>>>()?;
620 select.from = Some(from);
621 }
622
623 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
624 select.joins = select
625 .joins
626 .into_iter()
627 .map(|mut join| {
628 join.this = transform_recursive(join.this, transform_fn)?;
629 if let Some(on) = join.on.take() {
630 join.on = Some(transform_recursive(on, transform_fn)?);
631 }
632 // Wrap join in Expression::Join to allow transform_fn to transform it
633 match transform_fn(Expression::Join(Box::new(join)))? {
634 Expression::Join(j) => Ok(*j),
635 _ => Err(crate::error::Error::parse(
636 "Join transformation returned non-join expression",
637 0,
638 0,
639 0,
640 0,
641 )),
642 }
643 })
644 .collect::<Result<Vec<_>>>()?;
645
646 // Transform LATERAL VIEW expressions (Hive/Spark)
647 select.lateral_views = select
648 .lateral_views
649 .into_iter()
650 .map(|mut lv| {
651 lv.this = transform_recursive(lv.this, transform_fn)?;
652 Ok(lv)
653 })
654 .collect::<Result<Vec<_>>>()?;
655
656 // Transform WHERE clause
657 if let Some(mut where_clause) = select.where_clause.take() {
658 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
659 select.where_clause = Some(where_clause);
660 }
661
662 // Transform GROUP BY
663 if let Some(mut group_by) = select.group_by.take() {
664 group_by.expressions = group_by
665 .expressions
666 .into_iter()
667 .map(|e| transform_recursive(e, transform_fn))
668 .collect::<Result<Vec<_>>>()?;
669 select.group_by = Some(group_by);
670 }
671
672 // Transform HAVING
673 if let Some(mut having) = select.having.take() {
674 having.this = transform_recursive(having.this, transform_fn)?;
675 select.having = Some(having);
676 }
677
678 // Transform WITH (CTEs)
679 if let Some(mut with) = select.with.take() {
680 with.ctes = with
681 .ctes
682 .into_iter()
683 .map(|mut cte| {
684 let original = cte.this.clone();
685 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
686 cte
687 })
688 .collect();
689 select.with = Some(with);
690 }
691
692 // Transform ORDER BY
693 if let Some(mut order) = select.order_by.take() {
694 order.expressions = order
695 .expressions
696 .into_iter()
697 .map(|o| {
698 let mut o = o;
699 let original = o.this.clone();
700 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
701 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
702 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
703 Ok(Expression::Ordered(transformed)) => *transformed,
704 Ok(_) | Err(_) => o,
705 }
706 })
707 .collect();
708 select.order_by = Some(order);
709 }
710
711 // Transform WINDOW clause order_by
712 if let Some(ref mut windows) = select.windows {
713 for nw in windows.iter_mut() {
714 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
715 .into_iter()
716 .map(|o| {
717 let mut o = o;
718 let original = o.this.clone();
719 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
720 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
721 Ok(Expression::Ordered(transformed)) => *transformed,
722 Ok(_) | Err(_) => o,
723 }
724 })
725 .collect();
726 }
727 }
728
729 // Transform QUALIFY
730 if let Some(mut qual) = select.qualify.take() {
731 qual.this = transform_recursive(qual.this, transform_fn)?;
732 select.qualify = Some(qual);
733 }
734
735 Expression::Select(select)
736 }
737 Expression::Function(mut f) => {
738 f.args = f
739 .args
740 .into_iter()
741 .map(|e| transform_recursive(e, transform_fn))
742 .collect::<Result<Vec<_>>>()?;
743 Expression::Function(f)
744 }
745 Expression::AggregateFunction(mut f) => {
746 f.args = f
747 .args
748 .into_iter()
749 .map(|e| transform_recursive(e, transform_fn))
750 .collect::<Result<Vec<_>>>()?;
751 if let Some(filter) = f.filter {
752 f.filter = Some(transform_recursive(filter, transform_fn)?);
753 }
754 Expression::AggregateFunction(f)
755 }
756 Expression::WindowFunction(mut wf) => {
757 wf.this = transform_recursive(wf.this, transform_fn)?;
758 wf.over.partition_by = wf
759 .over
760 .partition_by
761 .into_iter()
762 .map(|e| transform_recursive(e, transform_fn))
763 .collect::<Result<Vec<_>>>()?;
764 // Transform order_by items through Expression::Ordered wrapper
765 wf.over.order_by = wf
766 .over
767 .order_by
768 .into_iter()
769 .map(|o| {
770 let mut o = o;
771 o.this = transform_recursive(o.this, transform_fn)?;
772 match transform_fn(Expression::Ordered(Box::new(o)))? {
773 Expression::Ordered(transformed) => Ok(*transformed),
774 _ => Err(crate::error::Error::parse(
775 "Ordered transformation returned non-Ordered expression",
776 0,
777 0,
778 0,
779 0,
780 )),
781 }
782 })
783 .collect::<Result<Vec<_>>>()?;
784 Expression::WindowFunction(wf)
785 }
786 Expression::Alias(mut a) => {
787 a.this = transform_recursive(a.this, transform_fn)?;
788 Expression::Alias(a)
789 }
790 Expression::Cast(mut c) => {
791 c.this = transform_recursive(c.this, transform_fn)?;
792 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
793 c.to = transform_data_type_recursive(c.to, transform_fn)?;
794 Expression::Cast(c)
795 }
796 Expression::And(op) => transform_binary!(And, *op),
797 Expression::Or(op) => transform_binary!(Or, *op),
798 Expression::Add(op) => transform_binary!(Add, *op),
799 Expression::Sub(op) => transform_binary!(Sub, *op),
800 Expression::Mul(op) => transform_binary!(Mul, *op),
801 Expression::Div(op) => transform_binary!(Div, *op),
802 Expression::Eq(op) => transform_binary!(Eq, *op),
803 Expression::Lt(op) => transform_binary!(Lt, *op),
804 Expression::Gt(op) => transform_binary!(Gt, *op),
805 Expression::Paren(mut p) => {
806 p.this = transform_recursive(p.this, transform_fn)?;
807 Expression::Paren(p)
808 }
809 Expression::Coalesce(mut f) => {
810 f.expressions = f
811 .expressions
812 .into_iter()
813 .map(|e| transform_recursive(e, transform_fn))
814 .collect::<Result<Vec<_>>>()?;
815 Expression::Coalesce(f)
816 }
817 Expression::IfNull(mut f) => {
818 f.this = transform_recursive(f.this, transform_fn)?;
819 f.expression = transform_recursive(f.expression, transform_fn)?;
820 Expression::IfNull(f)
821 }
822 Expression::Nvl(mut f) => {
823 f.this = transform_recursive(f.this, transform_fn)?;
824 f.expression = transform_recursive(f.expression, transform_fn)?;
825 Expression::Nvl(f)
826 }
827 Expression::In(mut i) => {
828 i.this = transform_recursive(i.this, transform_fn)?;
829 i.expressions = i
830 .expressions
831 .into_iter()
832 .map(|e| transform_recursive(e, transform_fn))
833 .collect::<Result<Vec<_>>>()?;
834 if let Some(query) = i.query {
835 i.query = Some(transform_recursive(query, transform_fn)?);
836 }
837 Expression::In(i)
838 }
839 Expression::Not(mut n) => {
840 n.this = transform_recursive(n.this, transform_fn)?;
841 Expression::Not(n)
842 }
843 Expression::ArraySlice(mut s) => {
844 s.this = transform_recursive(s.this, transform_fn)?;
845 if let Some(start) = s.start {
846 s.start = Some(transform_recursive(start, transform_fn)?);
847 }
848 if let Some(end) = s.end {
849 s.end = Some(transform_recursive(end, transform_fn)?);
850 }
851 Expression::ArraySlice(s)
852 }
853 Expression::Subscript(mut s) => {
854 s.this = transform_recursive(s.this, transform_fn)?;
855 s.index = transform_recursive(s.index, transform_fn)?;
856 Expression::Subscript(s)
857 }
858 Expression::Array(mut a) => {
859 a.expressions = a
860 .expressions
861 .into_iter()
862 .map(|e| transform_recursive(e, transform_fn))
863 .collect::<Result<Vec<_>>>()?;
864 Expression::Array(a)
865 }
866 Expression::Struct(mut s) => {
867 let mut new_fields = Vec::new();
868 for (name, expr) in s.fields {
869 let transformed = transform_recursive(expr, transform_fn)?;
870 new_fields.push((name, transformed));
871 }
872 s.fields = new_fields;
873 Expression::Struct(s)
874 }
875 Expression::NamedArgument(mut na) => {
876 na.value = transform_recursive(na.value, transform_fn)?;
877 Expression::NamedArgument(na)
878 }
879 Expression::MapFunc(mut m) => {
880 m.keys = m
881 .keys
882 .into_iter()
883 .map(|e| transform_recursive(e, transform_fn))
884 .collect::<Result<Vec<_>>>()?;
885 m.values = m
886 .values
887 .into_iter()
888 .map(|e| transform_recursive(e, transform_fn))
889 .collect::<Result<Vec<_>>>()?;
890 Expression::MapFunc(m)
891 }
892 Expression::ArrayFunc(mut a) => {
893 a.expressions = a
894 .expressions
895 .into_iter()
896 .map(|e| transform_recursive(e, transform_fn))
897 .collect::<Result<Vec<_>>>()?;
898 Expression::ArrayFunc(a)
899 }
900 Expression::Lambda(mut l) => {
901 l.body = transform_recursive(l.body, transform_fn)?;
902 Expression::Lambda(l)
903 }
904 Expression::JsonExtract(mut f) => {
905 f.this = transform_recursive(f.this, transform_fn)?;
906 f.path = transform_recursive(f.path, transform_fn)?;
907 Expression::JsonExtract(f)
908 }
909 Expression::JsonExtractScalar(mut f) => {
910 f.this = transform_recursive(f.this, transform_fn)?;
911 f.path = transform_recursive(f.path, transform_fn)?;
912 Expression::JsonExtractScalar(f)
913 }
914
915 // ===== UnaryFunc-based expressions =====
916 // These all have a single `this: Expression` child
917 Expression::Length(mut f) => {
918 f.this = transform_recursive(f.this, transform_fn)?;
919 Expression::Length(f)
920 }
921 Expression::Upper(mut f) => {
922 f.this = transform_recursive(f.this, transform_fn)?;
923 Expression::Upper(f)
924 }
925 Expression::Lower(mut f) => {
926 f.this = transform_recursive(f.this, transform_fn)?;
927 Expression::Lower(f)
928 }
929 Expression::LTrim(mut f) => {
930 f.this = transform_recursive(f.this, transform_fn)?;
931 Expression::LTrim(f)
932 }
933 Expression::RTrim(mut f) => {
934 f.this = transform_recursive(f.this, transform_fn)?;
935 Expression::RTrim(f)
936 }
937 Expression::Reverse(mut f) => {
938 f.this = transform_recursive(f.this, transform_fn)?;
939 Expression::Reverse(f)
940 }
941 Expression::Abs(mut f) => {
942 f.this = transform_recursive(f.this, transform_fn)?;
943 Expression::Abs(f)
944 }
945 Expression::Ceil(mut f) => {
946 f.this = transform_recursive(f.this, transform_fn)?;
947 Expression::Ceil(f)
948 }
949 Expression::Floor(mut f) => {
950 f.this = transform_recursive(f.this, transform_fn)?;
951 Expression::Floor(f)
952 }
953 Expression::Sign(mut f) => {
954 f.this = transform_recursive(f.this, transform_fn)?;
955 Expression::Sign(f)
956 }
957 Expression::Sqrt(mut f) => {
958 f.this = transform_recursive(f.this, transform_fn)?;
959 Expression::Sqrt(f)
960 }
961 Expression::Cbrt(mut f) => {
962 f.this = transform_recursive(f.this, transform_fn)?;
963 Expression::Cbrt(f)
964 }
965 Expression::Ln(mut f) => {
966 f.this = transform_recursive(f.this, transform_fn)?;
967 Expression::Ln(f)
968 }
969 Expression::Log(mut f) => {
970 f.this = transform_recursive(f.this, transform_fn)?;
971 if let Some(base) = f.base {
972 f.base = Some(transform_recursive(base, transform_fn)?);
973 }
974 Expression::Log(f)
975 }
976 Expression::Exp(mut f) => {
977 f.this = transform_recursive(f.this, transform_fn)?;
978 Expression::Exp(f)
979 }
980 Expression::Date(mut f) => {
981 f.this = transform_recursive(f.this, transform_fn)?;
982 Expression::Date(f)
983 }
984 Expression::Stddev(f) => recurse_agg!(Stddev, f),
985 Expression::StddevSamp(f) => recurse_agg!(StddevSamp, f),
986 Expression::Variance(f) => recurse_agg!(Variance, f),
987
988 // ===== BinaryFunc-based expressions =====
989 Expression::ModFunc(mut f) => {
990 f.this = transform_recursive(f.this, transform_fn)?;
991 f.expression = transform_recursive(f.expression, transform_fn)?;
992 Expression::ModFunc(f)
993 }
994 Expression::Power(mut f) => {
995 f.this = transform_recursive(f.this, transform_fn)?;
996 f.expression = transform_recursive(f.expression, transform_fn)?;
997 Expression::Power(f)
998 }
999 Expression::MapFromArrays(mut f) => {
1000 f.this = transform_recursive(f.this, transform_fn)?;
1001 f.expression = transform_recursive(f.expression, transform_fn)?;
1002 Expression::MapFromArrays(f)
1003 }
1004 Expression::ElementAt(mut f) => {
1005 f.this = transform_recursive(f.this, transform_fn)?;
1006 f.expression = transform_recursive(f.expression, transform_fn)?;
1007 Expression::ElementAt(f)
1008 }
1009 Expression::MapContainsKey(mut f) => {
1010 f.this = transform_recursive(f.this, transform_fn)?;
1011 f.expression = transform_recursive(f.expression, transform_fn)?;
1012 Expression::MapContainsKey(f)
1013 }
1014 Expression::Left(mut f) => {
1015 f.this = transform_recursive(f.this, transform_fn)?;
1016 f.length = transform_recursive(f.length, transform_fn)?;
1017 Expression::Left(f)
1018 }
1019 Expression::Right(mut f) => {
1020 f.this = transform_recursive(f.this, transform_fn)?;
1021 f.length = transform_recursive(f.length, transform_fn)?;
1022 Expression::Right(f)
1023 }
1024 Expression::Repeat(mut f) => {
1025 f.this = transform_recursive(f.this, transform_fn)?;
1026 f.times = transform_recursive(f.times, transform_fn)?;
1027 Expression::Repeat(f)
1028 }
1029
1030 // ===== Complex function expressions =====
1031 Expression::Substring(mut f) => {
1032 f.this = transform_recursive(f.this, transform_fn)?;
1033 f.start = transform_recursive(f.start, transform_fn)?;
1034 if let Some(len) = f.length {
1035 f.length = Some(transform_recursive(len, transform_fn)?);
1036 }
1037 Expression::Substring(f)
1038 }
1039 Expression::Replace(mut f) => {
1040 f.this = transform_recursive(f.this, transform_fn)?;
1041 f.old = transform_recursive(f.old, transform_fn)?;
1042 f.new = transform_recursive(f.new, transform_fn)?;
1043 Expression::Replace(f)
1044 }
1045 Expression::ConcatWs(mut f) => {
1046 f.separator = transform_recursive(f.separator, transform_fn)?;
1047 f.expressions = f
1048 .expressions
1049 .into_iter()
1050 .map(|e| transform_recursive(e, transform_fn))
1051 .collect::<Result<Vec<_>>>()?;
1052 Expression::ConcatWs(f)
1053 }
1054 Expression::Trim(mut f) => {
1055 f.this = transform_recursive(f.this, transform_fn)?;
1056 if let Some(chars) = f.characters {
1057 f.characters = Some(transform_recursive(chars, transform_fn)?);
1058 }
1059 Expression::Trim(f)
1060 }
1061 Expression::Split(mut f) => {
1062 f.this = transform_recursive(f.this, transform_fn)?;
1063 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
1064 Expression::Split(f)
1065 }
1066 Expression::Lpad(mut f) => {
1067 f.this = transform_recursive(f.this, transform_fn)?;
1068 f.length = transform_recursive(f.length, transform_fn)?;
1069 if let Some(fill) = f.fill {
1070 f.fill = Some(transform_recursive(fill, transform_fn)?);
1071 }
1072 Expression::Lpad(f)
1073 }
1074 Expression::Rpad(mut f) => {
1075 f.this = transform_recursive(f.this, transform_fn)?;
1076 f.length = transform_recursive(f.length, transform_fn)?;
1077 if let Some(fill) = f.fill {
1078 f.fill = Some(transform_recursive(fill, transform_fn)?);
1079 }
1080 Expression::Rpad(f)
1081 }
1082
1083 // ===== Conditional expressions =====
1084 Expression::Case(mut c) => {
1085 if let Some(operand) = c.operand {
1086 c.operand = Some(transform_recursive(operand, transform_fn)?);
1087 }
1088 c.whens = c
1089 .whens
1090 .into_iter()
1091 .map(|(cond, then)| {
1092 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
1093 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
1094 (new_cond, new_then)
1095 })
1096 .collect();
1097 if let Some(else_expr) = c.else_ {
1098 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
1099 }
1100 Expression::Case(c)
1101 }
1102 Expression::IfFunc(mut f) => {
1103 f.condition = transform_recursive(f.condition, transform_fn)?;
1104 f.true_value = transform_recursive(f.true_value, transform_fn)?;
1105 if let Some(false_val) = f.false_value {
1106 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
1107 }
1108 Expression::IfFunc(f)
1109 }
1110
1111 // ===== Date/Time expressions =====
1112 Expression::DateAdd(mut f) => {
1113 f.this = transform_recursive(f.this, transform_fn)?;
1114 f.interval = transform_recursive(f.interval, transform_fn)?;
1115 Expression::DateAdd(f)
1116 }
1117 Expression::DateSub(mut f) => {
1118 f.this = transform_recursive(f.this, transform_fn)?;
1119 f.interval = transform_recursive(f.interval, transform_fn)?;
1120 Expression::DateSub(f)
1121 }
1122 Expression::DateDiff(mut f) => {
1123 f.this = transform_recursive(f.this, transform_fn)?;
1124 f.expression = transform_recursive(f.expression, transform_fn)?;
1125 Expression::DateDiff(f)
1126 }
1127 Expression::DateTrunc(mut f) => {
1128 f.this = transform_recursive(f.this, transform_fn)?;
1129 Expression::DateTrunc(f)
1130 }
1131 Expression::Extract(mut f) => {
1132 f.this = transform_recursive(f.this, transform_fn)?;
1133 Expression::Extract(f)
1134 }
1135
1136 // ===== JSON expressions =====
1137 Expression::JsonObject(mut f) => {
1138 f.pairs = f
1139 .pairs
1140 .into_iter()
1141 .map(|(k, v)| {
1142 let new_k = transform_recursive(k, transform_fn)?;
1143 let new_v = transform_recursive(v, transform_fn)?;
1144 Ok((new_k, new_v))
1145 })
1146 .collect::<Result<Vec<_>>>()?;
1147 Expression::JsonObject(f)
1148 }
1149
1150 // ===== Subquery expressions =====
1151 Expression::Subquery(mut s) => {
1152 s.this = transform_recursive(s.this, transform_fn)?;
1153 Expression::Subquery(s)
1154 }
1155 Expression::Exists(mut e) => {
1156 e.this = transform_recursive(e.this, transform_fn)?;
1157 Expression::Exists(e)
1158 }
1159 Expression::Describe(mut d) => {
1160 d.target = transform_recursive(d.target, transform_fn)?;
1161 Expression::Describe(d)
1162 }
1163
1164 // ===== Set operations =====
1165 Expression::Union(mut u) => {
1166 let left = std::mem::replace(&mut u.left, Expression::Null(Null));
1167 u.left = transform_recursive(left, transform_fn)?;
1168 let right = std::mem::replace(&mut u.right, Expression::Null(Null));
1169 u.right = transform_recursive(right, transform_fn)?;
1170 if let Some(mut with) = u.with.take() {
1171 with.ctes = with
1172 .ctes
1173 .into_iter()
1174 .map(|mut cte| {
1175 let original = cte.this.clone();
1176 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1177 cte
1178 })
1179 .collect();
1180 u.with = Some(with);
1181 }
1182 Expression::Union(u)
1183 }
1184 Expression::Intersect(mut i) => {
1185 let left = std::mem::replace(&mut i.left, Expression::Null(Null));
1186 i.left = transform_recursive(left, transform_fn)?;
1187 let right = std::mem::replace(&mut i.right, Expression::Null(Null));
1188 i.right = transform_recursive(right, transform_fn)?;
1189 if let Some(mut with) = i.with.take() {
1190 with.ctes = with
1191 .ctes
1192 .into_iter()
1193 .map(|mut cte| {
1194 let original = cte.this.clone();
1195 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1196 cte
1197 })
1198 .collect();
1199 i.with = Some(with);
1200 }
1201 Expression::Intersect(i)
1202 }
1203 Expression::Except(mut e) => {
1204 let left = std::mem::replace(&mut e.left, Expression::Null(Null));
1205 e.left = transform_recursive(left, transform_fn)?;
1206 let right = std::mem::replace(&mut e.right, Expression::Null(Null));
1207 e.right = transform_recursive(right, transform_fn)?;
1208 if let Some(mut with) = e.with.take() {
1209 with.ctes = with
1210 .ctes
1211 .into_iter()
1212 .map(|mut cte| {
1213 let original = cte.this.clone();
1214 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1215 cte
1216 })
1217 .collect();
1218 e.with = Some(with);
1219 }
1220 Expression::Except(e)
1221 }
1222
1223 // ===== DML expressions =====
1224 Expression::Insert(mut ins) => {
1225 // Transform VALUES clause expressions
1226 let mut new_values = Vec::new();
1227 for row in ins.values {
1228 let mut new_row = Vec::new();
1229 for e in row {
1230 new_row.push(transform_recursive(e, transform_fn)?);
1231 }
1232 new_values.push(new_row);
1233 }
1234 ins.values = new_values;
1235
1236 // Transform query (for INSERT ... SELECT)
1237 if let Some(query) = ins.query {
1238 ins.query = Some(transform_recursive(query, transform_fn)?);
1239 }
1240
1241 // Transform RETURNING clause
1242 let mut new_returning = Vec::new();
1243 for e in ins.returning {
1244 new_returning.push(transform_recursive(e, transform_fn)?);
1245 }
1246 ins.returning = new_returning;
1247
1248 // Transform ON CONFLICT clause
1249 if let Some(on_conflict) = ins.on_conflict {
1250 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
1251 }
1252
1253 Expression::Insert(ins)
1254 }
1255 Expression::Update(mut upd) => {
1256 upd.set = upd
1257 .set
1258 .into_iter()
1259 .map(|(id, val)| {
1260 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
1261 (id, new_val)
1262 })
1263 .collect();
1264 if let Some(mut where_clause) = upd.where_clause.take() {
1265 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1266 upd.where_clause = Some(where_clause);
1267 }
1268 Expression::Update(upd)
1269 }
1270 Expression::Delete(mut del) => {
1271 if let Some(mut where_clause) = del.where_clause.take() {
1272 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1273 del.where_clause = Some(where_clause);
1274 }
1275 Expression::Delete(del)
1276 }
1277
1278 // ===== CTE expressions =====
1279 Expression::With(mut w) => {
1280 w.ctes = w
1281 .ctes
1282 .into_iter()
1283 .map(|mut cte| {
1284 let original = cte.this.clone();
1285 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1286 cte
1287 })
1288 .collect();
1289 Expression::With(w)
1290 }
1291 Expression::Cte(mut c) => {
1292 c.this = transform_recursive(c.this, transform_fn)?;
1293 Expression::Cte(c)
1294 }
1295
1296 // ===== Order expressions =====
1297 Expression::Ordered(mut o) => {
1298 o.this = transform_recursive(o.this, transform_fn)?;
1299 Expression::Ordered(o)
1300 }
1301
1302 // ===== Negation =====
1303 Expression::Neg(mut n) => {
1304 n.this = transform_recursive(n.this, transform_fn)?;
1305 Expression::Neg(n)
1306 }
1307
1308 // ===== Between =====
1309 Expression::Between(mut b) => {
1310 b.this = transform_recursive(b.this, transform_fn)?;
1311 b.low = transform_recursive(b.low, transform_fn)?;
1312 b.high = transform_recursive(b.high, transform_fn)?;
1313 Expression::Between(b)
1314 }
1315 Expression::IsNull(mut i) => {
1316 i.this = transform_recursive(i.this, transform_fn)?;
1317 Expression::IsNull(i)
1318 }
1319 Expression::IsTrue(mut i) => {
1320 i.this = transform_recursive(i.this, transform_fn)?;
1321 Expression::IsTrue(i)
1322 }
1323 Expression::IsFalse(mut i) => {
1324 i.this = transform_recursive(i.this, transform_fn)?;
1325 Expression::IsFalse(i)
1326 }
1327
1328 // ===== Like expressions =====
1329 Expression::Like(mut l) => {
1330 l.left = transform_recursive(l.left, transform_fn)?;
1331 l.right = transform_recursive(l.right, transform_fn)?;
1332 Expression::Like(l)
1333 }
1334 Expression::ILike(mut l) => {
1335 l.left = transform_recursive(l.left, transform_fn)?;
1336 l.right = transform_recursive(l.right, transform_fn)?;
1337 Expression::ILike(l)
1338 }
1339
1340 // ===== Additional binary ops not covered by macro =====
1341 Expression::Neq(op) => transform_binary!(Neq, *op),
1342 Expression::Lte(op) => transform_binary!(Lte, *op),
1343 Expression::Gte(op) => transform_binary!(Gte, *op),
1344 Expression::Mod(op) => transform_binary!(Mod, *op),
1345 Expression::Concat(op) => transform_binary!(Concat, *op),
1346 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
1347 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
1348 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
1349 Expression::Is(op) => transform_binary!(Is, *op),
1350
1351 // ===== TryCast / SafeCast =====
1352 Expression::TryCast(mut c) => {
1353 c.this = transform_recursive(c.this, transform_fn)?;
1354 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1355 Expression::TryCast(c)
1356 }
1357 Expression::SafeCast(mut c) => {
1358 c.this = transform_recursive(c.this, transform_fn)?;
1359 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1360 Expression::SafeCast(c)
1361 }
1362
1363 // ===== Misc =====
1364 Expression::Unnest(mut f) => {
1365 f.this = transform_recursive(f.this, transform_fn)?;
1366 f.expressions = f
1367 .expressions
1368 .into_iter()
1369 .map(|e| transform_recursive(e, transform_fn))
1370 .collect::<Result<Vec<_>>>()?;
1371 Expression::Unnest(f)
1372 }
1373 Expression::Explode(mut f) => {
1374 f.this = transform_recursive(f.this, transform_fn)?;
1375 Expression::Explode(f)
1376 }
1377 Expression::GroupConcat(mut f) => {
1378 f.this = transform_recursive(f.this, transform_fn)?;
1379 Expression::GroupConcat(f)
1380 }
1381 Expression::StringAgg(mut f) => {
1382 f.this = transform_recursive(f.this, transform_fn)?;
1383 Expression::StringAgg(f)
1384 }
1385 Expression::ListAgg(mut f) => {
1386 f.this = transform_recursive(f.this, transform_fn)?;
1387 Expression::ListAgg(f)
1388 }
1389 Expression::ArrayAgg(mut f) => {
1390 f.this = transform_recursive(f.this, transform_fn)?;
1391 Expression::ArrayAgg(f)
1392 }
1393 Expression::ParseJson(mut f) => {
1394 f.this = transform_recursive(f.this, transform_fn)?;
1395 Expression::ParseJson(f)
1396 }
1397 Expression::ToJson(mut f) => {
1398 f.this = transform_recursive(f.this, transform_fn)?;
1399 Expression::ToJson(f)
1400 }
1401 Expression::JSONExtract(mut e) => {
1402 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1403 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1404 Expression::JSONExtract(e)
1405 }
1406 Expression::JSONExtractScalar(mut e) => {
1407 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1408 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1409 Expression::JSONExtractScalar(e)
1410 }
1411
1412 // StrToTime: recurse into this
1413 Expression::StrToTime(mut e) => {
1414 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1415 Expression::StrToTime(e)
1416 }
1417
1418 // UnixToTime: recurse into this
1419 Expression::UnixToTime(mut e) => {
1420 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1421 Expression::UnixToTime(e)
1422 }
1423
1424 // CreateTable: recurse into column defaults, on_update expressions, and data types
1425 Expression::CreateTable(mut ct) => {
1426 for col in &mut ct.columns {
1427 if let Some(default_expr) = col.default.take() {
1428 col.default = Some(transform_recursive(default_expr, transform_fn)?);
1429 }
1430 if let Some(on_update_expr) = col.on_update.take() {
1431 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
1432 }
1433 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
1434 // are NOT applied here because per-dialect transforms are designed for CAST/expression
1435 // contexts and may not produce correct results for DDL column definitions.
1436 // The DDL type mappings would need dedicated handling per source/target pair.
1437 }
1438 if let Some(as_select) = ct.as_select.take() {
1439 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
1440 }
1441 Expression::CreateTable(ct)
1442 }
1443
1444 // CreateView: recurse into the view body query
1445 Expression::CreateView(mut cv) => {
1446 cv.query = transform_recursive(cv.query, transform_fn)?;
1447 Expression::CreateView(cv)
1448 }
1449
1450 // CreateTask: recurse into the task body
1451 Expression::CreateTask(mut ct) => {
1452 ct.body = transform_recursive(ct.body, transform_fn)?;
1453 Expression::CreateTask(ct)
1454 }
1455
1456 // CreateProcedure: recurse into body expressions
1457 Expression::CreateProcedure(mut cp) => {
1458 if let Some(body) = cp.body.take() {
1459 cp.body = Some(match body {
1460 FunctionBody::Expression(expr) => {
1461 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1462 }
1463 FunctionBody::Return(expr) => {
1464 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1465 }
1466 FunctionBody::Statements(stmts) => {
1467 let transformed_stmts = stmts
1468 .into_iter()
1469 .map(|s| transform_recursive(s, transform_fn))
1470 .collect::<Result<Vec<_>>>()?;
1471 FunctionBody::Statements(transformed_stmts)
1472 }
1473 other => other,
1474 });
1475 }
1476 Expression::CreateProcedure(cp)
1477 }
1478
1479 // CreateFunction: recurse into body expressions
1480 Expression::CreateFunction(mut cf) => {
1481 if let Some(body) = cf.body.take() {
1482 cf.body = Some(match body {
1483 FunctionBody::Expression(expr) => {
1484 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1485 }
1486 FunctionBody::Return(expr) => {
1487 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1488 }
1489 FunctionBody::Statements(stmts) => {
1490 let transformed_stmts = stmts
1491 .into_iter()
1492 .map(|s| transform_recursive(s, transform_fn))
1493 .collect::<Result<Vec<_>>>()?;
1494 FunctionBody::Statements(transformed_stmts)
1495 }
1496 other => other,
1497 });
1498 }
1499 Expression::CreateFunction(cf)
1500 }
1501
1502 // MemberOf: recurse into left and right operands
1503 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
1504 // ArrayContainsAll (@>): recurse into left and right operands
1505 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
1506 // ArrayContainedBy (<@): recurse into left and right operands
1507 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
1508 // ArrayOverlaps (&&): recurse into left and right operands
1509 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
1510 // TsMatch (@@): recurse into left and right operands
1511 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
1512 // Adjacent (-|-): recurse into left and right operands
1513 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
1514
1515 // Table: recurse into when (HistoricalData) and changes fields
1516 Expression::Table(mut t) => {
1517 if let Some(when) = t.when.take() {
1518 let transformed =
1519 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
1520 if let Expression::HistoricalData(hd) = transformed {
1521 t.when = Some(hd);
1522 }
1523 }
1524 if let Some(changes) = t.changes.take() {
1525 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
1526 if let Expression::Changes(c) = transformed {
1527 t.changes = Some(c);
1528 }
1529 }
1530 Expression::Table(t)
1531 }
1532
1533 // HistoricalData (Snowflake time travel): recurse into expression
1534 Expression::HistoricalData(mut hd) => {
1535 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
1536 Expression::HistoricalData(hd)
1537 }
1538
1539 // Changes (Snowflake CHANGES clause): recurse into at_before and end
1540 Expression::Changes(mut c) => {
1541 if let Some(at_before) = c.at_before.take() {
1542 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
1543 }
1544 if let Some(end) = c.end.take() {
1545 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
1546 }
1547 Expression::Changes(c)
1548 }
1549
1550 // TableArgument: TABLE(expr) or MODEL(expr)
1551 Expression::TableArgument(mut ta) => {
1552 ta.this = transform_recursive(ta.this, transform_fn)?;
1553 Expression::TableArgument(ta)
1554 }
1555
1556 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
1557 Expression::JoinedTable(mut jt) => {
1558 jt.left = transform_recursive(jt.left, transform_fn)?;
1559 for join in &mut jt.joins {
1560 join.this = transform_recursive(
1561 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
1562 transform_fn,
1563 )?;
1564 if let Some(on) = join.on.take() {
1565 join.on = Some(transform_recursive(on, transform_fn)?);
1566 }
1567 }
1568 jt.lateral_views = jt
1569 .lateral_views
1570 .into_iter()
1571 .map(|mut lv| {
1572 lv.this = transform_recursive(lv.this, transform_fn)?;
1573 Ok(lv)
1574 })
1575 .collect::<Result<Vec<_>>>()?;
1576 Expression::JoinedTable(jt)
1577 }
1578
1579 // Lateral: LATERAL func() - recurse into the function expression
1580 Expression::Lateral(mut lat) => {
1581 *lat.this = transform_recursive(*lat.this, transform_fn)?;
1582 Expression::Lateral(lat)
1583 }
1584
1585 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
1586 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
1587 // as a unit together with the WithinGroup wrapper
1588 Expression::WithinGroup(mut wg) => {
1589 wg.order_by = wg
1590 .order_by
1591 .into_iter()
1592 .map(|mut o| {
1593 let original = o.this.clone();
1594 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1595 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1596 Ok(Expression::Ordered(transformed)) => *transformed,
1597 Ok(_) | Err(_) => o,
1598 }
1599 })
1600 .collect();
1601 Expression::WithinGroup(wg)
1602 }
1603
1604 // Filter: recurse into both the aggregate and the filter condition
1605 Expression::Filter(mut f) => {
1606 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
1607 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
1608 Expression::Filter(f)
1609 }
1610
1611 // Aggregate functions (AggFunc-based): recurse into the aggregate argument,
1612 // filter, order_by, having_max, and limit.
1613 // Stddev, StddevSamp, Variance, and ArrayAgg are handled earlier in this match.
1614 Expression::Sum(f) => recurse_agg!(Sum, f),
1615 Expression::Avg(f) => recurse_agg!(Avg, f),
1616 Expression::Min(f) => recurse_agg!(Min, f),
1617 Expression::Max(f) => recurse_agg!(Max, f),
1618 Expression::CountIf(f) => recurse_agg!(CountIf, f),
1619 Expression::StddevPop(f) => recurse_agg!(StddevPop, f),
1620 Expression::VarPop(f) => recurse_agg!(VarPop, f),
1621 Expression::VarSamp(f) => recurse_agg!(VarSamp, f),
1622 Expression::Median(f) => recurse_agg!(Median, f),
1623 Expression::Mode(f) => recurse_agg!(Mode, f),
1624 Expression::First(f) => recurse_agg!(First, f),
1625 Expression::Last(f) => recurse_agg!(Last, f),
1626 Expression::AnyValue(f) => recurse_agg!(AnyValue, f),
1627 Expression::ApproxDistinct(f) => recurse_agg!(ApproxDistinct, f),
1628 Expression::ApproxCountDistinct(f) => recurse_agg!(ApproxCountDistinct, f),
1629 Expression::LogicalAnd(f) => recurse_agg!(LogicalAnd, f),
1630 Expression::LogicalOr(f) => recurse_agg!(LogicalOr, f),
1631 Expression::Skewness(f) => recurse_agg!(Skewness, f),
1632 Expression::ArrayConcatAgg(f) => recurse_agg!(ArrayConcatAgg, f),
1633 Expression::ArrayUniqueAgg(f) => recurse_agg!(ArrayUniqueAgg, f),
1634 Expression::BoolXorAgg(f) => recurse_agg!(BoolXorAgg, f),
1635 Expression::BitwiseOrAgg(f) => recurse_agg!(BitwiseOrAgg, f),
1636 Expression::BitwiseAndAgg(f) => recurse_agg!(BitwiseAndAgg, f),
1637 Expression::BitwiseXorAgg(f) => recurse_agg!(BitwiseXorAgg, f),
1638
1639 // Count has its own struct with an Option<Expression> `this` field
1640 Expression::Count(mut c) => {
1641 if let Some(this) = c.this.take() {
1642 c.this = Some(transform_recursive(this, transform_fn)?);
1643 }
1644 if let Some(filter) = c.filter.take() {
1645 c.filter = Some(transform_recursive(filter, transform_fn)?);
1646 }
1647 Expression::Count(c)
1648 }
1649
1650 Expression::PipeOperator(mut pipe) => {
1651 pipe.this = transform_recursive(pipe.this, transform_fn)?;
1652 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
1653 Expression::PipeOperator(pipe)
1654 }
1655
1656 // ArrayExcept/ArrayContains/ArrayDistinct: recurse into children
1657 Expression::ArrayExcept(mut f) => {
1658 f.this = transform_recursive(f.this, transform_fn)?;
1659 f.expression = transform_recursive(f.expression, transform_fn)?;
1660 Expression::ArrayExcept(f)
1661 }
1662 Expression::ArrayContains(mut f) => {
1663 f.this = transform_recursive(f.this, transform_fn)?;
1664 f.expression = transform_recursive(f.expression, transform_fn)?;
1665 Expression::ArrayContains(f)
1666 }
1667 Expression::ArrayDistinct(mut f) => {
1668 f.this = transform_recursive(f.this, transform_fn)?;
1669 Expression::ArrayDistinct(f)
1670 }
1671 Expression::ArrayPosition(mut f) => {
1672 f.this = transform_recursive(f.this, transform_fn)?;
1673 f.expression = transform_recursive(f.expression, transform_fn)?;
1674 Expression::ArrayPosition(f)
1675 }
1676
1677 // Pass through leaf nodes unchanged
1678 other => other,
1679 };
1680
1681 // Then apply the transform function
1682 transform_fn(expr)
1683}
1684
// `configs_for_dialect_type` (defined below) returns the tokenizer config,
// generator config, and expression transform closure for a built-in dialect
// type; it is the shared implementation used by both `Dialect::get()` and
// custom dialect construction. (Kept as a regular `//` comment here: a `///`
// doc comment in this position would attach to the next item, not to that
// function.)
// ---------------------------------------------------------------------------
// Cached dialect configurations
// ---------------------------------------------------------------------------
1691
/// Pre-computed tokenizer + generator configs for a dialect, cached via `LazyLock`.
/// Transform closures are cheap (unit-struct method calls) and created fresh each time.
struct CachedDialectConfig {
    // Cloned out of the cache for each caller (see `from_cache!` in
    // `configs_for_dialect_type`), so building it lazily once still pays off.
    tokenizer_config: TokenizerConfig,
    // Wrapped in `Arc` so handing a copy to a caller is a refcount bump,
    // not a deep clone of the generator configuration.
    generator_config: Arc<GeneratorConfig>,
}
1698
/// Declares a feature-gated `LazyLock<CachedDialectConfig>` static for one dialect.
///
/// Expands to a `static` guarded by `#[cfg(feature = ...)]` whose tokenizer and
/// generator configs are built lazily on first access.
macro_rules! cached_dialect {
    ($name:ident, $dialect:expr, $feature:literal) => {
        #[cfg(feature = $feature)]
        static $name: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
            let dialect = $dialect;
            let tokenizer_config = dialect.tokenizer_config();
            let generator_config = Arc::new(dialect.generator_config());
            CachedDialectConfig {
                tokenizer_config,
                generator_config,
            }
        });
    };
}
1712
// The generic dialect is always compiled (it has no `dialect-*` feature gate),
// so it cannot use `cached_dialect!`, which requires a feature name; this is
// the macro's expansion written out by hand, minus the `#[cfg]` attribute.
static CACHED_GENERIC: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
    let d = GenericDialect;
    CachedDialectConfig {
        tokenizer_config: d.tokenizer_config(),
        generator_config: Arc::new(d.generator_config()),
    }
});
1720
// One cached-config static per feature-gated dialect. Each invocation expands
// (via `cached_dialect!`) to a `#[cfg(feature = ...)]` `LazyLock` static that
// is consumed by `from_cache!` in `configs_for_dialect_type` below.
cached_dialect!(CACHED_POSTGRESQL, PostgresDialect, "dialect-postgresql");
cached_dialect!(CACHED_MYSQL, MySQLDialect, "dialect-mysql");
cached_dialect!(CACHED_BIGQUERY, BigQueryDialect, "dialect-bigquery");
cached_dialect!(CACHED_SNOWFLAKE, SnowflakeDialect, "dialect-snowflake");
cached_dialect!(CACHED_DUCKDB, DuckDBDialect, "dialect-duckdb");
cached_dialect!(CACHED_TSQL, TSQLDialect, "dialect-tsql");
cached_dialect!(CACHED_ORACLE, OracleDialect, "dialect-oracle");
cached_dialect!(CACHED_HIVE, HiveDialect, "dialect-hive");
cached_dialect!(CACHED_SPARK, SparkDialect, "dialect-spark");
cached_dialect!(CACHED_SQLITE, SQLiteDialect, "dialect-sqlite");
cached_dialect!(CACHED_PRESTO, PrestoDialect, "dialect-presto");
cached_dialect!(CACHED_TRINO, TrinoDialect, "dialect-trino");
cached_dialect!(CACHED_REDSHIFT, RedshiftDialect, "dialect-redshift");
cached_dialect!(CACHED_CLICKHOUSE, ClickHouseDialect, "dialect-clickhouse");
cached_dialect!(CACHED_DATABRICKS, DatabricksDialect, "dialect-databricks");
cached_dialect!(CACHED_ATHENA, AthenaDialect, "dialect-athena");
cached_dialect!(CACHED_TERADATA, TeradataDialect, "dialect-teradata");
cached_dialect!(CACHED_DORIS, DorisDialect, "dialect-doris");
cached_dialect!(CACHED_STARROCKS, StarRocksDialect, "dialect-starrocks");
cached_dialect!(
    CACHED_MATERIALIZE,
    MaterializeDialect,
    "dialect-materialize"
);
cached_dialect!(CACHED_RISINGWAVE, RisingWaveDialect, "dialect-risingwave");
cached_dialect!(
    CACHED_SINGLESTORE,
    SingleStoreDialect,
    "dialect-singlestore"
);
cached_dialect!(
    CACHED_COCKROACHDB,
    CockroachDBDialect,
    "dialect-cockroachdb"
);
cached_dialect!(CACHED_TIDB, TiDBDialect, "dialect-tidb");
cached_dialect!(CACHED_DRUID, DruidDialect, "dialect-druid");
cached_dialect!(CACHED_SOLR, SolrDialect, "dialect-solr");
cached_dialect!(CACHED_TABLEAU, TableauDialect, "dialect-tableau");
cached_dialect!(CACHED_DUNE, DuneDialect, "dialect-dune");
cached_dialect!(CACHED_FABRIC, FabricDialect, "dialect-fabric");
cached_dialect!(CACHED_DRILL, DrillDialect, "dialect-drill");
cached_dialect!(CACHED_DREMIO, DremioDialect, "dialect-dremio");
cached_dialect!(CACHED_EXASOL, ExasolDialect, "dialect-exasol");
cached_dialect!(CACHED_DATAFUSION, DataFusionDialect, "dialect-datafusion");
1766
/// Resolve the cached tokenizer config, shared generator config, and a fresh
/// per-node transform closure for a built-in dialect.
///
/// Dialects whose feature flag is disabled (and any unmatched variant) fall
/// through to the generic dialect's configs via the catch-all arm.
fn configs_for_dialect_type(
    dt: DialectType,
) -> (
    TokenizerConfig,
    Arc<GeneratorConfig>,
    Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
) {
    /// Clone configs from a cached static and pair with a fresh transform closure.
    macro_rules! from_cache {
        ($cache:expr, $dialect_struct:expr) => {{
            let c = &*$cache;
            (
                c.tokenizer_config.clone(),
                c.generator_config.clone(),
                Box::new(move |e| $dialect_struct.transform_expr(e)),
            )
        }};
    }
    match dt {
        #[cfg(feature = "dialect-postgresql")]
        DialectType::PostgreSQL => from_cache!(CACHED_POSTGRESQL, PostgresDialect),
        #[cfg(feature = "dialect-mysql")]
        DialectType::MySQL => from_cache!(CACHED_MYSQL, MySQLDialect),
        #[cfg(feature = "dialect-bigquery")]
        DialectType::BigQuery => from_cache!(CACHED_BIGQUERY, BigQueryDialect),
        #[cfg(feature = "dialect-snowflake")]
        DialectType::Snowflake => from_cache!(CACHED_SNOWFLAKE, SnowflakeDialect),
        #[cfg(feature = "dialect-duckdb")]
        DialectType::DuckDB => from_cache!(CACHED_DUCKDB, DuckDBDialect),
        #[cfg(feature = "dialect-tsql")]
        DialectType::TSQL => from_cache!(CACHED_TSQL, TSQLDialect),
        #[cfg(feature = "dialect-oracle")]
        DialectType::Oracle => from_cache!(CACHED_ORACLE, OracleDialect),
        #[cfg(feature = "dialect-hive")]
        DialectType::Hive => from_cache!(CACHED_HIVE, HiveDialect),
        #[cfg(feature = "dialect-spark")]
        DialectType::Spark => from_cache!(CACHED_SPARK, SparkDialect),
        #[cfg(feature = "dialect-sqlite")]
        DialectType::SQLite => from_cache!(CACHED_SQLITE, SQLiteDialect),
        #[cfg(feature = "dialect-presto")]
        DialectType::Presto => from_cache!(CACHED_PRESTO, PrestoDialect),
        #[cfg(feature = "dialect-trino")]
        DialectType::Trino => from_cache!(CACHED_TRINO, TrinoDialect),
        #[cfg(feature = "dialect-redshift")]
        DialectType::Redshift => from_cache!(CACHED_REDSHIFT, RedshiftDialect),
        #[cfg(feature = "dialect-clickhouse")]
        DialectType::ClickHouse => from_cache!(CACHED_CLICKHOUSE, ClickHouseDialect),
        #[cfg(feature = "dialect-databricks")]
        DialectType::Databricks => from_cache!(CACHED_DATABRICKS, DatabricksDialect),
        #[cfg(feature = "dialect-athena")]
        DialectType::Athena => from_cache!(CACHED_ATHENA, AthenaDialect),
        #[cfg(feature = "dialect-teradata")]
        DialectType::Teradata => from_cache!(CACHED_TERADATA, TeradataDialect),
        #[cfg(feature = "dialect-doris")]
        DialectType::Doris => from_cache!(CACHED_DORIS, DorisDialect),
        #[cfg(feature = "dialect-starrocks")]
        DialectType::StarRocks => from_cache!(CACHED_STARROCKS, StarRocksDialect),
        #[cfg(feature = "dialect-materialize")]
        DialectType::Materialize => from_cache!(CACHED_MATERIALIZE, MaterializeDialect),
        #[cfg(feature = "dialect-risingwave")]
        DialectType::RisingWave => from_cache!(CACHED_RISINGWAVE, RisingWaveDialect),
        #[cfg(feature = "dialect-singlestore")]
        DialectType::SingleStore => from_cache!(CACHED_SINGLESTORE, SingleStoreDialect),
        #[cfg(feature = "dialect-cockroachdb")]
        DialectType::CockroachDB => from_cache!(CACHED_COCKROACHDB, CockroachDBDialect),
        #[cfg(feature = "dialect-tidb")]
        DialectType::TiDB => from_cache!(CACHED_TIDB, TiDBDialect),
        #[cfg(feature = "dialect-druid")]
        DialectType::Druid => from_cache!(CACHED_DRUID, DruidDialect),
        #[cfg(feature = "dialect-solr")]
        DialectType::Solr => from_cache!(CACHED_SOLR, SolrDialect),
        #[cfg(feature = "dialect-tableau")]
        DialectType::Tableau => from_cache!(CACHED_TABLEAU, TableauDialect),
        #[cfg(feature = "dialect-dune")]
        DialectType::Dune => from_cache!(CACHED_DUNE, DuneDialect),
        #[cfg(feature = "dialect-fabric")]
        DialectType::Fabric => from_cache!(CACHED_FABRIC, FabricDialect),
        #[cfg(feature = "dialect-drill")]
        DialectType::Drill => from_cache!(CACHED_DRILL, DrillDialect),
        #[cfg(feature = "dialect-dremio")]
        DialectType::Dremio => from_cache!(CACHED_DREMIO, DremioDialect),
        #[cfg(feature = "dialect-exasol")]
        DialectType::Exasol => from_cache!(CACHED_EXASOL, ExasolDialect),
        #[cfg(feature = "dialect-datafusion")]
        DialectType::DataFusion => from_cache!(CACHED_DATAFUSION, DataFusionDialect),
        // Generic, plus any dialect whose feature is compiled out.
        _ => from_cache!(CACHED_GENERIC, GenericDialect),
    }
}
1855
1856// ---------------------------------------------------------------------------
1857// Custom dialect registry
1858// ---------------------------------------------------------------------------
1859
1860static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
1861 LazyLock::new(|| RwLock::new(HashMap::new()));
1862
/// Immutable snapshot of a registered custom dialect's configuration.
struct CustomDialectConfig {
    /// Registry key; the name used by `Dialect::get_by_name`.
    name: String,
    /// Built-in dialect whose configs and transforms this dialect inherits.
    base_dialect: DialectType,
    /// Tokenizer config after the builder's modifier (if any) was applied.
    tokenizer_config: TokenizerConfig,
    /// Generator config after the builder's modifier (if any) was applied.
    generator_config: GeneratorConfig,
    /// Optional per-node transform that replaces the base dialect's.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocess that replaces the base dialect's.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1871
1872/// Fluent builder for creating and registering custom SQL dialects.
1873///
1874/// A custom dialect is based on an existing built-in dialect and allows selective
1875/// overrides of tokenizer configuration, generator configuration, and expression
1876/// transforms.
1877///
1878/// # Example
1879///
1880/// ```rust,ignore
1881/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
1882/// use polyglot_sql::generator::NormalizeFunctions;
1883///
1884/// CustomDialectBuilder::new("my_postgres")
1885/// .based_on(DialectType::PostgreSQL)
1886/// .generator_config_modifier(|gc| {
1887/// gc.normalize_functions = NormalizeFunctions::Lower;
1888/// })
1889/// .register()
1890/// .unwrap();
1891///
1892/// let d = Dialect::get_by_name("my_postgres").unwrap();
1893/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
1894/// let sql = d.generate(&exprs[0]).unwrap();
1895/// assert_eq!(sql, "select count(*)");
1896///
1897/// polyglot_sql::unregister_custom_dialect("my_postgres");
1898/// ```
pub struct CustomDialectBuilder {
    /// Name the dialect will be registered (and later looked up) under.
    name: String,
    /// Built-in dialect to inherit tokenizer/generator configs from.
    base_dialect: DialectType,
    /// Optional one-shot tweak applied to the inherited tokenizer config.
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    /// Optional one-shot tweak applied to the inherited generator config.
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    /// Optional per-node transform replacing the base dialect's.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocess replacing the base dialect's.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1907
1908impl CustomDialectBuilder {
1909 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
1910 pub fn new(name: impl Into<String>) -> Self {
1911 Self {
1912 name: name.into(),
1913 base_dialect: DialectType::Generic,
1914 tokenizer_modifier: None,
1915 generator_modifier: None,
1916 transform: None,
1917 preprocess: None,
1918 }
1919 }
1920
1921 /// Set the base built-in dialect to inherit configuration from.
1922 pub fn based_on(mut self, dialect: DialectType) -> Self {
1923 self.base_dialect = dialect;
1924 self
1925 }
1926
1927 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
1928 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
1929 where
1930 F: FnOnce(&mut TokenizerConfig) + 'static,
1931 {
1932 self.tokenizer_modifier = Some(Box::new(f));
1933 self
1934 }
1935
1936 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
1937 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
1938 where
1939 F: FnOnce(&mut GeneratorConfig) + 'static,
1940 {
1941 self.generator_modifier = Some(Box::new(f));
1942 self
1943 }
1944
1945 /// Set a custom per-node expression transform function.
1946 ///
1947 /// This replaces the base dialect's transform. It is called on every expression
1948 /// node during the recursive transform pass.
1949 pub fn transform_fn<F>(mut self, f: F) -> Self
1950 where
1951 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1952 {
1953 self.transform = Some(Arc::new(f));
1954 self
1955 }
1956
1957 /// Set a custom whole-tree preprocessing function.
1958 ///
1959 /// This replaces the base dialect's built-in preprocessing. It is called once
1960 /// on the entire expression tree before the recursive per-node transform.
1961 pub fn preprocess_fn<F>(mut self, f: F) -> Self
1962 where
1963 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1964 {
1965 self.preprocess = Some(Arc::new(f));
1966 self
1967 }
1968
1969 /// Build the custom dialect configuration and register it in the global registry.
1970 ///
1971 /// Returns an error if:
1972 /// - The name collides with a built-in dialect name
1973 /// - A custom dialect with the same name is already registered
1974 pub fn register(self) -> Result<()> {
1975 // Reject names that collide with built-in dialects
1976 if DialectType::from_str(&self.name).is_ok() {
1977 return Err(crate::error::Error::parse(
1978 format!(
1979 "Cannot register custom dialect '{}': name collides with built-in dialect",
1980 self.name
1981 ),
1982 0,
1983 0,
1984 0,
1985 0,
1986 ));
1987 }
1988
1989 // Get base configs
1990 let (mut tok_config, arc_gen_config, _base_transform) =
1991 configs_for_dialect_type(self.base_dialect);
1992 let mut gen_config = (*arc_gen_config).clone();
1993
1994 // Apply modifiers
1995 if let Some(tok_mod) = self.tokenizer_modifier {
1996 tok_mod(&mut tok_config);
1997 }
1998 if let Some(gen_mod) = self.generator_modifier {
1999 gen_mod(&mut gen_config);
2000 }
2001
2002 let config = CustomDialectConfig {
2003 name: self.name.clone(),
2004 base_dialect: self.base_dialect,
2005 tokenizer_config: tok_config,
2006 generator_config: gen_config,
2007 transform: self.transform,
2008 preprocess: self.preprocess,
2009 };
2010
2011 register_custom_dialect(config)
2012 }
2013}
2014
2015use std::str::FromStr;
2016
2017fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
2018 let mut registry = CUSTOM_DIALECT_REGISTRY.write().map_err(|e| {
2019 crate::error::Error::parse(format!("Registry lock poisoned: {}", e), 0, 0, 0, 0)
2020 })?;
2021
2022 if registry.contains_key(&config.name) {
2023 return Err(crate::error::Error::parse(
2024 format!("Custom dialect '{}' is already registered", config.name),
2025 0,
2026 0,
2027 0,
2028 0,
2029 ));
2030 }
2031
2032 registry.insert(config.name.clone(), Arc::new(config));
2033 Ok(())
2034}
2035
2036/// Remove a custom dialect from the global registry.
2037///
2038/// Returns `true` if a dialect with that name was found and removed,
2039/// `false` if no such custom dialect existed.
2040pub fn unregister_custom_dialect(name: &str) -> bool {
2041 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
2042 registry.remove(name).is_some()
2043 } else {
2044 false
2045 }
2046}
2047
2048fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
2049 CUSTOM_DIALECT_REGISTRY
2050 .read()
2051 .ok()
2052 .and_then(|registry| registry.get(name).cloned())
2053}
2054
2055/// Main entry point for dialect-specific SQL operations.
2056///
2057/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
2058/// transformer for a specific SQL database engine. It is the high-level API through
2059/// which callers parse, generate, transform, and transpile SQL.
2060///
2061/// # Usage
2062///
2063/// ```rust,ignore
2064/// use polyglot_sql::dialects::{Dialect, DialectType};
2065///
2066/// // Parse PostgreSQL SQL into an AST
2067/// let pg = Dialect::get(DialectType::PostgreSQL);
2068/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
2069///
2070/// // Transpile from PostgreSQL to BigQuery
2071/// let results = pg.transpile("SELECT NOW()", DialectType::BigQuery)?;
2072/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
2073/// ```
2074///
2075/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
2076/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    /// Built-in dialect this handle represents. For custom dialects this is
    /// the base dialect they were built on.
    dialect_type: DialectType,
    /// Tokenizer configured with this dialect's lexing rules.
    tokenizer: Tokenizer,
    /// Shared generator configuration; cloned only when per-call overrides are needed.
    generator_config: Arc<GeneratorConfig>,
    /// Per-node expression transform applied during `transform`.
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
2087
2088/// Options for [`Dialect::transpile_with`].
2089///
2090/// Use [`TranspileOptions::default`] for defaults, then tweak the fields you need.
2091/// The struct is marked `#[non_exhaustive]` so new fields can be added without
2092/// breaking the API.
2093///
2094/// The struct derives `Serialize`/`Deserialize` using camelCase field names so
2095/// it can be round-tripped over JSON bridges (C FFI, WASM) without mapping.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", default)]
#[non_exhaustive]
pub struct TranspileOptions {
    /// Whether to pretty-print the output SQL.
    ///
    /// Defaults to `false` via the `Default` derive.
    pub pretty: bool,
}
2103
2104impl TranspileOptions {
2105 /// Construct options with pretty-printing enabled.
2106 pub fn pretty() -> Self {
2107 Self { pretty: true }
2108 }
2109}
2110
2111/// A value that can be used as the target dialect in [`Dialect::transpile`] /
2112/// [`Dialect::transpile_with`].
2113///
2114/// Implemented for [`DialectType`] (built-in dialect enum) and `&Dialect` (any
2115/// dialect handle, including custom ones). End users do not normally need to
2116/// implement this trait themselves.
pub trait TranspileTarget {
    /// Invoke `f` with a reference to the resolved target dialect.
    ///
    /// Implementations either construct the dialect on the fly (the
    /// `DialectType` impl) or pass an existing handle through (the `&Dialect`
    /// impl).
    fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R;
}
2121
2122impl TranspileTarget for DialectType {
2123 fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R {
2124 f(&Dialect::get(self))
2125 }
2126}
2127
impl TranspileTarget for &Dialect {
    // Already a dialect handle; hand it to the callback directly.
    fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R {
        f(self)
    }
}
2133
2134impl Dialect {
2135 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
2136 ///
2137 /// This is the primary constructor. It initializes the tokenizer, generator config,
2138 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
2139 /// For hybrid dialects like Athena, it also sets up expression-specific generator
2140 /// config routing.
2141 pub fn get(dialect_type: DialectType) -> Self {
2142 let (tokenizer_config, generator_config, transformer) =
2143 configs_for_dialect_type(dialect_type);
2144
2145 // Set up expression-specific generator config for hybrid dialects
2146 let generator_config_for_expr: Option<
2147 Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
2148 > = match dialect_type {
2149 #[cfg(feature = "dialect-athena")]
2150 DialectType::Athena => Some(Box::new(|expr| {
2151 AthenaDialect.generator_config_for_expr(expr)
2152 })),
2153 _ => None,
2154 };
2155
2156 Self {
2157 dialect_type,
2158 tokenizer: Tokenizer::new(tokenizer_config),
2159 generator_config,
2160 transformer,
2161 generator_config_for_expr,
2162 custom_preprocess: None,
2163 }
2164 }
2165
2166 /// Look up a dialect by string name.
2167 ///
2168 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
2169 /// falls back to the custom dialect registry. Returns `None` if no dialect
2170 /// with the given name exists.
2171 pub fn get_by_name(name: &str) -> Option<Self> {
2172 // Try built-in first
2173 if let Ok(dt) = DialectType::from_str(name) {
2174 return Some(Self::get(dt));
2175 }
2176
2177 // Try custom registry
2178 let config = get_custom_dialect_config(name)?;
2179 Some(Self::from_custom_config(&config))
2180 }
2181
2182 /// Construct a `Dialect` from a custom dialect configuration.
2183 fn from_custom_config(config: &CustomDialectConfig) -> Self {
2184 // Build the transformer: use custom if provided, else use base dialect's
2185 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
2186 if let Some(ref custom_transform) = config.transform {
2187 let t = Arc::clone(custom_transform);
2188 Box::new(move |e| t(e))
2189 } else {
2190 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
2191 base_transform
2192 };
2193
2194 // Build the custom preprocess: use custom if provided
2195 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
2196 config.preprocess.as_ref().map(|p| {
2197 let p = Arc::clone(p);
2198 Box::new(move |e: Expression| p(e))
2199 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
2200 });
2201
2202 Self {
2203 dialect_type: config.base_dialect,
2204 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
2205 generator_config: Arc::new(config.generator_config.clone()),
2206 transformer,
2207 generator_config_for_expr: None,
2208 custom_preprocess,
2209 }
2210 }
2211
    /// Get the dialect type
    ///
    /// For dialects built from a custom config this is the *base* dialect the
    /// custom dialect inherits from (custom dialects have no variant of their own).
    pub fn dialect_type(&self) -> DialectType {
        self.dialect_type
    }
2216
    /// Get the generator configuration
    ///
    /// Returns the shared base config; per-expression overrides used by hybrid
    /// dialects are not reflected here.
    pub fn generator_config(&self) -> &GeneratorConfig {
        &self.generator_config
    }
2221
2222 /// Parses a SQL string into a list of [`Expression`] AST nodes.
2223 ///
2224 /// The input may contain multiple semicolon-separated statements; each one
2225 /// produces a separate element in the returned vector. Tokenization uses
2226 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
2227 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
2228 let tokens = self.tokenizer.tokenize(sql)?;
2229 let config = crate::parser::ParserConfig {
2230 dialect: Some(self.dialect_type),
2231 ..Default::default()
2232 };
2233 let mut parser = Parser::with_source(tokens, config, sql.to_string());
2234 parser.parse()
2235 }
2236
    /// Tokenize SQL using this dialect's tokenizer configuration.
    ///
    /// Returns the raw token stream; no parsing is performed.
    pub fn tokenize(&self, sql: &str) -> Result<Vec<Token>> {
        self.tokenizer.tokenize(sql)
    }
2241
2242 /// Get the generator config for a specific expression (supports hybrid dialects).
2243 /// Returns an owned `GeneratorConfig` suitable for mutation before generation.
2244 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
2245 if let Some(ref config_fn) = self.generator_config_for_expr {
2246 config_fn(expr)
2247 } else {
2248 (*self.generator_config).clone()
2249 }
2250 }
2251
2252 /// Generates a SQL string from an [`Expression`] AST node.
2253 ///
2254 /// The output uses this dialect's generator configuration for identifier quoting,
2255 /// keyword casing, function name normalization, and syntax style. The result is
2256 /// a single-line (non-pretty) SQL string.
2257 pub fn generate(&self, expr: &Expression) -> Result<String> {
2258 // Fast path: when no per-expression config override, share the Arc cheaply.
2259 if self.generator_config_for_expr.is_none() {
2260 let mut generator = Generator::with_arc_config(self.generator_config.clone());
2261 return generator.generate(expr);
2262 }
2263 let config = self.get_config_for_expr(expr);
2264 let mut generator = Generator::with_config(config);
2265 generator.generate(expr)
2266 }
2267
2268 /// Generate SQL from an expression with pretty printing enabled
2269 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
2270 let mut config = self.get_config_for_expr(expr);
2271 config.pretty = true;
2272 let mut generator = Generator::with_config(config);
2273 generator.generate(expr)
2274 }
2275
2276 /// Generate SQL from an expression with source dialect info (for transpilation)
2277 pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
2278 let mut config = self.get_config_for_expr(expr);
2279 config.source_dialect = Some(source);
2280 let mut generator = Generator::with_config(config);
2281 generator.generate(expr)
2282 }
2283
2284 /// Generate SQL from an expression with pretty printing and source dialect info
2285 pub fn generate_pretty_with_source(
2286 &self,
2287 expr: &Expression,
2288 source: DialectType,
2289 ) -> Result<String> {
2290 let mut config = self.get_config_for_expr(expr);
2291 config.pretty = true;
2292 config.source_dialect = Some(source);
2293 let mut generator = Generator::with_config(config);
2294 generator.generate(expr)
2295 }
2296
2297 /// Generate SQL from an expression with forced identifier quoting (identify=True)
2298 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
2299 let mut config = self.get_config_for_expr(expr);
2300 config.always_quote_identifiers = true;
2301 let mut generator = Generator::with_config(config);
2302 generator.generate(expr)
2303 }
2304
2305 /// Generate SQL from an expression with pretty printing and forced identifier quoting
2306 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
2307 let mut config = (*self.generator_config).clone();
2308 config.pretty = true;
2309 config.always_quote_identifiers = true;
2310 let mut generator = Generator::with_config(config);
2311 generator.generate(expr)
2312 }
2313
2314 /// Generate SQL from an expression with caller-specified config overrides
2315 pub fn generate_with_overrides(
2316 &self,
2317 expr: &Expression,
2318 overrides: impl FnOnce(&mut GeneratorConfig),
2319 ) -> Result<String> {
2320 let mut config = self.get_config_for_expr(expr);
2321 overrides(&mut config);
2322 let mut generator = Generator::with_config(config);
2323 generator.generate(expr)
2324 }
2325
2326 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
2327 ///
2328 /// The transformation proceeds in two phases:
2329 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
2330 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
2331 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
2332 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
2333 ///
2334 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
2335 /// and for identity transforms (normalizing SQL within the same dialect).
2336 pub fn transform(&self, expr: Expression) -> Result<Expression> {
2337 // Apply preprocessing transforms based on dialect
2338 let preprocessed = self.preprocess(expr)?;
2339 // Then apply recursive transformation
2340 transform_recursive(preprocessed, &self.transformer)
2341 }
2342
    /// Apply dialect-specific preprocessing transforms
    ///
    /// Whole-tree structural rewrites that must run before the recursive
    /// per-node transform (QUALIFY elimination, CTE hoisting, join rewrites,
    /// etc.). A custom dialect's registered preprocess function replaces this
    /// built-in logic entirely. The order of transforms within each arm is
    /// significant.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        // If a custom preprocess function is set, use it instead of the built-in logic
        if let Some(ref custom_preprocess) = self.custom_preprocess {
            return custom_preprocess(expr);
        }

        // Only pull in the transforms module when at least one dialect below
        // that uses it is compiled in (avoids an unused-import warning otherwise).
        #[cfg(any(
            feature = "dialect-mysql",
            feature = "dialect-postgresql",
            feature = "dialect-bigquery",
            feature = "dialect-snowflake",
            feature = "dialect-tsql",
            feature = "dialect-spark",
            feature = "dialect-databricks",
            feature = "dialect-hive",
            feature = "dialect-sqlite",
            feature = "dialect-trino",
            feature = "dialect-presto",
            feature = "dialect-duckdb",
            feature = "dialect-redshift",
            feature = "dialect-starrocks",
            feature = "dialect-oracle",
            feature = "dialect-clickhouse",
        ))]
        use crate::transforms;

        match self.dialect_type {
            // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
            // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
            #[cfg(feature = "dialect-mysql")]
            DialectType::MySQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_full_outer_join(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // PostgreSQL doesn't support QUALIFY
            // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
            // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
            #[cfg(feature = "dialect-postgresql")]
            DialectType::PostgreSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
                // Normalize SET ... TO to SET ... = in CREATE FUNCTION
                // Only normalize when sqlglot would fully parse (no body) —
                // sqlglot falls back to Command for complex function bodies,
                // preserving the original text including TO.
                let expr = if let Expression::CreateFunction(mut cf) = expr {
                    if cf.body.is_none() {
                        for opt in &mut cf.set_options {
                            if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
                                &mut opt.value
                            {
                                *use_to = false;
                            }
                        }
                    }
                    Expression::CreateFunction(cf)
                } else {
                    expr
                };
                Ok(expr)
            }
            // BigQuery doesn't support DISTINCT ON or CTE column aliases
            #[cfg(feature = "dialect-bigquery")]
            DialectType::BigQuery => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::pushdown_cte_column_names(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
                Ok(expr)
            }
            // Snowflake
            #[cfg(feature = "dialect-snowflake")]
            DialectType::Snowflake => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
                Ok(expr)
            }
            // TSQL doesn't support QUALIFY
            // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
            // TSQL doesn't support CTEs in subqueries (hoist to top level)
            // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
            // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
            #[cfg(feature = "dialect-tsql")]
            DialectType::TSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::ensure_bools(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                let expr = transforms::qualify_derived_table_outputs(expr)?;
                Ok(expr)
            }
            // Spark doesn't support QUALIFY (but Databricks does)
            // Spark doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-spark")]
            DialectType::Spark => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Databricks supports QUALIFY natively
            // Databricks doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-databricks")]
            DialectType::Databricks => {
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Hive doesn't support QUALIFY or CTEs in subqueries
            #[cfg(feature = "dialect-hive")]
            DialectType::Hive => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // SQLite doesn't support QUALIFY
            #[cfg(feature = "dialect-sqlite")]
            DialectType::SQLite => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Trino doesn't support QUALIFY
            #[cfg(feature = "dialect-trino")]
            DialectType::Trino => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
                Ok(expr)
            }
            // Presto doesn't support QUALIFY or WINDOW clause
            #[cfg(feature = "dialect-presto")]
            DialectType::Presto => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
                Ok(expr)
            }
            // DuckDB supports QUALIFY - no elimination needed
            // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
            // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
            #[cfg(feature = "dialect-duckdb")]
            DialectType::DuckDB => {
                let expr = transforms::expand_posexplode_duckdb(expr)?;
                let expr = transforms::expand_like_any(expr)?;
                Ok(expr)
            }
            // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
            #[cfg(feature = "dialect-redshift")]
            DialectType::Redshift => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
            #[cfg(feature = "dialect-starrocks")]
            DialectType::StarRocks => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::expand_between_in_delete(expr)?;
                Ok(expr)
            }
            // DataFusion supports QUALIFY and semi/anti joins natively
            #[cfg(feature = "dialect-datafusion")]
            DialectType::DataFusion => Ok(expr),
            // Oracle doesn't support QUALIFY
            #[cfg(feature = "dialect-oracle")]
            DialectType::Oracle => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Drill - no special preprocessing needed
            #[cfg(feature = "dialect-drill")]
            DialectType::Drill => Ok(expr),
            // Teradata - no special preprocessing needed
            #[cfg(feature = "dialect-teradata")]
            DialectType::Teradata => Ok(expr),
            // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
            #[cfg(feature = "dialect-clickhouse")]
            DialectType::ClickHouse => {
                let expr = transforms::no_limit_order_by_union(expr)?;
                Ok(expr)
            }
            // Other dialects - no preprocessing
            _ => Ok(expr),
        }
    }
2536
    /// Transpile SQL from this dialect to the given target dialect.
    ///
    /// The target may be specified as either a built-in [`DialectType`] enum variant
    /// or as a reference to a [`Dialect`] handle (built-in or custom). Both work:
    ///
    /// ```rust,ignore
    /// let pg = Dialect::get(DialectType::PostgreSQL);
    /// pg.transpile("SELECT NOW()", DialectType::BigQuery)?; // enum
    /// pg.transpile("SELECT NOW()", &custom_dialect)?; // handle
    /// ```
    ///
    /// Each statement in `sql` yields one element in the returned vector.
    ///
    /// For pretty-printing or other options, use [`transpile_with`](Self::transpile_with).
    pub fn transpile<T: TranspileTarget>(&self, sql: &str, target: T) -> Result<Vec<String>> {
        self.transpile_with(sql, target, TranspileOptions::default())
    }
2552
2553 /// Transpile SQL with configurable [`TranspileOptions`] (e.g. pretty-printing).
2554 pub fn transpile_with<T: TranspileTarget>(
2555 &self,
2556 sql: &str,
2557 target: T,
2558 opts: TranspileOptions,
2559 ) -> Result<Vec<String>> {
2560 target.with_dialect(|td| self.transpile_inner(sql, td, opts.pretty))
2561 }
2562
2563 #[cfg(not(feature = "transpile"))]
2564 fn transpile_inner(
2565 &self,
2566 sql: &str,
2567 target_dialect: &Dialect,
2568 pretty: bool,
2569 ) -> Result<Vec<String>> {
2570 let target = target_dialect.dialect_type;
2571 // Without the transpile feature, only same-dialect or to/from generic is supported
2572 if self.dialect_type != target
2573 && self.dialect_type != DialectType::Generic
2574 && target != DialectType::Generic
2575 {
2576 return Err(crate::error::Error::parse(
2577 "Cross-dialect transpilation not available in this build",
2578 0,
2579 0,
2580 0,
2581 0,
2582 ));
2583 }
2584
2585 let expressions = self.parse(sql)?;
2586 let generic_identity =
2587 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
2588
2589 if generic_identity {
2590 return expressions
2591 .into_iter()
2592 .map(|expr| {
2593 if pretty {
2594 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
2595 } else {
2596 target_dialect.generate_with_source(&expr, self.dialect_type)
2597 }
2598 })
2599 .collect();
2600 }
2601
2602 expressions
2603 .into_iter()
2604 .map(|expr| {
2605 let transformed = target_dialect.transform(expr)?;
2606 if pretty {
2607 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)
2608 } else {
2609 target_dialect.generate_with_source(&transformed, self.dialect_type)
2610 }
2611 })
2612 .collect()
2613 }
2614
    /// Full cross-dialect transpilation pipeline (compiled with the `transpile` feature).
    ///
    /// Parses `sql` in the source dialect, then applies a fixed, order-sensitive
    /// sequence of AST rewrites per statement:
    ///
    /// 1. Source-specific normalization (e.g. DuckDB VARCHAR -> TEXT, plus this
    ///    dialect's own `transform` when source != target).
    /// 2. Source/target-pair-specific rewrites (TSQL ISNULL/JSON unwrapping,
    ///    Snowflake -> DuckDB RANDOM/REPEAT handling, BigQuery UNNEST handling, ...).
    /// 3. Cross-dialect semantic normalization plus target-specific rewrites
    ///    (DISTINCT ON elimination, LATERAL VIEW conversion, UNION wrapping,
    ///    COUNT -> COUNT_BIG, ...).
    /// 4. The target dialect's `transform`, then SQL generation (pretty or compact).
    ///
    /// Returns one generated SQL string per parsed statement.
    #[cfg(feature = "transpile")]
    fn transpile_inner(
        &self,
        sql: &str,
        target_dialect: &Dialect,
        pretty: bool,
    ) -> Result<Vec<String>> {
        let target = target_dialect.dialect_type;
        let expressions = self.parse(sql)?;
        // Generic -> Generic is an identity round-trip: skip every rewrite below.
        let generic_identity =
            self.dialect_type == DialectType::Generic && target == DialectType::Generic;

        if generic_identity {
            return expressions
                .into_iter()
                .map(|expr| {
                    if pretty {
                        target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
                    } else {
                        target_dialect.generate_with_source(&expr, self.dialect_type)
                    }
                })
                .collect();
        }

        expressions
            .into_iter()
            .map(|expr| {
                // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
                // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
                // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
                let expr = if matches!(self.dialect_type, DialectType::DuckDB) {
                    use crate::expressions::DataType as DT;
                    transform_recursive(expr, &|e| match e {
                        Expression::DataType(DT::VarChar { .. }) => {
                            Ok(Expression::DataType(DT::Text))
                        }
                        Expression::DataType(DT::Char { .. }) => Ok(Expression::DataType(DT::Text)),
                        _ => Ok(e),
                    })?
                } else {
                    expr
                };

                // When source and target differ, first normalize the source dialect's
                // AST constructs to standard SQL, so that the target dialect can handle them.
                // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
                let normalized =
                    if self.dialect_type != target && self.dialect_type != DialectType::Generic {
                        self.transform(expr)?
                    } else {
                        expr
                    };

                // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
                // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
                // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
                // cross-dialect transpilation we need the unwrapped JSON_QUERY.
                let normalized =
                    if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                        && !matches!(target, DialectType::TSQL | DialectType::Fabric)
                    {
                        transform_recursive(normalized, &|e| {
                            if let Expression::Function(ref f) = e {
                                if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
                                    // Check if first arg is JSON_QUERY and second is JSON_VALUE
                                    if let (
                                        Expression::Function(ref jq),
                                        Expression::Function(ref jv),
                                    ) = (&f.args[0], &f.args[1])
                                    {
                                        if jq.name.eq_ignore_ascii_case("JSON_QUERY")
                                            && jv.name.eq_ignore_ascii_case("JSON_VALUE")
                                        {
                                            // Unwrap: return just JSON_QUERY(...)
                                            return Ok(f.args[0].clone());
                                        }
                                    }
                                }
                            }
                            Ok(e)
                        })?
                    } else {
                        normalized
                    };

                // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME
                // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects.
                // Python sqlglot parses Snowflake's CURRENT_TIME as Localtime expression.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && !matches!(target, DialectType::Snowflake)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("CURRENT_TIME") {
                                return Ok(Expression::Localtime(Box::new(
                                    crate::expressions::Localtime { this: None },
                                )));
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT))
                // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
                // transform. DuckDB requires the count argument to be BIGINT.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && matches!(target, DialectType::DuckDB)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("REPEAT") && f.args.len() == 2 {
                                // Check if first arg is space string literal
                                if let Expression::Literal(ref lit) = f.args[0] {
                                    if let crate::expressions::Literal::String(ref s) = lit.as_ref()
                                    {
                                        if s == " " {
                                            // Wrap second arg in CAST(... AS BIGINT) if not already
                                            if !matches!(f.args[1], Expression::Cast(_)) {
                                                let mut new_args = f.args.clone();
                                                new_args[1] = Expression::Cast(Box::new(
                                                    crate::expressions::Cast {
                                                        this: new_args[1].clone(),
                                                        to: crate::expressions::DataType::BigInt {
                                                            length: None,
                                                        },
                                                        trailing_comments: Vec::new(),
                                                        double_colon_syntax: false,
                                                        format: None,
                                                        default: None,
                                                        inferred_type: None,
                                                    },
                                                ));
                                                return Ok(Expression::Function(Box::new(
                                                    crate::expressions::Function {
                                                        name: f.name.clone(),
                                                        args: new_args,
                                                        distinct: f.distinct,
                                                        trailing_comments: f
                                                            .trailing_comments
                                                            .clone(),
                                                        use_bracket_syntax: f.use_bracket_syntax,
                                                        no_parens: f.no_parens,
                                                        quoted: f.quoted,
                                                        span: None,
                                                        inferred_type: None,
                                                    },
                                                )));
                                            }
                                        }
                                    }
                                }
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
                // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && !matches!(target, DialectType::BigQuery)
                {
                    crate::transforms::propagate_struct_field_names(normalized)?
                } else {
                    normalized
                };

                // Snowflake source to DuckDB target: RANDOM()/RANDOM(seed) -> scaled RANDOM()
                // Snowflake RANDOM() returns integer in [-2^63, 2^63-1], DuckDB RANDOM() returns float [0, 1)
                // Skip RANDOM inside UNIFORM/NORMAL/ZIPF/RANDSTR generator args since those
                // functions handle their generator args differently (as float seeds).
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && matches!(target, DialectType::DuckDB)
                {
                    // Builds CAST(-2^63 + RANDOM() * (2^63 - (-2^63)) AS BIGINT), i.e. a
                    // DuckDB float RANDOM() rescaled to Snowflake's full BIGINT range.
                    fn make_scaled_random() -> Expression {
                        let lower =
                            Expression::Literal(Box::new(crate::expressions::Literal::Number(
                                "-9.223372036854776E+18".to_string(),
                            )));
                        let upper =
                            Expression::Literal(Box::new(crate::expressions::Literal::Number(
                                "9.223372036854776e+18".to_string(),
                            )));
                        let random_call = Expression::Random(crate::expressions::Random);
                        let range_size = Expression::Paren(Box::new(crate::expressions::Paren {
                            this: Expression::Sub(Box::new(crate::expressions::BinaryOp {
                                left: upper,
                                right: lower.clone(),
                                left_comments: vec![],
                                operator_comments: vec![],
                                trailing_comments: vec![],
                                inferred_type: None,
                            })),
                            trailing_comments: vec![],
                        }));
                        let scaled = Expression::Mul(Box::new(crate::expressions::BinaryOp {
                            left: random_call,
                            right: range_size,
                            left_comments: vec![],
                            operator_comments: vec![],
                            trailing_comments: vec![],
                            inferred_type: None,
                        }));
                        let shifted = Expression::Add(Box::new(crate::expressions::BinaryOp {
                            left: lower,
                            right: scaled,
                            left_comments: vec![],
                            operator_comments: vec![],
                            trailing_comments: vec![],
                            inferred_type: None,
                        }));
                        Expression::Cast(Box::new(crate::expressions::Cast {
                            this: shifted,
                            to: crate::expressions::DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }))
                    }

                    // Pre-process: protect seeded RANDOM(seed) inside UNIFORM/NORMAL/ZIPF/RANDSTR
                    // by converting Rand{seed: Some(s)} to Function{name:"RANDOM", args:[s]}.
                    // This prevents transform_recursive (which is bottom-up) from expanding
                    // seeded RANDOM into make_scaled_random() and losing the seed value.
                    // Unseeded RANDOM()/Rand{seed:None} is left as-is so it gets expanded
                    // and then un-expanded back to Expression::Random by the code below.
                    let normalized = transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            let n = f.name.to_ascii_uppercase();
                            if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" || n == "RANDSTR" {
                                if let Expression::Function(mut f) = e {
                                    for arg in f.args.iter_mut() {
                                        if let Expression::Rand(ref r) = arg {
                                            if r.lower.is_none() && r.upper.is_none() {
                                                if let Some(ref seed) = r.seed {
                                                    // Convert Rand{seed: Some(s)} to Function("RANDOM", [s])
                                                    // so it won't be expanded by the RANDOM expansion below
                                                    *arg = Expression::Function(Box::new(
                                                        crate::expressions::Function::new(
                                                            "RANDOM".to_string(),
                                                            vec![*seed.clone()],
                                                        ),
                                                    ));
                                                }
                                            }
                                        }
                                    }
                                    return Ok(Expression::Function(f));
                                }
                            }
                        }
                        Ok(e)
                    })?;

                    // transform_recursive processes bottom-up, so RANDOM() (unseeded) inside
                    // generator functions (UNIFORM, NORMAL, ZIPF) gets expanded before
                    // we see the parent. We detect this and undo the expansion by replacing
                    // the expanded pattern back with Expression::Random.
                    // Seeded RANDOM(seed) was already protected above as Function("RANDOM", [seed]).
                    // Note: RANDSTR is NOT included here — it needs the expanded form for unseeded
                    // RANDOM() since the DuckDB handler uses the expanded SQL as-is in the hash.
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            let n = f.name.to_ascii_uppercase();
                            if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" {
                                if let Expression::Function(mut f) = e {
                                    for arg in f.args.iter_mut() {
                                        // Detect expanded RANDOM pattern: CAST(-9.22... + RANDOM() * (...) AS BIGINT)
                                        if let Expression::Cast(ref cast) = arg {
                                            if matches!(
                                                cast.to,
                                                crate::expressions::DataType::BigInt { .. }
                                            ) {
                                                if let Expression::Add(ref add) = cast.this {
                                                    if let Expression::Literal(ref lit) = add.left {
                                                        if let crate::expressions::Literal::Number(
                                                            ref num,
                                                        ) = lit.as_ref()
                                                        {
                                                            if num == "-9.223372036854776E+18" {
                                                                *arg = Expression::Random(
                                                                    crate::expressions::Random,
                                                                );
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                    return Ok(Expression::Function(f));
                                }
                                return Ok(e);
                            }
                        }
                        match e {
                            Expression::Random(_) => Ok(make_scaled_random()),
                            // Rand(seed) with no bounds: drop seed and expand
                            // (DuckDB RANDOM doesn't support seeds)
                            Expression::Rand(ref r) if r.lower.is_none() && r.upper.is_none() => {
                                Ok(make_scaled_random())
                            }
                            _ => Ok(e),
                        }
                    })?
                } else {
                    normalized
                };

                // Apply cross-dialect semantic normalizations
                let normalized =
                    Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;

                // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
                // (SELECT UNNEST(..., max_depth => 2)) subquery
                // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::DuckDB)
                {
                    crate::transforms::wrap_duckdb_unnest_struct(normalized)?
                } else {
                    normalized
                };

                // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
                // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(
                        target,
                        DialectType::DuckDB
                            | DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Spark
                            | DialectType::Databricks
                    ) {
                    crate::transforms::unnest_alias_to_column_alias(normalized)?
                } else if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::BigQuery | DialectType::Redshift)
                {
                    // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
                    // but don't convert alias format (no _t0 wrapper)
                    let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
                    // For Redshift: strip UNNEST when arg is a column reference path
                    if matches!(target, DialectType::Redshift) {
                        crate::transforms::strip_unnest_column_refs(result)?
                    } else {
                        result
                    }
                } else {
                    normalized
                };

                // For Presto/Trino targets from PostgreSQL/Redshift source:
                // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
                let normalized = if matches!(
                    self.dialect_type,
                    DialectType::PostgreSQL | DialectType::Redshift
                ) && matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    crate::transforms::wrap_unnest_join_aliases(normalized)?
                } else {
                    normalized
                };

                // Eliminate DISTINCT ON with target-dialect awareness
                // This must happen after source transform (which may produce DISTINCT ON)
                // and before target transform, with knowledge of the target dialect's NULL ordering behavior
                let normalized =
                    crate::transforms::eliminate_distinct_on_for_dialect(normalized, Some(target))?;

                // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
                let normalized = if matches!(target, DialectType::Snowflake) {
                    Self::transform_generate_date_array_snowflake(normalized)?
                } else {
                    normalized
                };

                // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
                let normalized = if matches!(
                    target,
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive
                ) {
                    crate::transforms::unnest_to_explode_select(normalized)?
                } else {
                    normalized
                };

                // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it
                let normalized = if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
                    crate::transforms::no_limit_order_by_union(normalized)?
                } else {
                    normalized
                };

                // TSQL: Convert COUNT(*) -> COUNT_BIG(*) when source is not TSQL/Fabric
                // Python sqlglot does this in the TSQL generator, but we can't do it there
                // because it would break TSQL -> TSQL identity
                let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
                    && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Count(ref c) = e {
                            // Build COUNT_BIG(...) as an AggregateFunction
                            let args = if c.star {
                                vec![Expression::Star(crate::expressions::Star {
                                    table: None,
                                    except: None,
                                    replace: None,
                                    rename: None,
                                    trailing_comments: Vec::new(),
                                    span: None,
                                })]
                            } else if let Some(ref this) = c.this {
                                vec![this.clone()]
                            } else {
                                vec![]
                            };
                            Ok(Expression::AggregateFunction(Box::new(
                                crate::expressions::AggregateFunction {
                                    name: "COUNT_BIG".to_string(),
                                    args,
                                    distinct: c.distinct,
                                    filter: c.filter.clone(),
                                    order_by: Vec::new(),
                                    limit: None,
                                    ignore_nulls: None,
                                    inferred_type: None,
                                },
                            )))
                        } else {
                            Ok(e)
                        }
                    })?
                } else {
                    normalized
                };

                // Finally, let the target dialect apply its own dialect-specific rewrites.
                let transformed = target_dialect.transform(normalized)?;

                // DuckDB target: when FROM is RANGE(n), replace SEQ's ROW_NUMBER pattern with `range`
                let transformed = if matches!(target, DialectType::DuckDB) {
                    Self::seq_rownum_to_range(transformed)?
                } else {
                    transformed
                };

                // Render the fully-rewritten AST in the target dialect.
                let mut sql = if pretty {
                    target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)?
                } else {
                    target_dialect.generate_with_source(&transformed, self.dialect_type)?
                };

                // Align a known Snowflake pretty-print edge case with Python sqlglot output.
                if pretty && target == DialectType::Snowflake {
                    sql = Self::normalize_snowflake_pretty(sql);
                }

                Ok(sql)
            })
            .collect()
    }
3087}
3088
3089// Transpile-only methods: cross-dialect normalization and helpers
3090#[cfg(feature = "transpile")]
3091impl Dialect {
3092 /// For DuckDB target: when FROM clause contains RANGE(n), replace
3093 /// `(ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1)` with `range` in select expressions.
3094 /// This handles SEQ1/2/4/8 → RANGE transpilation from Snowflake.
3095 fn seq_rownum_to_range(expr: Expression) -> Result<Expression> {
3096 if let Expression::Select(mut select) = expr {
3097 // Check if FROM contains a RANGE function
3098 let has_range_from = if let Some(ref from) = select.from {
3099 from.expressions.iter().any(|e| {
3100 // Check for direct RANGE(...) or aliased RANGE(...)
3101 match e {
3102 Expression::Function(f) => f.name.eq_ignore_ascii_case("RANGE"),
3103 Expression::Alias(a) => {
3104 matches!(&a.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("RANGE"))
3105 }
3106 _ => false,
3107 }
3108 })
3109 } else {
3110 false
3111 };
3112
3113 if has_range_from {
3114 // Replace the ROW_NUMBER pattern in select expressions
3115 select.expressions = select
3116 .expressions
3117 .into_iter()
3118 .map(|e| Self::replace_rownum_with_range(e))
3119 .collect();
3120 }
3121
3122 Ok(Expression::Select(select))
3123 } else {
3124 Ok(expr)
3125 }
3126 }
3127
3128 /// Replace `(ROW_NUMBER() OVER (...) - 1)` with `range` column reference
3129 fn replace_rownum_with_range(expr: Expression) -> Expression {
3130 match expr {
3131 // Match: (ROW_NUMBER() OVER (...) - 1) % N → range % N
3132 Expression::Mod(op) => {
3133 let new_left = Self::try_replace_rownum_paren(&op.left);
3134 Expression::Mod(Box::new(crate::expressions::BinaryOp {
3135 left: new_left,
3136 right: op.right,
3137 left_comments: op.left_comments,
3138 operator_comments: op.operator_comments,
3139 trailing_comments: op.trailing_comments,
3140 inferred_type: op.inferred_type,
3141 }))
3142 }
3143 // Match: (CASE WHEN (ROW...) % N >= ... THEN ... ELSE ... END)
3144 Expression::Paren(p) => {
3145 let inner = Self::replace_rownum_with_range(p.this);
3146 Expression::Paren(Box::new(crate::expressions::Paren {
3147 this: inner,
3148 trailing_comments: p.trailing_comments,
3149 }))
3150 }
3151 Expression::Case(mut c) => {
3152 // Replace ROW_NUMBER in WHEN conditions and THEN expressions
3153 c.whens = c
3154 .whens
3155 .into_iter()
3156 .map(|(cond, then)| {
3157 (
3158 Self::replace_rownum_with_range(cond),
3159 Self::replace_rownum_with_range(then),
3160 )
3161 })
3162 .collect();
3163 if let Some(else_) = c.else_ {
3164 c.else_ = Some(Self::replace_rownum_with_range(else_));
3165 }
3166 Expression::Case(c)
3167 }
3168 Expression::Gte(op) => Expression::Gte(Box::new(crate::expressions::BinaryOp {
3169 left: Self::replace_rownum_with_range(op.left),
3170 right: op.right,
3171 left_comments: op.left_comments,
3172 operator_comments: op.operator_comments,
3173 trailing_comments: op.trailing_comments,
3174 inferred_type: op.inferred_type,
3175 })),
3176 Expression::Sub(op) => Expression::Sub(Box::new(crate::expressions::BinaryOp {
3177 left: Self::replace_rownum_with_range(op.left),
3178 right: op.right,
3179 left_comments: op.left_comments,
3180 operator_comments: op.operator_comments,
3181 trailing_comments: op.trailing_comments,
3182 inferred_type: op.inferred_type,
3183 })),
3184 Expression::Alias(mut a) => {
3185 a.this = Self::replace_rownum_with_range(a.this);
3186 Expression::Alias(a)
3187 }
3188 other => other,
3189 }
3190 }
3191
3192 /// Check if an expression is `(ROW_NUMBER() OVER (...) - 1)` and replace with `range`
3193 fn try_replace_rownum_paren(expr: &Expression) -> Expression {
3194 if let Expression::Paren(ref p) = expr {
3195 if let Expression::Sub(ref sub) = p.this {
3196 if let Expression::WindowFunction(ref wf) = sub.left {
3197 if let Expression::Function(ref f) = wf.this {
3198 if f.name.eq_ignore_ascii_case("ROW_NUMBER") {
3199 if let Expression::Literal(ref lit) = sub.right {
3200 if let crate::expressions::Literal::Number(ref n) = lit.as_ref() {
3201 if n == "1" {
3202 return Expression::column("range");
3203 }
3204 }
3205 }
3206 }
3207 }
3208 }
3209 }
3210 }
3211 expr.clone()
3212 }
3213
    /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
    /// Converts:
    ///   SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
    /// To:
    ///   SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
    ///   FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)) AS _t0(seq, key, path, index, alias, this)
    ///
    /// Walks the AST bottom-up; also rewrites ARRAY_SIZE(GENERATE_DATE_ARRAY(...))
    /// via a dedicated helper, and falls back to a FROM-clause variant when the
    /// UNNEST is not in a JOIN (Generic -> Snowflake case).
    fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
        use crate::expressions::*;
        transform_recursive(expr, &|e| {
            // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
            if let Expression::ArraySize(ref af) = e {
                if let Expression::Function(ref f) = af.this {
                    if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                        let result = Self::convert_array_size_gda_snowflake(f)?;
                        return Ok(result);
                    }
                }
            }

            // Only SELECT nodes can carry the JOIN/UNNEST pattern we rewrite.
            let Expression::Select(mut sel) = e else {
                return Ok(e);
            };

            // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
            let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
            let mut gda_join_idx: Option<usize> = None;

            for (idx, join) in sel.joins.iter().enumerate() {
                // The join.this may be:
                // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
                // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
                let (unnest_ref, alias_name) = match &join.this {
                    Expression::Unnest(ref unnest) => {
                        let alias = unnest.alias.as_ref().map(|id| id.name.clone());
                        (Some(unnest.as_ref()), alias)
                    }
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            (Some(unnest.as_ref()), Some(a.alias.name.clone()))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
                    // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
                    if let Expression::Function(ref f) = unnest.this {
                        if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                            let start_expr = f.args[0].clone();
                            let end_expr = f.args[1].clone();
                            let step = f.args.get(2).cloned();

                            // Extract unit from step interval
                            let unit = if let Some(Expression::Interval(ref iv)) = step {
                                if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
                                    Some(format!("{:?}", unit).to_ascii_uppercase())
                                } else if let Some(ref this) = iv.this {
                                    // The interval may be stored as a string like "1 MONTH"
                                    if let Expression::Literal(lit) = this {
                                        if let Literal::String(ref s) = lit.as_ref() {
                                            let parts: Vec<&str> = s.split_whitespace().collect();
                                            if parts.len() == 2 {
                                                Some(parts[1].to_ascii_uppercase())
                                            } else if parts.len() == 1 {
                                                // Single word like "MONTH" or just "1"
                                                let upper = parts[0].to_ascii_uppercase();
                                                if matches!(
                                                    upper.as_str(),
                                                    "YEAR"
                                                        | "QUARTER"
                                                        | "MONTH"
                                                        | "WEEK"
                                                        | "DAY"
                                                        | "HOUR"
                                                        | "MINUTE"
                                                        | "SECOND"
                                                ) {
                                                    Some(upper)
                                                } else {
                                                    None
                                                }
                                            } else {
                                                None
                                            }
                                        } else {
                                            None
                                        }
                                    } else {
                                        None
                                    }
                                } else {
                                    None
                                }
                            } else {
                                None
                            };

                            if let Some(unit_str) = unit {
                                gda_info = Some((alias, start_expr, end_expr, unit_str));
                                gda_join_idx = Some(idx);
                            }
                        }
                    }
                }
                // Only the first matching join is rewritten per SELECT.
                if gda_info.is_some() {
                    break;
                }
            }

            let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
                // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
                // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
                let result = Self::try_transform_from_gda_snowflake(sel);
                return result;
            };
            // Safe: gda_join_idx is always set together with gda_info above.
            let join_idx = gda_join_idx.unwrap();

            // Build ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)
            // ARRAY_GENERATE_RANGE uses exclusive end, and we need DATEDIFF + 1 values
            // (inclusive date range), so the exclusive end is DATEDIFF + 1.
            let datediff = Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![
                    Expression::boxed_column(Column {
                        name: Identifier::new(&unit_str),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    start_expr.clone(),
                    end_expr.clone(),
                ],
            )));
            let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
                left: datediff,
                right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }));

            let array_gen_range = Expression::Function(Box::new(Function::new(
                "ARRAY_GENERATE_RANGE".to_string(),
                vec![
                    Expression::Literal(Box::new(Literal::Number("0".to_string()))),
                    datediff_plus_one,
                ],
            )));

            // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
            let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
                name: Identifier::new("INPUT"),
                value: array_gen_range,
                separator: crate::expressions::NamedArgSeparator::DArrow,
            }));
            let flatten = Expression::Function(Box::new(Function::new(
                "FLATTEN".to_string(),
                vec![flatten_input],
            )));

            // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
            // The original UNNEST alias is mapped onto FLATTEN's VALUE position.
            let alias_table = Alias {
                this: flatten,
                alias: Identifier::new("_t0"),
                column_aliases: vec![
                    Identifier::new("seq"),
                    Identifier::new("key"),
                    Identifier::new("path"),
                    Identifier::new("index"),
                    Identifier::new(&alias_name),
                    Identifier::new("this"),
                ],
                pre_alias_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            };
            let lateral_expr = Expression::Lateral(Box::new(Lateral {
                this: Box::new(Expression::Alias(Box::new(alias_table))),
                view: None,
                outer: None,
                alias: None,
                alias_quoted: false,
                cross_apply: None,
                ordinality: None,
                column_aliases: vec![],
            }));

            // Remove the original join and add to FROM expressions
            sel.joins.remove(join_idx);
            if let Some(ref mut from) = sel.from {
                from.expressions.push(lateral_expr);
            }

            // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
            let dateadd_expr = Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::boxed_column(Column {
                        name: Identifier::new(&unit_str),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::boxed_column(Column {
                            name: Identifier::new(&alias_name),
                            table: None,
                            join_mark: false,
                            trailing_comments: vec![],
                            span: None,
                            inferred_type: None,
                        }),
                        to: DataType::Int {
                            length: None,
                            integer_spelling: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })),
                    Expression::Cast(Box::new(Cast {
                        this: start_expr.clone(),
                        to: DataType::Date,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })),
                ],
            )));

            // Replace references to the alias in the SELECT list
            let new_exprs: Vec<Expression> = sel
                .expressions
                .iter()
                .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
                .collect();
            sel.expressions = new_exprs;

            Ok(Expression::Select(sel))
        })
    }
3466
3467 /// Helper: replace column references to `alias_name` with dateadd expression
3468 fn replace_column_ref_with_dateadd(
3469 expr: &Expression,
3470 alias_name: &str,
3471 dateadd: &Expression,
3472 ) -> Expression {
3473 use crate::expressions::*;
3474 match expr {
3475 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
3476 // Plain column reference -> DATEADD(...) AS alias_name
3477 Expression::Alias(Box::new(Alias {
3478 this: dateadd.clone(),
3479 alias: Identifier::new(alias_name),
3480 column_aliases: vec![],
3481 pre_alias_comments: vec![],
3482 trailing_comments: vec![],
3483 inferred_type: None,
3484 }))
3485 }
3486 Expression::Alias(a) => {
3487 // Check if the inner expression references the alias
3488 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
3489 Expression::Alias(Box::new(Alias {
3490 this: new_this,
3491 alias: a.alias.clone(),
3492 column_aliases: a.column_aliases.clone(),
3493 pre_alias_comments: a.pre_alias_comments.clone(),
3494 trailing_comments: a.trailing_comments.clone(),
3495 inferred_type: None,
3496 }))
3497 }
3498 _ => expr.clone(),
3499 }
3500 }
3501
3502 /// Helper: replace column references in inner expression (not top-level)
3503 fn replace_column_ref_inner(
3504 expr: &Expression,
3505 alias_name: &str,
3506 dateadd: &Expression,
3507 ) -> Expression {
3508 use crate::expressions::*;
3509 match expr {
3510 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
3511 dateadd.clone()
3512 }
3513 Expression::Add(op) => {
3514 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
3515 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
3516 Expression::Add(Box::new(BinaryOp {
3517 left,
3518 right,
3519 left_comments: op.left_comments.clone(),
3520 operator_comments: op.operator_comments.clone(),
3521 trailing_comments: op.trailing_comments.clone(),
3522 inferred_type: None,
3523 }))
3524 }
3525 Expression::Sub(op) => {
3526 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
3527 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
3528 Expression::Sub(Box::new(BinaryOp {
3529 left,
3530 right,
3531 left_comments: op.left_comments.clone(),
3532 operator_comments: op.operator_comments.clone(),
3533 trailing_comments: op.trailing_comments.clone(),
3534 inferred_type: None,
3535 }))
3536 }
3537 Expression::Mul(op) => {
3538 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
3539 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
3540 Expression::Mul(Box::new(BinaryOp {
3541 left,
3542 right,
3543 left_comments: op.left_comments.clone(),
3544 operator_comments: op.operator_comments.clone(),
3545 trailing_comments: op.trailing_comments.clone(),
3546 inferred_type: None,
3547 }))
3548 }
3549 _ => expr.clone(),
3550 }
3551 }
3552
    /// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
    /// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
    ///
    /// Snowflake has no `GENERATE_DATE_ARRAY`, so the date series is rebuilt as:
    ///
    /// ```sql
    /// (SELECT DATEADD(unit, CAST(col AS INT), start) AS col
    ///  FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)))
    ///       AS _t0(seq, key, path, index, col, this))
    /// ```
    ///
    /// Only the FIRST matching FROM entry is rewritten (see the `break` below).
    /// If no entry matches, or the step's interval unit cannot be determined,
    /// the SELECT is returned unchanged.
    fn try_transform_from_gda_snowflake(
        mut sel: Box<crate::expressions::Select>,
    ) -> Result<Expression> {
        use crate::expressions::*;

        // Extract GDA info from FROM clause
        let mut gda_info: Option<(
            usize,
            String,
            Expression,
            Expression,
            String,
            Option<(String, Vec<Identifier>)>,
        )> = None; // (from_idx, col_name, start, end, unit, outer_alias)

        if let Some(ref from) = sel.from {
            for (idx, table_expr) in from.expressions.iter().enumerate() {
                // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
                // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
                let (unnest_opt, outer_alias_info) = match table_expr {
                    Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
                            (Some(unnest.as_ref()), Some(alias_info))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let Some(unnest) = unnest_opt {
                    // Check for GENERATE_DATE_ARRAY function
                    let func_opt = match &unnest.this {
                        Expression::Function(ref f)
                            if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
                                && f.args.len() >= 2 =>
                        {
                            Some(f)
                        }
                        // Also check for GenerateSeries (from earlier normalization)
                        _ => None,
                    };

                    if let Some(f) = func_opt {
                        let start_expr = f.args[0].clone();
                        let end_expr = f.args[1].clone();
                        let step = f.args.get(2).cloned();

                        // Extract unit and column name. The output column name
                        // comes from the outer alias's first column alias
                        // (e.g. `AS _q(date_week)` -> "date_week"), defaulting
                        // to "value".
                        let unit = Self::extract_interval_unit_str(&step);
                        let col_name = outer_alias_info
                            .as_ref()
                            .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
                            .unwrap_or_else(|| "value".to_string());

                        if let Some(unit_str) = unit {
                            gda_info = Some((
                                idx,
                                col_name,
                                start_expr,
                                end_expr,
                                unit_str,
                                outer_alias_info,
                            ));
                            break;
                        }
                    }
                }
            }
        }

        let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
        else {
            return Ok(Expression::Select(sel));
        };

        // Build the Snowflake subquery:
        // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        //  FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(seq, key, path, index, col_name, this))

        // DATEDIFF(unit, start, end) — the unit is rendered as a bare
        // identifier (Snowflake date-part syntax), hence the Column node.
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                Expression::boxed_column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // DATEDIFF(...) + 1 — GENERATE_DATE_ARRAY is end-inclusive, so one
        // extra element is needed.
        let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Box::new(Literal::Number("0".to_string()))),
                datediff_plus_one,
            ],
        )));

        // TABLE(FLATTEN(INPUT => ...))
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        // Determine alias name for the table: use outer alias or _t0
        let table_alias_name = outer_alias_info
            .as_ref()
            .map(|(name, _)| name.clone())
            .unwrap_or_else(|| "_t0".to_string());

        // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
        // The column aliases cover FLATTEN's positional output columns, with
        // `col_name` occupying the VALUE slot so the SELECT list can refer to it.
        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new(&table_alias_name),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(&col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::boxed_column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                Expression::Cast(Box::new(Cast {
                    this: Expression::boxed_column(Column {
                        name: Identifier::new(&col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })),
                // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(&col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // Build inner SELECT
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        let inner_select_expr = Expression::Select(Box::new(inner_select));
        let subquery = Expression::Subquery(Box::new(Subquery {
            this: inner_select_expr,
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // If there was an outer alias (e.g., AS _q(date_week)), wrap with alias
        // so references like `_q.date_week` elsewhere in the query still resolve.
        let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
            Expression::Alias(Box::new(Alias {
                this: subquery,
                alias: Identifier::new(&alias_name),
                column_aliases: col_aliases,
                pre_alias_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }))
        } else {
            subquery
        };

        // Replace the FROM expression
        if let Some(ref mut from) = sel.from {
            from.expressions[from_idx] = replacement;
        }

        Ok(Expression::Select(sel))
    }
3796
3797 /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
3798 /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
3799 /// FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(...))))
3800 fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
3801 use crate::expressions::*;
3802
3803 let start_expr = f.args[0].clone();
3804 let end_expr = f.args[1].clone();
3805 let step = f.args.get(2).cloned();
3806 let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
3807 let col_name = "value";
3808
3809 // Build the inner subquery: same as try_transform_from_gda_snowflake
3810 let datediff = Expression::Function(Box::new(Function::new(
3811 "DATEDIFF".to_string(),
3812 vec![
3813 Expression::boxed_column(Column {
3814 name: Identifier::new(&unit_str),
3815 table: None,
3816 join_mark: false,
3817 trailing_comments: vec![],
3818 span: None,
3819 inferred_type: None,
3820 }),
3821 start_expr.clone(),
3822 end_expr.clone(),
3823 ],
3824 )));
3825 // DATEDIFF(...) + 1
3826 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
3827 left: datediff,
3828 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3829 left_comments: vec![],
3830 operator_comments: vec![],
3831 trailing_comments: vec![],
3832 inferred_type: None,
3833 }));
3834
3835 let array_gen_range = Expression::Function(Box::new(Function::new(
3836 "ARRAY_GENERATE_RANGE".to_string(),
3837 vec![
3838 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
3839 datediff_plus_one,
3840 ],
3841 )));
3842
3843 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
3844 name: Identifier::new("INPUT"),
3845 value: array_gen_range,
3846 separator: crate::expressions::NamedArgSeparator::DArrow,
3847 }));
3848 let flatten = Expression::Function(Box::new(Function::new(
3849 "FLATTEN".to_string(),
3850 vec![flatten_input],
3851 )));
3852
3853 let table_func =
3854 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
3855 let flatten_aliased = Expression::Alias(Box::new(Alias {
3856 this: table_func,
3857 alias: Identifier::new("_t0"),
3858 column_aliases: vec![
3859 Identifier::new("seq"),
3860 Identifier::new("key"),
3861 Identifier::new("path"),
3862 Identifier::new("index"),
3863 Identifier::new(col_name),
3864 Identifier::new("this"),
3865 ],
3866 pre_alias_comments: vec![],
3867 trailing_comments: vec![],
3868 inferred_type: None,
3869 }));
3870
3871 let dateadd_expr = Expression::Function(Box::new(Function::new(
3872 "DATEADD".to_string(),
3873 vec![
3874 Expression::boxed_column(Column {
3875 name: Identifier::new(&unit_str),
3876 table: None,
3877 join_mark: false,
3878 trailing_comments: vec![],
3879 span: None,
3880 inferred_type: None,
3881 }),
3882 Expression::Cast(Box::new(Cast {
3883 this: Expression::boxed_column(Column {
3884 name: Identifier::new(col_name),
3885 table: None,
3886 join_mark: false,
3887 trailing_comments: vec![],
3888 span: None,
3889 inferred_type: None,
3890 }),
3891 to: DataType::Int {
3892 length: None,
3893 integer_spelling: false,
3894 },
3895 trailing_comments: vec![],
3896 double_colon_syntax: false,
3897 format: None,
3898 default: None,
3899 inferred_type: None,
3900 })),
3901 start_expr.clone(),
3902 ],
3903 )));
3904 let dateadd_aliased = Expression::Alias(Box::new(Alias {
3905 this: dateadd_expr,
3906 alias: Identifier::new(col_name),
3907 column_aliases: vec![],
3908 pre_alias_comments: vec![],
3909 trailing_comments: vec![],
3910 inferred_type: None,
3911 }));
3912
3913 // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
3914 let mut inner_select = Select::new();
3915 inner_select.expressions = vec![dateadd_aliased];
3916 inner_select.from = Some(From {
3917 expressions: vec![flatten_aliased],
3918 });
3919
3920 // Wrap in subquery for the inner part
3921 let inner_subquery = Expression::Subquery(Box::new(Subquery {
3922 this: Expression::Select(Box::new(inner_select)),
3923 alias: None,
3924 column_aliases: vec![],
3925 order_by: None,
3926 limit: None,
3927 offset: None,
3928 distribute_by: None,
3929 sort_by: None,
3930 cluster_by: None,
3931 lateral: false,
3932 modifiers_inside: false,
3933 trailing_comments: vec![],
3934 inferred_type: None,
3935 }));
3936
3937 // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery)
3938 let star = Expression::Star(Star {
3939 table: None,
3940 except: None,
3941 replace: None,
3942 rename: None,
3943 trailing_comments: vec![],
3944 span: None,
3945 });
3946 let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
3947 this: star,
3948 distinct: false,
3949 filter: None,
3950 order_by: vec![],
3951 name: Some("ARRAY_AGG".to_string()),
3952 ignore_nulls: None,
3953 having_max: None,
3954 limit: None,
3955 inferred_type: None,
3956 }));
3957
3958 let mut outer_select = Select::new();
3959 outer_select.expressions = vec![array_agg];
3960 outer_select.from = Some(From {
3961 expressions: vec![inner_subquery],
3962 });
3963
3964 // Wrap in a subquery
3965 let outer_subquery = Expression::Subquery(Box::new(Subquery {
3966 this: Expression::Select(Box::new(outer_select)),
3967 alias: None,
3968 column_aliases: vec![],
3969 order_by: None,
3970 limit: None,
3971 offset: None,
3972 distribute_by: None,
3973 sort_by: None,
3974 cluster_by: None,
3975 lateral: false,
3976 modifiers_inside: false,
3977 trailing_comments: vec![],
3978 inferred_type: None,
3979 }));
3980
3981 // ARRAY_SIZE(subquery)
3982 Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
3983 outer_subquery,
3984 ))))
3985 }
3986
3987 /// Extract interval unit string from an optional step expression.
3988 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
3989 use crate::expressions::*;
3990 if let Some(Expression::Interval(ref iv)) = step {
3991 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
3992 return Some(format!("{:?}", unit).to_ascii_uppercase());
3993 }
3994 if let Some(ref this) = iv.this {
3995 if let Expression::Literal(lit) = this {
3996 if let Literal::String(ref s) = lit.as_ref() {
3997 let parts: Vec<&str> = s.split_whitespace().collect();
3998 if parts.len() == 2 {
3999 return Some(parts[1].to_ascii_uppercase());
4000 } else if parts.len() == 1 {
4001 let upper = parts[0].to_ascii_uppercase();
4002 if matches!(
4003 upper.as_str(),
4004 "YEAR"
4005 | "QUARTER"
4006 | "MONTH"
4007 | "WEEK"
4008 | "DAY"
4009 | "HOUR"
4010 | "MINUTE"
4011 | "SECOND"
4012 ) {
4013 return Some(upper);
4014 }
4015 }
4016 }
4017 }
4018 }
4019 }
4020 // Default to DAY if no step or no interval
4021 if step.is_none() {
4022 return Some("DAY".to_string());
4023 }
4024 None
4025 }
4026
4027 fn normalize_snowflake_pretty(mut sql: String) -> String {
4028 if sql.contains("LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)")
4029 && sql.contains("ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)")
4030 {
4031 sql = sql.replace(
4032 "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
4033 "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
4034 );
4035
4036 sql = sql.replace(
4037 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
4038 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
4039 );
4040
4041 sql = sql.replace(
4042 "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
4043 "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
4044 );
4045 }
4046
4047 sql
4048 }
4049
4050 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
4051 /// This handles cases where the same syntax has different semantics across dialects.
4052 fn cross_dialect_normalize(
4053 expr: Expression,
4054 source: DialectType,
4055 target: DialectType,
4056 ) -> Result<Expression> {
4057 use crate::expressions::{
4058 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
4059 Function, Identifier, IsNull, Literal, Null, Paren,
4060 };
4061
4062 // Helper to tag which kind of transform to apply
4063 #[derive(Debug)]
4064 enum Action {
4065 None,
4066 GreatestLeastNull,
4067 ArrayGenerateRange,
4068 Div0TypedDivision,
4069 ArrayAggCollectList,
4070 ArrayAggWithinGroupFilter,
4071 ArrayAggFilter,
4072 CastTimestampToDatetime,
4073 DateTruncWrapCast,
4074 ToDateToCast,
4075 ConvertTimezoneToExpr,
4076 SetToVariable,
4077 RegexpReplaceSnowflakeToDuckDB,
4078 BigQueryFunctionNormalize,
4079 BigQuerySafeDivide,
4080 BigQueryCastType,
4081 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
4082 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
4083 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
4084 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
4085 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
4086 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
4087 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
4088 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
4089 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target
4090 EpochConvert, // Expression::Epoch -> target-specific epoch function
4091 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
4092 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
4093 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
4094 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
4095 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
4096 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
4097 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
4098 TempTableHash, // TSQL #table -> temp table normalization
4099 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
4100 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
4101 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
4102 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
4103 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
4104 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
4105 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
4106 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
4107 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
4108 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake; also DuckDB CAST(x AS JSON)
4109 DuckDBTryCastJsonToTryJsonParse, // DuckDB TRY_CAST(x AS JSON) -> TRY(JSON_PARSE(x)) for Trino/Presto/Athena
4110 DuckDBJsonFuncToJsonParse, // DuckDB json(x) -> JSON_PARSE(x) for Trino/Presto/Athena
4111 DuckDBJsonValidToIsJson, // DuckDB json_valid(x) -> x IS JSON for Trino/Presto/Athena
4112 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
4113 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
4114 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
4115 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
4116 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
4117 ArrayAggToGroupConcat, // ARRAY_AGG(x) -> GROUP_CONCAT(x) for MySQL-like targets
4118 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
4119 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
4120 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
4121 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
4122 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
4123 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
4124 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
4125 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
4126 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
4127 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
4128 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
4129 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
4130 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
4131 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
4132 DollarParamConvert, // $foo -> @foo for BigQuery
4133 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
4134 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
4135 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
4136 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
4137 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
4138 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
4139 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
4140 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
4141 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
4142 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
4143 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
4144 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
4145 RespectNullsConvert, // RESPECT NULLS window function handling
4146 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
4147 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
4148 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
4149 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
4150 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
4151 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
4152 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
4153 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
4154 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
4155 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
4156 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
4157 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
4158 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
4159 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
4160 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
4161 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
4162 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
4163 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
4164 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
4165 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
4166 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
4167 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
4168 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
4169 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
4170 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
4171 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
4172 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
4173 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
4174 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
4175 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
4176 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
4177 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
4178 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
4179 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
4180 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
4181 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
4182 JsonExtractToTsql, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
4183 JsonExtractToClickHouse, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
4184 JsonExtractScalarConvert, // JSON_EXTRACT_SCALAR -> target-specific (PostgreSQL, Snowflake, SQLite)
4185 JsonPathNormalize, // Normalize JSON path format (brackets, wildcards, quotes) for various dialects
4186 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
4187 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
4188 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
4189 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
4190 Nvl2Expand, // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END
4191 IfnullToCoalesce, // IFNULL(a, b) -> COALESCE(a, b)
4192 IsAsciiConvert, // IS_ASCII(x) -> dialect-specific ASCII check
4193 StrPositionConvert, // STR_POSITION(haystack, needle[, pos]) -> dialect-specific
4194 DecodeSimplify, // DECODE with null-safe -> simple = comparison
4195 ArraySumConvert, // ARRAY_SUM -> target-specific
4196 ArraySizeConvert, // ARRAY_SIZE -> target-specific
4197 ArrayAnyConvert, // ARRAY_ANY -> target-specific
4198 CastTimestamptzToFunc, // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) for MySQL/StarRocks
4199 TsOrDsToDateConvert, // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific
4200 TsOrDsToDateStrConvert, // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
4201 DateStrToDateConvert, // DATE_STR_TO_DATE(x) -> CAST(x AS DATE)
4202 TimeStrToDateConvert, // TIME_STR_TO_DATE(x) -> CAST(x AS DATE)
4203 TimeStrToTimeConvert, // TIME_STR_TO_TIME(x) -> CAST(x AS TIMESTAMP)
4204 DateToDateStrConvert, // DATE_TO_DATE_STR(x) -> CAST(x AS TEXT/VARCHAR/STRING)
4205 DateToDiConvert, // DATE_TO_DI(x) -> dialect-specific (CAST date to YYYYMMDD integer)
4206 DiToDateConvert, // DI_TO_DATE(x) -> dialect-specific (integer YYYYMMDD to date)
4207 TsOrDiToDiConvert, // TS_OR_DI_TO_DI(x) -> dialect-specific
4208 UnixToStrConvert, // UNIX_TO_STR(x, fmt) -> dialect-specific
4209 UnixToTimeConvert, // UNIX_TO_TIME(x) -> dialect-specific
4210 UnixToTimeStrConvert, // UNIX_TO_TIME_STR(x) -> dialect-specific
4211 TimeToUnixConvert, // TIME_TO_UNIX(x) -> dialect-specific
4212 TimeToStrConvert, // TIME_TO_STR(x, fmt) -> dialect-specific
4213 StrToUnixConvert, // STR_TO_UNIX(x, fmt) -> dialect-specific
4214 DateTruncSwapArgs, // DATE_TRUNC('unit', x) -> DATE_TRUNC(x, unit) / TRUNC(x, unit)
4215 TimestampTruncConvert, // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> dialect-specific
4216 StrToDateConvert, // STR_TO_DATE(x, fmt) from Generic -> CAST(StrToTime(x,fmt) AS DATE)
4217 TsOrDsAddConvert, // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> DATE_ADD per dialect
4218 DateFromUnixDateConvert, // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
4219 TimeStrToUnixConvert, // TIME_STR_TO_UNIX(x) -> dialect-specific
4220 TimeToTimeStrConvert, // TIME_TO_TIME_STR(x) -> CAST(x AS type)
4221 CreateTableLikeToCtas, // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
4222 CreateTableLikeToSelectInto, // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
4223 CreateTableLikeToAs, // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
4224 ArrayRemoveConvert, // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
4225 ArrayReverseConvert, // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
4226 JsonKeysConvert, // JSON_KEYS -> JSON_OBJECT_KEYS/OBJECT_KEYS
4227 ParseJsonStrip, // PARSE_JSON(x) -> x (strip wrapper)
4228 ArraySizeDrill, // ARRAY_SIZE -> REPEATED_COUNT for Drill
4229 WeekOfYearToWeekIso, // WEEKOFYEAR -> WEEKISO for Snowflake cross-dialect
4230 RegexpSubstrSnowflakeToDuckDB, // REGEXP_SUBSTR(s, p, ...) -> REGEXP_EXTRACT variants for DuckDB
4231 RegexpSubstrSnowflakeIdentity, // REGEXP_SUBSTR/REGEXP_SUBSTR_ALL strip trailing group=0 for Snowflake identity
4232 RegexpSubstrAllSnowflakeToDuckDB, // REGEXP_SUBSTR_ALL(s, p, ...) -> REGEXP_EXTRACT_ALL variants for DuckDB
4233 RegexpCountSnowflakeToDuckDB, // REGEXP_COUNT(s, p, ...) -> LENGTH(REGEXP_EXTRACT_ALL(...)) for DuckDB
4234 RegexpInstrSnowflakeToDuckDB, // REGEXP_INSTR(s, p, ...) -> complex CASE expression for DuckDB
4235 RegexpReplacePositionSnowflakeToDuckDB, // REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
4236 RlikeSnowflakeToDuckDB, // RLIKE(a, b[, flags]) -> REGEXP_FULL_MATCH(a, b[, flags]) for DuckDB
4237 RegexpExtractAllToSnowflake, // BigQuery REGEXP_EXTRACT_ALL -> REGEXP_SUBSTR_ALL for Snowflake
4238 ArrayExceptConvert, // ARRAY_EXCEPT -> DuckDB complex CASE / Snowflake ARRAY_EXCEPT / Presto ARRAY_EXCEPT
4239 ArrayPositionSnowflakeSwap, // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
4240 RegexpLikeExasolAnchor, // RegexpLike -> Exasol REGEXP_LIKE with .*pattern.* anchoring
4241 ArrayDistinctConvert, // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
4242 ArrayDistinctClickHouse, // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
4243 ArrayContainsDuckDBConvert, // ARRAY_CONTAINS -> DuckDB CASE with NULL-aware check
4244 SnowflakeWindowFrameStrip, // Strip default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for Snowflake target
4245 SnowflakeWindowFrameAdd, // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for non-Snowflake target
4246 SnowflakeArrayPositionToDuckDB, // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB
4247 }
4248
        // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
        // SELECT INTO is TSQL/Fabric-specific syntax, so the rewrite only fires
        // for those sources.
        let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
            Self::transform_select_into(expr, source, target)
        } else {
            expr
        };

        // Strip OFFSET ROWS for non-TSQL/Oracle targets
        // (the trailing ROWS keyword on OFFSET is TSQL/Oracle/Fabric syntax only;
        // clearing `offset.rows` makes the generator emit a bare OFFSET n).
        let expr = if !matches!(
            target,
            DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
        ) {
            if let Expression::Select(mut select) = expr {
                if let Some(ref mut offset) = select.offset {
                    offset.rows = None;
                }
                Expression::Select(select)
            } else {
                expr
            }
        } else {
            expr
        };

        // Oracle: LIMIT -> FETCH FIRST, OFFSET -> OFFSET ROWS
        // Oracle has no LIMIT clause; FETCH FIRST n ROWS ONLY is its equivalent,
        // and its OFFSET clause carries an explicit ROWS keyword.
        let expr = if matches!(target, DialectType::Oracle) {
            if let Expression::Select(mut select) = expr {
                // take() removes LIMIT so it is not rendered alongside FETCH.
                if let Some(limit) = select.limit.take() {
                    // Convert LIMIT to FETCH FIRST n ROWS ONLY
                    select.fetch = Some(crate::expressions::Fetch {
                        direction: "FIRST".to_string(),
                        count: Some(limit.this),
                        percent: false,
                        rows: true,
                        with_ties: false,
                    });
                }
                // Add ROWS to OFFSET if present
                if let Some(ref mut offset) = select.offset {
                    offset.rows = Some(true);
                }
                Expression::Select(select)
            } else {
                expr
            }
        } else {
            expr
        };
4297
        // Handle CreateTable WITH properties transformation before recursive transforms
        let expr = if let Expression::CreateTable(mut ct) = expr {
            // Generic property mapping runs first; the target-specific fixups
            // below then adjust `ct` in place.
            Self::transform_create_table_properties(&mut ct, source, target);

            // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
            // When the PARTITIONED BY clause contains column definitions, merge them into the
            // main column list and adjust the PARTITIONED BY clause for the target dialect.
            if matches!(
                source,
                DialectType::Hive | DialectType::Spark | DialectType::Databricks
            ) {
                let mut partition_col_names: Vec<String> = Vec::new();
                let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                let mut has_col_def_partitions = false;

                // Check if any PARTITIONED BY property contains ColumnDef expressions
                for prop in &ct.properties {
                    if let Expression::PartitionedByProperty(ref pbp) = prop {
                        if let Expression::Tuple(ref tuple) = *pbp.this {
                            for expr in &tuple.expressions {
                                if let Expression::ColumnDef(ref cd) = expr {
                                    has_col_def_partitions = true;
                                    partition_col_names.push(cd.name.name.clone());
                                    // Cloned because the defs are appended to ct.columns below.
                                    partition_col_defs.push(*cd.clone());
                                }
                            }
                        }
                    }
                }

                // Hive targets keep the native PARTITIONED BY (col type) form untouched.
                if has_col_def_partitions && !matches!(target, DialectType::Hive) {
                    // Merge partition columns into main column list
                    for cd in partition_col_defs {
                        ct.columns.push(cd);
                    }

                    // Replace PARTITIONED BY property with column-name-only version
                    ct.properties
                        .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));

                    if matches!(
                        target,
                        DialectType::Presto | DialectType::Trino | DialectType::Athena
                    ) {
                        // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
                        let array_elements: Vec<String> = partition_col_names
                            .iter()
                            .map(|n| format!("'{}'", n))
                            .collect();
                        let array_value = format!("ARRAY[{}]", array_elements.join(", "));
                        ct.with_properties
                            .push(("PARTITIONED_BY".to_string(), array_value));
                    } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
                        // Spark: PARTITIONED BY (y, z) - just column names
                        let name_exprs: Vec<Expression> = partition_col_names
                            .iter()
                            .map(|n| {
                                Expression::Column(Box::new(crate::expressions::Column {
                                    name: crate::expressions::Identifier::new(n.clone()),
                                    table: None,
                                    join_mark: false,
                                    trailing_comments: Vec::new(),
                                    span: None,
                                    inferred_type: None,
                                }))
                            })
                            .collect();
                        // Insert at position 0 so PARTITIONED BY renders before any
                        // other remaining table properties.
                        ct.properties.insert(
                            0,
                            Expression::PartitionedByProperty(Box::new(
                                crate::expressions::PartitionedByProperty {
                                    this: Box::new(Expression::Tuple(Box::new(
                                        crate::expressions::Tuple {
                                            expressions: name_exprs,
                                        },
                                    ))),
                                },
                            )),
                        );
                    }
                    // For DuckDB and other targets, just drop the PARTITIONED BY (already retained above)
                }

                // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
                // are handled by transform_create_table_properties which runs first
            }

            // Strip LOCATION property for Presto/Trino (not supported)
            if matches!(
                target,
                DialectType::Presto | DialectType::Trino | DialectType::Athena
            ) {
                ct.properties
                    .retain(|p| !matches!(p, Expression::LocationProperty(_)));
            }

            // Strip table-level constraints for Spark/Hive/Databricks
            // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
            if matches!(
                target,
                DialectType::Spark | DialectType::Databricks | DialectType::Hive
            ) {
                ct.constraints.retain(|c| {
                    matches!(
                        c,
                        crate::expressions::TableConstraint::PrimaryKey { .. }
                            | crate::expressions::TableConstraint::Like { .. }
                    )
                });
                for constraint in &mut ct.constraints {
                    if let crate::expressions::TableConstraint::PrimaryKey {
                        columns,
                        modifiers,
                        ..
                    } = constraint
                    {
                        // Strip ASC/DESC from column names
                        // (the direction suffix is stored inline in the column name here).
                        for col in columns.iter_mut() {
                            if col.name.ends_with(" ASC") {
                                col.name = col.name[..col.name.len() - 4].to_string();
                            } else if col.name.ends_with(" DESC") {
                                col.name = col.name[..col.name.len() - 5].to_string();
                            }
                        }
                        // Strip TSQL-specific modifiers
                        modifiers.clustered = None;
                        modifiers.with_options.clear();
                        modifiers.on_filegroup = None;
                    }
                }
            }

            // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
            if matches!(target, DialectType::Databricks) {
                for col in &mut ct.columns {
                    if col.auto_increment {
                        if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
                            col.data_type = crate::expressions::DataType::BigInt { length: None };
                        }
                    }
                }
            }

            // Spark/Databricks: INTEGER -> INT in column definitions
            // Python sqlglot always outputs INT for Spark/Databricks
            if matches!(target, DialectType::Spark | DialectType::Databricks) {
                for col in &mut ct.columns {
                    if let crate::expressions::DataType::Int {
                        integer_spelling, ..
                    } = &mut col.data_type
                    {
                        // Clearing the flag makes the generator emit INT, not INTEGER.
                        *integer_spelling = false;
                    }
                }
            }

            // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
            if matches!(target, DialectType::Hive | DialectType::Spark) {
                for col in &mut ct.columns {
                    // If nullable is explicitly true (NULL), change to None (omit it)
                    if col.nullable == Some(true) {
                        col.nullable = None;
                    }
                    // Also remove from constraints if stored there
                    col.constraints
                        .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
                }
            }

            // Strip TSQL ON filegroup for non-TSQL/Fabric targets
            if ct.on_property.is_some()
                && !matches!(target, DialectType::TSQL | DialectType::Fabric)
            {
                ct.on_property = None;
            }

            // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
            // Snowflake doesn't support typed arrays in DDL
            if matches!(target, DialectType::Snowflake) {
4477 fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
4478 if let crate::expressions::DataType::Array { .. } = dt {
4479 *dt = crate::expressions::DataType::Custom {
4480 name: "ARRAY".to_string(),
4481 };
4482 }
4483 }
                for col in &mut ct.columns {
                    strip_array_type_params(&mut col.data_type);
                }
            }

            // PostgreSQL target: ensure IDENTITY columns have NOT NULL
            // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
            // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
            if matches!(target, DialectType::PostgreSQL) {
                for col in &mut ct.columns {
                    if col.auto_increment && !col.constraint_order.is_empty() {
                        use crate::expressions::ConstraintType;
                        // NB: the `|ct|` closure parameter below shadows the
                        // CreateTable binding `ct` from the enclosing scope.
                        let has_explicit_not_null = col
                            .constraint_order
                            .iter()
                            .any(|ct| *ct == ConstraintType::NotNull);

                        if has_explicit_not_null {
                            // Source had explicit NOT NULL - preserve original order
                            // Just ensure nullable is set
                            if col.nullable != Some(false) {
                                col.nullable = Some(false);
                            }
                        } else {
                            // Source didn't have explicit NOT NULL - build order with
                            // AutoIncrement + NotNull first, then remaining constraints
                            let mut new_order = Vec::new();
                            // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
                            new_order.push(ConstraintType::AutoIncrement);
                            new_order.push(ConstraintType::NotNull);
                            // Add remaining constraints in original order (except AutoIncrement)
                            for ct_type in &col.constraint_order {
                                if *ct_type != ConstraintType::AutoIncrement {
                                    new_order.push(ct_type.clone());
                                }
                            }
                            col.constraint_order = new_order;
                            col.nullable = Some(false);
                        }
                    }
                }
            }

            Expression::CreateTable(ct)
        } else {
            expr
        };
4531
        // Handle CreateView column stripping for Presto/Trino target
        let expr = if let Expression::CreateView(mut cv) = expr {
            // Presto/Trino: drop column list when view has a SELECT body
            // (the view's column names then come from the query projection).
            if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
            {
                // A Null query means there is no SELECT body; keep the columns then.
                if !matches!(&cv.query, Expression::Null(_)) {
                    cv.columns.clear();
                }
            }
            Expression::CreateView(cv)
        } else {
            expr
        };
4545
        // Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for generic/non-Presto targets
        // The Presto family renders a bare VALUES CTE body natively, so it is excluded.
        let expr = if !matches!(
            target,
            DialectType::Presto | DialectType::Trino | DialectType::Athena
        ) {
            if let Expression::Select(mut select) = expr {
                if let Some(ref mut with) = select.with {
                    for cte in &mut with.ctes {
                        if let Expression::Values(ref vals) = cte.this {
                            // Build: SELECT * FROM (VALUES ...) AS _values
                            let values_subquery =
                                Expression::Subquery(Box::new(crate::expressions::Subquery {
                                    this: Expression::Values(vals.clone()),
                                    // Synthetic derived-table alias; many dialects
                                    // require one on a parenthesized VALUES source.
                                    alias: Some(Identifier::new("_values".to_string())),
                                    column_aliases: Vec::new(),
                                    order_by: None,
                                    limit: None,
                                    offset: None,
                                    distribute_by: None,
                                    sort_by: None,
                                    cluster_by: None,
                                    lateral: false,
                                    modifiers_inside: false,
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                            let mut new_select = crate::expressions::Select::new();
                            new_select.expressions =
                                vec![Expression::Star(crate::expressions::Star {
                                    table: None,
                                    except: None,
                                    replace: None,
                                    rename: None,
                                    trailing_comments: Vec::new(),
                                    span: None,
                                })];
                            new_select.from = Some(crate::expressions::From {
                                expressions: vec![values_subquery],
                            });
                            // Replace the CTE body in place with the wrapped SELECT.
                            cte.this = Expression::Select(Box::new(new_select));
                        }
                    }
                }
                Expression::Select(select)
            } else {
                expr
            }
        } else {
            expr
        };
4596
        // PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have nulls ordering
        // NOTE(review): this sets NULLS FIRST unconditionally when no nulls ordering is
        // given; PostgreSQL's default is NULLS LAST for ASC columns and NULLS FIRST only
        // for DESC — confirm this matches the intended (sqlglot-parity) output for ASC.
        let expr = if matches!(target, DialectType::PostgreSQL) {
            if let Expression::CreateIndex(mut ci) = expr {
                for col in &mut ci.columns {
                    // Only fill in an explicit ordering when the source had none.
                    if col.nulls_first.is_none() {
                        col.nulls_first = Some(true);
                    }
                }
                Expression::CreateIndex(ci)
            } else {
                expr
            }
        } else {
            expr
        };
4612
4613 transform_recursive(expr, &|e| {
4614 // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
4615 // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
4616 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4617 if let Expression::Cast(ref c) = e {
4618 // Check if this is a CAST of an array to a struct array type
4619 let is_struct_array_cast =
4620 matches!(&c.to, crate::expressions::DataType::Array { .. });
4621 if is_struct_array_cast {
4622 let has_auto_named_structs = match &c.this {
4623 Expression::Array(arr) => arr.expressions.iter().any(|elem| {
4624 if let Expression::Struct(s) = elem {
4625 s.fields.iter().all(|(name, _)| {
4626 name.as_ref().map_or(true, |n| {
4627 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
4628 })
4629 })
4630 } else {
4631 false
4632 }
4633 }),
4634 Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
4635 if let Expression::Struct(s) = elem {
4636 s.fields.iter().all(|(name, _)| {
4637 name.as_ref().map_or(true, |n| {
4638 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
4639 })
4640 })
4641 } else {
4642 false
4643 }
4644 }),
4645 _ => false,
4646 };
4647 if has_auto_named_structs {
4648 let convert_struct_to_row = |elem: Expression| -> Expression {
4649 if let Expression::Struct(s) = elem {
4650 let row_args: Vec<Expression> =
4651 s.fields.into_iter().map(|(_, v)| v).collect();
4652 Expression::Function(Box::new(Function::new(
4653 "ROW".to_string(),
4654 row_args,
4655 )))
4656 } else {
4657 elem
4658 }
4659 };
4660 let mut c_clone = c.as_ref().clone();
4661 match &mut c_clone.this {
4662 Expression::Array(arr) => {
4663 arr.expressions = arr
4664 .expressions
4665 .drain(..)
4666 .map(convert_struct_to_row)
4667 .collect();
4668 }
4669 Expression::ArrayFunc(arr) => {
4670 arr.expressions = arr
4671 .expressions
4672 .drain(..)
4673 .map(convert_struct_to_row)
4674 .collect();
4675 }
4676 _ => {}
4677 }
4678 return Ok(Expression::Cast(Box::new(c_clone)));
4679 }
4680 }
4681 }
4682 }
4683
4684 // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
4685 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4686 if let Expression::Select(ref sel) = e {
4687 if sel.kind.as_deref() == Some("STRUCT") {
4688 let mut fields = Vec::new();
4689 for expr in &sel.expressions {
4690 match expr {
4691 Expression::Alias(a) => {
4692 fields.push((Some(a.alias.name.clone()), a.this.clone()));
4693 }
4694 Expression::Column(c) => {
4695 fields.push((Some(c.name.name.clone()), expr.clone()));
4696 }
4697 _ => {
4698 fields.push((None, expr.clone()));
4699 }
4700 }
4701 }
4702 let struct_lit =
4703 Expression::Struct(Box::new(crate::expressions::Struct { fields }));
4704 let mut new_select = sel.as_ref().clone();
4705 new_select.kind = None;
4706 new_select.expressions = vec![struct_lit];
4707 return Ok(Expression::Select(Box::new(new_select)));
4708 }
4709 }
4710 }
4711
4712 // Convert @variable -> ${variable} for Spark/Hive/Databricks
4713 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4714 && matches!(
4715 target,
4716 DialectType::Spark | DialectType::Databricks | DialectType::Hive
4717 )
4718 {
4719 if let Expression::Parameter(ref p) = e {
4720 if p.style == crate::expressions::ParameterStyle::At {
4721 if let Some(ref name) = p.name {
4722 return Ok(Expression::Parameter(Box::new(
4723 crate::expressions::Parameter {
4724 name: Some(name.clone()),
4725 index: p.index,
4726 style: crate::expressions::ParameterStyle::DollarBrace,
4727 quoted: p.quoted,
4728 string_quoted: p.string_quoted,
4729 expression: None,
4730 },
4731 )));
4732 }
4733 }
4734 }
4735 // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
4736 if let Expression::Column(ref col) = e {
4737 if col.name.name.starts_with('@') && col.table.is_none() {
4738 let var_name = col.name.name.trim_start_matches('@').to_string();
4739 return Ok(Expression::Parameter(Box::new(
4740 crate::expressions::Parameter {
4741 name: Some(var_name),
4742 index: None,
4743 style: crate::expressions::ParameterStyle::DollarBrace,
4744 quoted: false,
4745 string_quoted: false,
4746 expression: None,
4747 },
4748 )));
4749 }
4750 }
4751 }
4752
4753 // Convert @variable -> variable in SET statements for Spark/Databricks
4754 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4755 && matches!(target, DialectType::Spark | DialectType::Databricks)
4756 {
4757 if let Expression::SetStatement(ref s) = e {
4758 let mut new_items = s.items.clone();
4759 let mut changed = false;
4760 for item in &mut new_items {
4761 // Strip @ from the SET name (Parameter style)
4762 if let Expression::Parameter(ref p) = item.name {
4763 if p.style == crate::expressions::ParameterStyle::At {
4764 if let Some(ref name) = p.name {
4765 item.name = Expression::Identifier(Identifier::new(name));
4766 changed = true;
4767 }
4768 }
4769 }
4770 // Strip @ from the SET name (Identifier style - SET parser)
4771 if let Expression::Identifier(ref id) = item.name {
4772 if id.name.starts_with('@') {
4773 let var_name = id.name.trim_start_matches('@').to_string();
4774 item.name = Expression::Identifier(Identifier::new(&var_name));
4775 changed = true;
4776 }
4777 }
4778 // Strip @ from the SET name (Column style - alternative parsing)
4779 if let Expression::Column(ref col) = item.name {
4780 if col.name.name.starts_with('@') && col.table.is_none() {
4781 let var_name = col.name.name.trim_start_matches('@').to_string();
4782 item.name = Expression::Identifier(Identifier::new(&var_name));
4783 changed = true;
4784 }
4785 }
4786 }
4787 if changed {
4788 let mut new_set = (**s).clone();
4789 new_set.items = new_items;
4790 return Ok(Expression::SetStatement(Box::new(new_set)));
4791 }
4792 }
4793 }
4794
4795 // Strip NOLOCK hint for non-TSQL targets
4796 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4797 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4798 {
4799 if let Expression::Table(ref tr) = e {
4800 if !tr.hints.is_empty() {
4801 let mut new_tr = tr.clone();
4802 new_tr.hints.clear();
4803 return Ok(Expression::Table(new_tr));
4804 }
4805 }
4806 }
4807
4808 // Snowflake: TRUE IS TRUE -> TRUE, FALSE IS FALSE -> FALSE
4809 // Snowflake simplifies IS TRUE/IS FALSE on boolean literals
4810 if matches!(target, DialectType::Snowflake) {
4811 if let Expression::IsTrue(ref itf) = e {
4812 if let Expression::Boolean(ref b) = itf.this {
4813 if !itf.not {
4814 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4815 value: b.value,
4816 }));
4817 } else {
4818 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4819 value: !b.value,
4820 }));
4821 }
4822 }
4823 }
4824 if let Expression::IsFalse(ref itf) = e {
4825 if let Expression::Boolean(ref b) = itf.this {
4826 if !itf.not {
4827 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4828 value: !b.value,
4829 }));
4830 } else {
4831 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4832 value: b.value,
4833 }));
4834 }
4835 }
4836 }
4837 }
4838
4839 // BigQuery: split dotted backtick identifiers in table names
4840 // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
4841 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
4842 if let Expression::CreateTable(ref ct) = e {
4843 let mut changed = false;
4844 let mut new_ct = ct.clone();
4845 // Split the table name
4846 if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
4847 let parts: Vec<&str> = ct.name.name.name.split('.').collect();
4848 // Use quoted identifiers when the original was quoted (backtick in BigQuery)
4849 let was_quoted = ct.name.name.quoted;
4850 let mk_id = |s: &str| {
4851 if was_quoted {
4852 Identifier::quoted(s)
4853 } else {
4854 Identifier::new(s)
4855 }
4856 };
4857 if parts.len() == 3 {
4858 new_ct.name.catalog = Some(mk_id(parts[0]));
4859 new_ct.name.schema = Some(mk_id(parts[1]));
4860 new_ct.name.name = mk_id(parts[2]);
4861 changed = true;
4862 } else if parts.len() == 2 {
4863 new_ct.name.schema = Some(mk_id(parts[0]));
4864 new_ct.name.name = mk_id(parts[1]);
4865 changed = true;
4866 }
4867 }
4868 // Split the clone source name
4869 if let Some(ref clone_src) = ct.clone_source {
4870 if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
4871 let parts: Vec<&str> = clone_src.name.name.split('.').collect();
4872 let was_quoted = clone_src.name.quoted;
4873 let mk_id = |s: &str| {
4874 if was_quoted {
4875 Identifier::quoted(s)
4876 } else {
4877 Identifier::new(s)
4878 }
4879 };
4880 let mut new_src = clone_src.clone();
4881 if parts.len() == 3 {
4882 new_src.catalog = Some(mk_id(parts[0]));
4883 new_src.schema = Some(mk_id(parts[1]));
4884 new_src.name = mk_id(parts[2]);
4885 new_ct.clone_source = Some(new_src);
4886 changed = true;
4887 } else if parts.len() == 2 {
4888 new_src.schema = Some(mk_id(parts[0]));
4889 new_src.name = mk_id(parts[1]);
4890 new_ct.clone_source = Some(new_src);
4891 changed = true;
4892 }
4893 }
4894 }
4895 if changed {
4896 return Ok(Expression::CreateTable(new_ct));
4897 }
4898 }
4899 }
4900
4901 // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
4902 // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
4903 if matches!(source, DialectType::BigQuery)
4904 && matches!(
4905 target,
4906 DialectType::DuckDB
4907 | DialectType::Presto
4908 | DialectType::Trino
4909 | DialectType::Athena
4910 )
4911 {
4912 if let Expression::Subscript(ref sub) = e {
4913 let (new_index, is_safe) = match &sub.index {
4914 // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
4915 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
4916 let Literal::Number(n) = lit.as_ref() else {
4917 unreachable!()
4918 };
4919 if let Ok(val) = n.parse::<i64>() {
4920 (
4921 Some(Expression::Literal(Box::new(Literal::Number(
4922 (val + 1).to_string(),
4923 )))),
4924 false,
4925 )
4926 } else {
4927 (None, false)
4928 }
4929 }
4930 // OFFSET(n) -> n+1 (0-based)
4931 Expression::Function(ref f)
4932 if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
4933 {
4934 if let Expression::Literal(lit) = &f.args[0] {
4935 if let Literal::Number(n) = lit.as_ref() {
4936 if let Ok(val) = n.parse::<i64>() {
4937 (
4938 Some(Expression::Literal(Box::new(Literal::Number(
4939 (val + 1).to_string(),
4940 )))),
4941 false,
4942 )
4943 } else {
4944 (
4945 Some(Expression::Add(Box::new(
4946 crate::expressions::BinaryOp::new(
4947 f.args[0].clone(),
4948 Expression::number(1),
4949 ),
4950 ))),
4951 false,
4952 )
4953 }
4954 } else {
4955 (None, false)
4956 }
4957 } else {
4958 (
4959 Some(Expression::Add(Box::new(
4960 crate::expressions::BinaryOp::new(
4961 f.args[0].clone(),
4962 Expression::number(1),
4963 ),
4964 ))),
4965 false,
4966 )
4967 }
4968 }
4969 // ORDINAL(n) -> n (already 1-based)
4970 Expression::Function(ref f)
4971 if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
4972 {
4973 (Some(f.args[0].clone()), false)
4974 }
4975 // SAFE_OFFSET(n) -> n+1 (0-based, safe)
4976 Expression::Function(ref f)
4977 if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
4978 {
4979 if let Expression::Literal(lit) = &f.args[0] {
4980 if let Literal::Number(n) = lit.as_ref() {
4981 if let Ok(val) = n.parse::<i64>() {
4982 (
4983 Some(Expression::Literal(Box::new(Literal::Number(
4984 (val + 1).to_string(),
4985 )))),
4986 true,
4987 )
4988 } else {
4989 (
4990 Some(Expression::Add(Box::new(
4991 crate::expressions::BinaryOp::new(
4992 f.args[0].clone(),
4993 Expression::number(1),
4994 ),
4995 ))),
4996 true,
4997 )
4998 }
4999 } else {
5000 (None, false)
5001 }
5002 } else {
5003 (
5004 Some(Expression::Add(Box::new(
5005 crate::expressions::BinaryOp::new(
5006 f.args[0].clone(),
5007 Expression::number(1),
5008 ),
5009 ))),
5010 true,
5011 )
5012 }
5013 }
5014 // SAFE_ORDINAL(n) -> n (already 1-based, safe)
5015 Expression::Function(ref f)
5016 if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
5017 {
5018 (Some(f.args[0].clone()), true)
5019 }
5020 _ => (None, false),
5021 };
5022 if let Some(idx) = new_index {
5023 if is_safe
5024 && matches!(
5025 target,
5026 DialectType::Presto | DialectType::Trino | DialectType::Athena
5027 )
5028 {
5029 // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
5030 return Ok(Expression::Function(Box::new(Function::new(
5031 "ELEMENT_AT".to_string(),
5032 vec![sub.this.clone(), idx],
5033 ))));
5034 } else {
5035 // DuckDB or non-safe: just use subscript with converted index
5036 return Ok(Expression::Subscript(Box::new(
5037 crate::expressions::Subscript {
5038 this: sub.this.clone(),
5039 index: idx,
5040 },
5041 )));
5042 }
5043 }
5044 }
5045 }
5046
5047 // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
5048 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
5049 if let Expression::Length(ref uf) = e {
5050 let arg = uf.this.clone();
5051 let typeof_func = Expression::Function(Box::new(Function::new(
5052 "TYPEOF".to_string(),
5053 vec![arg.clone()],
5054 )));
5055 let blob_cast = Expression::Cast(Box::new(Cast {
5056 this: arg.clone(),
5057 to: DataType::VarBinary { length: None },
5058 trailing_comments: vec![],
5059 double_colon_syntax: false,
5060 format: None,
5061 default: None,
5062 inferred_type: None,
5063 }));
5064 let octet_length = Expression::Function(Box::new(Function::new(
5065 "OCTET_LENGTH".to_string(),
5066 vec![blob_cast],
5067 )));
5068 let text_cast = Expression::Cast(Box::new(Cast {
5069 this: arg,
5070 to: DataType::Text,
5071 trailing_comments: vec![],
5072 double_colon_syntax: false,
5073 format: None,
5074 default: None,
5075 inferred_type: None,
5076 }));
5077 let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
5078 this: text_cast,
5079 original_name: None,
5080 inferred_type: None,
5081 }));
5082 return Ok(Expression::Case(Box::new(Case {
5083 operand: Some(typeof_func),
5084 whens: vec![(
5085 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
5086 octet_length,
5087 )],
5088 else_: Some(length_text),
5089 comments: Vec::new(),
5090 inferred_type: None,
5091 })));
5092 }
5093 }
5094
5095 // BigQuery UNNEST alias handling (only for non-BigQuery sources):
5096 // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
5097 // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
5098 if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
5099 if let Expression::Alias(ref a) = e {
5100 if matches!(&a.this, Expression::Unnest(_)) {
5101 if a.column_aliases.is_empty() {
5102 // Drop the entire alias, return just the UNNEST expression
5103 return Ok(a.this.clone());
5104 } else {
5105 // Use first column alias as the main alias
5106 let mut new_alias = a.as_ref().clone();
5107 new_alias.alias = a.column_aliases[0].clone();
5108 new_alias.column_aliases.clear();
5109 return Ok(Expression::Alias(Box::new(new_alias)));
5110 }
5111 }
5112 }
5113 }
5114
5115 // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
5116 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
5117 if let Expression::In(ref in_expr) = e {
5118 if let Some(ref unnest_inner) = in_expr.unnest {
5119 // Build the function call for the target dialect
5120 let func_expr = if matches!(
5121 target,
5122 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5123 ) {
5124 // Use EXPLODE for Hive/Spark
5125 Expression::Function(Box::new(Function::new(
5126 "EXPLODE".to_string(),
5127 vec![*unnest_inner.clone()],
5128 )))
5129 } else {
5130 // Use UNNEST for Presto/Trino/DuckDB/etc.
5131 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
5132 this: *unnest_inner.clone(),
5133 expressions: Vec::new(),
5134 with_ordinality: false,
5135 alias: None,
5136 offset_alias: None,
5137 }))
5138 };
5139
5140 // Wrap in SELECT
5141 let mut inner_select = crate::expressions::Select::new();
5142 inner_select.expressions = vec![func_expr];
5143
5144 let subquery_expr = Expression::Select(Box::new(inner_select));
5145
5146 return Ok(Expression::In(Box::new(crate::expressions::In {
5147 this: in_expr.this.clone(),
5148 expressions: Vec::new(),
5149 query: Some(subquery_expr),
5150 not: in_expr.not,
5151 global: in_expr.global,
5152 unnest: None,
5153 is_field: false,
5154 })));
5155 }
5156 }
5157 }
5158
5159 // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
5160 // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
5161 if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
5162 if let Expression::Alias(ref a) = e {
5163 if let Expression::Function(ref f) = a.this {
5164 if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
5165 && !a.column_aliases.is_empty()
5166 {
5167 // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
5168 let col_alias = a.column_aliases[0].clone();
5169 let mut inner_select = crate::expressions::Select::new();
5170 inner_select.expressions =
5171 vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
5172 Expression::Identifier(Identifier::new("value".to_string())),
5173 col_alias,
5174 )))];
5175 inner_select.from = Some(crate::expressions::From {
5176 expressions: vec![a.this.clone()],
5177 });
5178 let subquery =
5179 Expression::Subquery(Box::new(crate::expressions::Subquery {
5180 this: Expression::Select(Box::new(inner_select)),
5181 alias: Some(a.alias.clone()),
5182 column_aliases: Vec::new(),
5183 order_by: None,
5184 limit: None,
5185 offset: None,
5186 lateral: false,
5187 modifiers_inside: false,
5188 trailing_comments: Vec::new(),
5189 distribute_by: None,
5190 sort_by: None,
5191 cluster_by: None,
5192 inferred_type: None,
5193 }));
5194 return Ok(subquery);
5195 }
5196 }
5197 }
5198 }
5199
5200 // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
5201 // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
5202 // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
5203 if matches!(source, DialectType::BigQuery) {
5204 if let Expression::Select(ref s) = e {
5205 if let Some(ref from) = s.from {
5206 if from.expressions.len() >= 2 {
5207 // Collect table names from first expression
5208 let first_tables: Vec<String> = from
5209 .expressions
5210 .iter()
5211 .take(1)
5212 .filter_map(|expr| {
5213 if let Expression::Table(t) = expr {
5214 Some(t.name.name.to_ascii_lowercase())
5215 } else {
5216 None
5217 }
5218 })
5219 .collect();
5220
5221 // Check if any subsequent FROM expressions are schema-qualified with a matching table name
5222 // or have a dotted name matching a table
5223 let mut needs_rewrite = false;
5224 for expr in from.expressions.iter().skip(1) {
5225 if let Expression::Table(t) = expr {
5226 if let Some(ref schema) = t.schema {
5227 if first_tables.contains(&schema.name.to_ascii_lowercase())
5228 {
5229 needs_rewrite = true;
5230 break;
5231 }
5232 }
5233 // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
5234 if t.schema.is_none() && t.name.name.contains('.') {
5235 let parts: Vec<&str> = t.name.name.split('.').collect();
5236 if parts.len() >= 2
5237 && first_tables.contains(&parts[0].to_ascii_lowercase())
5238 {
5239 needs_rewrite = true;
5240 break;
5241 }
5242 }
5243 }
5244 }
5245
5246 if needs_rewrite {
5247 let mut new_select = s.clone();
5248 let mut new_from_exprs = vec![from.expressions[0].clone()];
5249 let mut new_joins = s.joins.clone();
5250
5251 for expr in from.expressions.iter().skip(1) {
5252 if let Expression::Table(ref t) = expr {
5253 if let Some(ref schema) = t.schema {
5254 if first_tables
5255 .contains(&schema.name.to_ascii_lowercase())
5256 {
5257 // This is an array path reference, convert to CROSS JOIN UNNEST
5258 let col_expr = Expression::Column(Box::new(
5259 crate::expressions::Column {
5260 name: t.name.clone(),
5261 table: Some(schema.clone()),
5262 join_mark: false,
5263 trailing_comments: vec![],
5264 span: None,
5265 inferred_type: None,
5266 },
5267 ));
5268 let unnest_expr = Expression::Unnest(Box::new(
5269 crate::expressions::UnnestFunc {
5270 this: col_expr,
5271 expressions: Vec::new(),
5272 with_ordinality: false,
5273 alias: None,
5274 offset_alias: None,
5275 },
5276 ));
5277 let join_this = if let Some(ref alias) = t.alias {
5278 if matches!(
5279 target,
5280 DialectType::Presto
5281 | DialectType::Trino
5282 | DialectType::Athena
5283 ) {
5284 // Presto: UNNEST(x) AS _t0(results)
5285 Expression::Alias(Box::new(
5286 crate::expressions::Alias {
5287 this: unnest_expr,
5288 alias: Identifier::new("_t0"),
5289 column_aliases: vec![alias.clone()],
5290 pre_alias_comments: vec![],
5291 trailing_comments: vec![],
5292 inferred_type: None,
5293 },
5294 ))
5295 } else {
5296 // BigQuery: UNNEST(x) AS results
5297 Expression::Alias(Box::new(
5298 crate::expressions::Alias {
5299 this: unnest_expr,
5300 alias: alias.clone(),
5301 column_aliases: vec![],
5302 pre_alias_comments: vec![],
5303 trailing_comments: vec![],
5304 inferred_type: None,
5305 },
5306 ))
5307 }
5308 } else {
5309 unnest_expr
5310 };
5311 new_joins.push(crate::expressions::Join {
5312 kind: crate::expressions::JoinKind::Cross,
5313 this: join_this,
5314 on: None,
5315 using: Vec::new(),
5316 use_inner_keyword: false,
5317 use_outer_keyword: false,
5318 deferred_condition: false,
5319 join_hint: None,
5320 match_condition: None,
5321 pivots: Vec::new(),
5322 comments: Vec::new(),
5323 nesting_group: 0,
5324 directed: false,
5325 });
5326 } else {
5327 new_from_exprs.push(expr.clone());
5328 }
5329 } else if t.schema.is_none() && t.name.name.contains('.') {
5330 // Dotted name in quoted identifier: `Coordinates.position`
5331 let parts: Vec<&str> = t.name.name.split('.').collect();
5332 if parts.len() >= 2
5333 && first_tables
5334 .contains(&parts[0].to_ascii_lowercase())
5335 {
5336 let join_this =
5337 if matches!(target, DialectType::BigQuery) {
5338 // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
5339 Expression::Table(t.clone())
5340 } else {
5341 // Other targets: split into "schema"."name"
5342 let mut new_t = t.clone();
5343 new_t.schema =
5344 Some(Identifier::quoted(parts[0]));
5345 new_t.name = Identifier::quoted(parts[1]);
5346 Expression::Table(new_t)
5347 };
5348 new_joins.push(crate::expressions::Join {
5349 kind: crate::expressions::JoinKind::Cross,
5350 this: join_this,
5351 on: None,
5352 using: Vec::new(),
5353 use_inner_keyword: false,
5354 use_outer_keyword: false,
5355 deferred_condition: false,
5356 join_hint: None,
5357 match_condition: None,
5358 pivots: Vec::new(),
5359 comments: Vec::new(),
5360 nesting_group: 0,
5361 directed: false,
5362 });
5363 } else {
5364 new_from_exprs.push(expr.clone());
5365 }
5366 } else {
5367 new_from_exprs.push(expr.clone());
5368 }
5369 } else {
5370 new_from_exprs.push(expr.clone());
5371 }
5372 }
5373
5374 new_select.from = Some(crate::expressions::From {
5375 expressions: new_from_exprs,
5376 ..from.clone()
5377 });
5378 new_select.joins = new_joins;
5379 return Ok(Expression::Select(new_select));
5380 }
5381 }
5382 }
5383 }
5384 }
5385
5386 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
5387 if matches!(
5388 target,
5389 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5390 ) {
5391 if let Expression::Select(ref s) = e {
5392 // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
5393 let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
5394 matches!(expr, Expression::Unnest(_))
5395 || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
5396 };
5397 let has_unnest_join = s.joins.iter().any(|j| {
5398 j.kind == crate::expressions::JoinKind::Cross && (
5399 matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
5400 || is_unnest_or_explode_expr(&j.this)
5401 )
5402 });
5403 if has_unnest_join {
5404 let mut select = s.clone();
5405 let mut new_joins = Vec::new();
5406 for join in select.joins.drain(..) {
5407 if join.kind == crate::expressions::JoinKind::Cross {
5408 // Extract the UNNEST/EXPLODE from the join
5409 let (func_expr, table_alias, col_aliases) = match &join.this {
5410 Expression::Alias(a) => {
5411 let ta = if a.alias.is_empty() {
5412 None
5413 } else {
5414 Some(a.alias.clone())
5415 };
5416 let cas = a.column_aliases.clone();
5417 match &a.this {
5418 Expression::Unnest(u) => {
5419 // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
5420 if !u.expressions.is_empty() {
5421 let mut all_args = vec![u.this.clone()];
5422 all_args.extend(u.expressions.clone());
5423 let arrays_zip =
5424 Expression::Function(Box::new(
5425 crate::expressions::Function::new(
5426 "ARRAYS_ZIP".to_string(),
5427 all_args,
5428 ),
5429 ));
5430 let inline = Expression::Function(Box::new(
5431 crate::expressions::Function::new(
5432 "INLINE".to_string(),
5433 vec![arrays_zip],
5434 ),
5435 ));
5436 (Some(inline), ta, a.column_aliases.clone())
5437 } else {
5438 // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
5439 let func_name = if u.with_ordinality {
5440 "POSEXPLODE"
5441 } else {
5442 "EXPLODE"
5443 };
5444 let explode = Expression::Function(Box::new(
5445 crate::expressions::Function::new(
5446 func_name.to_string(),
5447 vec![u.this.clone()],
5448 ),
5449 ));
5450 // For POSEXPLODE, add 'pos' to column aliases
5451 let cas = if u.with_ordinality {
5452 let mut pos_aliases =
5453 vec![Identifier::new(
5454 "pos".to_string(),
5455 )];
5456 pos_aliases
5457 .extend(a.column_aliases.clone());
5458 pos_aliases
5459 } else {
5460 a.column_aliases.clone()
5461 };
5462 (Some(explode), ta, cas)
5463 }
5464 }
5465 Expression::Function(f)
5466 if f.name.eq_ignore_ascii_case("EXPLODE") =>
5467 {
5468 (Some(Expression::Function(f.clone())), ta, cas)
5469 }
5470 _ => (None, None, Vec::new()),
5471 }
5472 }
5473 Expression::Unnest(u) => {
5474 let func_name = if u.with_ordinality {
5475 "POSEXPLODE"
5476 } else {
5477 "EXPLODE"
5478 };
5479 let explode = Expression::Function(Box::new(
5480 crate::expressions::Function::new(
5481 func_name.to_string(),
5482 vec![u.this.clone()],
5483 ),
5484 ));
5485 let ta = u.alias.clone();
5486 let col_aliases = if u.with_ordinality {
5487 vec![Identifier::new("pos".to_string())]
5488 } else {
5489 Vec::new()
5490 };
5491 (Some(explode), ta, col_aliases)
5492 }
5493 _ => (None, None, Vec::new()),
5494 };
5495 if let Some(func) = func_expr {
5496 select.lateral_views.push(crate::expressions::LateralView {
5497 this: func,
5498 table_alias,
5499 column_aliases: col_aliases,
5500 outer: false,
5501 });
5502 } else {
5503 new_joins.push(join);
5504 }
5505 } else {
5506 new_joins.push(join);
5507 }
5508 }
5509 select.joins = new_joins;
5510 return Ok(Expression::Select(select));
5511 }
5512 }
5513 }
5514
5515 // UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
5516 // for BigQuery, Presto/Trino, Snowflake
5517 if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
5518 && matches!(
5519 target,
5520 DialectType::BigQuery
5521 | DialectType::Presto
5522 | DialectType::Trino
5523 | DialectType::Snowflake
5524 )
5525 {
5526 if let Expression::Select(ref s) = e {
5527 // Check if any SELECT expressions contain UNNEST
5528 // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
5529 let has_unnest_in_select = s.expressions.iter().any(|expr| {
5530 fn contains_unnest(e: &Expression) -> bool {
5531 match e {
5532 Expression::Unnest(_) => true,
5533 Expression::Function(f)
5534 if f.name.eq_ignore_ascii_case("UNNEST") =>
5535 {
5536 true
5537 }
5538 Expression::Alias(a) => contains_unnest(&a.this),
5539 Expression::Add(op)
5540 | Expression::Sub(op)
5541 | Expression::Mul(op)
5542 | Expression::Div(op) => {
5543 contains_unnest(&op.left) || contains_unnest(&op.right)
5544 }
5545 _ => false,
5546 }
5547 }
5548 contains_unnest(expr)
5549 });
5550
5551 if has_unnest_in_select {
5552 let rewritten = Self::rewrite_unnest_expansion(s, target);
5553 if let Some(new_select) = rewritten {
5554 return Ok(Expression::Select(Box::new(new_select)));
5555 }
5556 }
5557 }
5558 }
5559
5560 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
5561 // BigQuery '\n' -> PostgreSQL literal newline in string
5562 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
5563 {
5564 if let Expression::Literal(ref lit) = e {
5565 if let Literal::String(ref s) = lit.as_ref() {
5566 if s.contains("\\n")
5567 || s.contains("\\t")
5568 || s.contains("\\r")
5569 || s.contains("\\\\")
5570 {
5571 let converted = s
5572 .replace("\\n", "\n")
5573 .replace("\\t", "\t")
5574 .replace("\\r", "\r")
5575 .replace("\\\\", "\\");
5576 return Ok(Expression::Literal(Box::new(Literal::String(converted))));
5577 }
5578 }
5579 }
5580 }
5581
5582 // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
5583 // when source != target (identity tests keep the Literal::Timestamp for native handling)
5584 if source != target {
5585 if let Expression::Literal(ref lit) = e {
5586 if let Literal::Timestamp(ref s) = lit.as_ref() {
5587 let s = s.clone();
5588 // MySQL: TIMESTAMP handling depends on source dialect
5589 // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
5590 // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
5591 if matches!(target, DialectType::MySQL) {
5592 if matches!(source, DialectType::BigQuery) {
5593 // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
5594 return Ok(Expression::Function(Box::new(Function::new(
5595 "TIMESTAMP".to_string(),
5596 vec![Expression::Literal(Box::new(Literal::String(s)))],
5597 ))));
5598 } else {
5599 // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
5600 return Ok(Expression::Cast(Box::new(Cast {
5601 this: Expression::Literal(Box::new(Literal::String(s))),
5602 to: DataType::Custom {
5603 name: "DATETIME".to_string(),
5604 },
5605 trailing_comments: Vec::new(),
5606 double_colon_syntax: false,
5607 format: None,
5608 default: None,
5609 inferred_type: None,
5610 })));
5611 }
5612 }
5613 let dt = match target {
5614 DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
5615 name: "DATETIME".to_string(),
5616 },
5617 DialectType::Snowflake => {
5618 // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
5619 if matches!(source, DialectType::BigQuery) {
5620 DataType::Custom {
5621 name: "TIMESTAMPTZ".to_string(),
5622 }
5623 } else if matches!(
5624 source,
5625 DialectType::PostgreSQL
5626 | DialectType::Redshift
5627 | DialectType::Snowflake
5628 ) {
5629 DataType::Timestamp {
5630 precision: None,
5631 timezone: false,
5632 }
5633 } else {
5634 DataType::Custom {
5635 name: "TIMESTAMPNTZ".to_string(),
5636 }
5637 }
5638 }
5639 DialectType::Spark | DialectType::Databricks => {
5640 // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
5641 if matches!(source, DialectType::BigQuery) {
5642 DataType::Timestamp {
5643 precision: None,
5644 timezone: false,
5645 }
5646 } else {
5647 DataType::Custom {
5648 name: "TIMESTAMP_NTZ".to_string(),
5649 }
5650 }
5651 }
5652 DialectType::ClickHouse => DataType::Nullable {
5653 inner: Box::new(DataType::Custom {
5654 name: "DateTime".to_string(),
5655 }),
5656 },
5657 DialectType::TSQL | DialectType::Fabric => DataType::Custom {
5658 name: "DATETIME2".to_string(),
5659 },
5660 DialectType::DuckDB => {
5661 // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
5662 // or when the timestamp string explicitly has timezone info
5663 if matches!(source, DialectType::BigQuery)
5664 || Self::timestamp_string_has_timezone(&s)
5665 {
5666 DataType::Custom {
5667 name: "TIMESTAMPTZ".to_string(),
5668 }
5669 } else {
5670 DataType::Timestamp {
5671 precision: None,
5672 timezone: false,
5673 }
5674 }
5675 }
5676 _ => DataType::Timestamp {
5677 precision: None,
5678 timezone: false,
5679 },
5680 };
5681 return Ok(Expression::Cast(Box::new(Cast {
5682 this: Expression::Literal(Box::new(Literal::String(s))),
5683 to: dt,
5684 trailing_comments: vec![],
5685 double_colon_syntax: false,
5686 format: None,
5687 default: None,
5688 inferred_type: None,
5689 })));
5690 }
5691 }
5692 }
5693
5694 // PostgreSQL DELETE requires explicit AS for table aliases
5695 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
5696 if let Expression::Delete(ref del) = e {
5697 if del.alias.is_some() && !del.alias_explicit_as {
5698 let mut new_del = del.clone();
5699 new_del.alias_explicit_as = true;
5700 return Ok(Expression::Delete(new_del));
5701 }
5702 }
5703 }
5704
5705 // UNION/INTERSECT/EXCEPT DISTINCT handling:
5706 // Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
5707 // while others don't support it (Presto, Spark, DuckDB, etc.)
5708 {
5709 let needs_distinct =
5710 matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
5711 let drop_distinct = matches!(
5712 target,
5713 DialectType::Presto
5714 | DialectType::Trino
5715 | DialectType::Athena
5716 | DialectType::Spark
5717 | DialectType::Databricks
5718 | DialectType::DuckDB
5719 | DialectType::Hive
5720 | DialectType::MySQL
5721 | DialectType::PostgreSQL
5722 | DialectType::SQLite
5723 | DialectType::TSQL
5724 | DialectType::Redshift
5725 | DialectType::Snowflake
5726 | DialectType::Oracle
5727 | DialectType::Teradata
5728 | DialectType::Drill
5729 | DialectType::Doris
5730 | DialectType::StarRocks
5731 );
5732 match &e {
5733 Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
5734 let mut new_u = (**u).clone();
5735 new_u.distinct = true;
5736 return Ok(Expression::Union(Box::new(new_u)));
5737 }
5738 Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
5739 let mut new_i = (**i).clone();
5740 new_i.distinct = true;
5741 return Ok(Expression::Intersect(Box::new(new_i)));
5742 }
5743 Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
5744 let mut new_ex = (**ex).clone();
5745 new_ex.distinct = true;
5746 return Ok(Expression::Except(Box::new(new_ex)));
5747 }
5748 Expression::Union(u) if u.distinct && drop_distinct => {
5749 let mut new_u = (**u).clone();
5750 new_u.distinct = false;
5751 return Ok(Expression::Union(Box::new(new_u)));
5752 }
5753 Expression::Intersect(i) if i.distinct && drop_distinct => {
5754 let mut new_i = (**i).clone();
5755 new_i.distinct = false;
5756 return Ok(Expression::Intersect(Box::new(new_i)));
5757 }
5758 Expression::Except(ex) if ex.distinct && drop_distinct => {
5759 let mut new_ex = (**ex).clone();
5760 new_ex.distinct = false;
5761 return Ok(Expression::Except(Box::new(new_ex)));
5762 }
5763 _ => {}
5764 }
5765 }
5766
5767 // ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name)
5768 if matches!(target, DialectType::ClickHouse) {
5769 if let Expression::Function(ref f) = e {
5770 if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
5771 let mut new_f = f.as_ref().clone();
5772 new_f.name = "map".to_string();
5773 return Ok(Expression::Function(Box::new(new_f)));
5774 }
5775 }
5776 }
5777
5778 // ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
5779 if matches!(target, DialectType::ClickHouse) {
5780 if let Expression::Intersect(ref i) = e {
5781 if i.all {
5782 let mut new_i = (**i).clone();
5783 new_i.all = false;
5784 return Ok(Expression::Intersect(Box::new(new_i)));
5785 }
5786 }
5787 }
5788
5789 // Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
5790 // Only from Generic source, to prevent double-wrapping
5791 if matches!(source, DialectType::Generic) {
5792 if let Expression::Div(ref op) = e {
5793 let cast_type = match target {
5794 DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
5795 precision: None,
5796 scale: None,
5797 real_spelling: false,
5798 }),
5799 DialectType::Drill
5800 | DialectType::Trino
5801 | DialectType::Athena
5802 | DialectType::Presto => Some(DataType::Double {
5803 precision: None,
5804 scale: None,
5805 }),
5806 DialectType::PostgreSQL
5807 | DialectType::Redshift
5808 | DialectType::Materialize
5809 | DialectType::Teradata
5810 | DialectType::RisingWave => Some(DataType::Double {
5811 precision: None,
5812 scale: None,
5813 }),
5814 _ => None,
5815 };
5816 if let Some(dt) = cast_type {
5817 let cast_left = Expression::Cast(Box::new(Cast {
5818 this: op.left.clone(),
5819 to: dt,
5820 double_colon_syntax: false,
5821 trailing_comments: Vec::new(),
5822 format: None,
5823 default: None,
5824 inferred_type: None,
5825 }));
5826 let new_op = crate::expressions::BinaryOp {
5827 left: cast_left,
5828 right: op.right.clone(),
5829 left_comments: op.left_comments.clone(),
5830 operator_comments: op.operator_comments.clone(),
5831 trailing_comments: op.trailing_comments.clone(),
5832 inferred_type: None,
5833 };
5834 return Ok(Expression::Div(Box::new(new_op)));
5835 }
5836 }
5837 }
5838
5839 // CREATE DATABASE -> CREATE SCHEMA for DuckDB target
5840 if matches!(target, DialectType::DuckDB) {
5841 if let Expression::CreateDatabase(db) = e {
5842 let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
5843 schema.if_not_exists = db.if_not_exists;
5844 return Ok(Expression::CreateSchema(Box::new(schema)));
5845 }
5846 if let Expression::DropDatabase(db) = e {
5847 let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
5848 schema.if_exists = db.if_exists;
5849 return Ok(Expression::DropSchema(Box::new(schema)));
5850 }
5851 }
5852
5853 // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
5854 if matches!(source, DialectType::ClickHouse)
5855 && !matches!(target, DialectType::ClickHouse)
5856 {
5857 if let Expression::Cast(ref c) = e {
5858 if let DataType::Custom { ref name } = c.to {
5859 if name.len() >= 9
5860 && name[..9].eq_ignore_ascii_case("NULLABLE(")
5861 && name.ends_with(")")
5862 {
5863 let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
5864 let inner_upper = inner.to_ascii_uppercase();
5865 let new_dt = match inner_upper.as_str() {
5866 "DATETIME" | "DATETIME64" => DataType::Timestamp {
5867 precision: None,
5868 timezone: false,
5869 },
5870 "DATE" => DataType::Date,
5871 "INT64" | "BIGINT" => DataType::BigInt { length: None },
5872 "INT32" | "INT" | "INTEGER" => DataType::Int {
5873 length: None,
5874 integer_spelling: false,
5875 },
5876 "FLOAT64" | "DOUBLE" => DataType::Double {
5877 precision: None,
5878 scale: None,
5879 },
5880 "STRING" => DataType::Text,
5881 _ => DataType::Custom {
5882 name: inner.to_string(),
5883 },
5884 };
5885 let mut new_cast = c.clone();
5886 new_cast.to = new_dt;
5887 return Ok(Expression::Cast(new_cast));
5888 }
5889 }
5890 }
5891 }
5892
5893 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
5894 if matches!(target, DialectType::Snowflake) {
5895 if let Expression::ArrayConcatAgg(ref agg) = e {
5896 let mut agg_clone = agg.as_ref().clone();
5897 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
5898 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
5899 let flatten = Expression::Function(Box::new(Function::new(
5900 "ARRAY_FLATTEN".to_string(),
5901 vec![array_agg],
5902 )));
5903 return Ok(flatten);
5904 }
5905 }
5906
5907 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
5908 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
5909 if let Expression::ArrayConcatAgg(agg) = e {
5910 let arg = agg.this;
5911 return Ok(Expression::Function(Box::new(Function::new(
5912 "ARRAY_CONCAT_AGG".to_string(),
5913 vec![arg],
5914 ))));
5915 }
5916 }
5917
5918 // Determine what action to take by inspecting e immutably
5919 let action = {
5920 let source_propagates_nulls =
5921 matches!(source, DialectType::Snowflake | DialectType::BigQuery);
5922 let target_ignores_nulls =
5923 matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
5924
5925 match &e {
5926 Expression::Function(f) => {
5927 let name = f.name.to_ascii_uppercase();
5928 // DuckDB json(x) is a synonym for CAST(x AS JSON) — parses a string.
5929 // Map to JSON_PARSE(x) for Trino/Presto/Athena to preserve semantics.
5930 if name == "JSON"
5931 && f.args.len() == 1
5932 && matches!(source, DialectType::DuckDB)
5933 && matches!(
5934 target,
5935 DialectType::Presto | DialectType::Trino | DialectType::Athena
5936 )
5937 {
5938 Action::DuckDBJsonFuncToJsonParse
5939 // DuckDB json_valid(x) has no direct Trino equivalent; emit the
5940 // SQL:2016 `x IS JSON` predicate which has matching semantics.
5941 } else if name == "JSON_VALID"
5942 && f.args.len() == 1
5943 && matches!(source, DialectType::DuckDB)
5944 && matches!(
5945 target,
5946 DialectType::Presto | DialectType::Trino | DialectType::Athena
5947 )
5948 {
5949 Action::DuckDBJsonValidToIsJson
5950 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
5951 } else if (name == "DATE_PART" || name == "DATEPART")
5952 && f.args.len() == 2
5953 && matches!(target, DialectType::Snowflake)
5954 && !matches!(source, DialectType::Snowflake)
5955 && matches!(
5956 &f.args[0],
5957 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
5958 )
5959 {
5960 Action::DatePartUnquote
5961 } else if source_propagates_nulls
5962 && target_ignores_nulls
5963 && (name == "GREATEST" || name == "LEAST")
5964 && f.args.len() >= 2
5965 {
5966 Action::GreatestLeastNull
5967 } else if matches!(source, DialectType::Snowflake)
5968 && name == "ARRAY_GENERATE_RANGE"
5969 && f.args.len() >= 2
5970 {
5971 Action::ArrayGenerateRange
5972 } else if matches!(source, DialectType::Snowflake)
5973 && matches!(target, DialectType::DuckDB)
5974 && name == "DATE_TRUNC"
5975 && f.args.len() == 2
5976 {
5977 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
5978 // Logic based on Python sqlglot's input_type_preserved flag:
5979 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
5980 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
5981 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
5982 let unit_str = match &f.args[0] {
5983 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
5984 let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
5985 Some(s.to_ascii_uppercase())
5986 }
5987 _ => None,
5988 };
5989 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
5990 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
5991 });
5992 match &f.args[1] {
5993 Expression::Cast(c) => match &c.to {
5994 DataType::Time { .. } => Action::DateTruncWrapCast,
5995 DataType::Custom { name }
5996 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
5997 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
5998 {
5999 Action::DateTruncWrapCast
6000 }
6001 DataType::Timestamp { timezone: true, .. } => {
6002 Action::DateTruncWrapCast
6003 }
6004 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
6005 DataType::Timestamp {
6006 timezone: false, ..
6007 } if is_date_unit => Action::DateTruncWrapCast,
6008 _ => Action::None,
6009 },
6010 _ => Action::None,
6011 }
6012 } else if matches!(source, DialectType::Snowflake)
6013 && matches!(target, DialectType::DuckDB)
6014 && name == "TO_DATE"
6015 && f.args.len() == 1
6016 && !matches!(
6017 &f.args[0],
6018 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
6019 )
6020 {
6021 Action::ToDateToCast
6022 } else if !matches!(source, DialectType::Redshift)
6023 && matches!(target, DialectType::Redshift)
6024 && name == "CONVERT_TIMEZONE"
6025 && (f.args.len() == 2 || f.args.len() == 3)
6026 {
6027 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
6028 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
6029 // The Redshift parser adds 'UTC' as default source_tz, but when
6030 // transpiling from other dialects, we should preserve the original form.
6031 Action::ConvertTimezoneToExpr
6032 } else if matches!(source, DialectType::Snowflake)
6033 && matches!(target, DialectType::DuckDB)
6034 && name == "REGEXP_REPLACE"
6035 && f.args.len() == 4
6036 && !matches!(
6037 &f.args[3],
6038 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
6039 )
6040 {
6041 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
6042 Action::RegexpReplaceSnowflakeToDuckDB
6043 } else if matches!(source, DialectType::Snowflake)
6044 && matches!(target, DialectType::DuckDB)
6045 && name == "REGEXP_REPLACE"
6046 && f.args.len() == 5
6047 {
6048 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB
6049 Action::RegexpReplacePositionSnowflakeToDuckDB
6050 } else if matches!(source, DialectType::Snowflake)
6051 && matches!(target, DialectType::DuckDB)
6052 && name == "REGEXP_SUBSTR"
6053 {
6054 // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
6055 Action::RegexpSubstrSnowflakeToDuckDB
6056 } else if matches!(source, DialectType::Snowflake)
6057 && matches!(target, DialectType::Snowflake)
6058 && (name == "REGEXP_SUBSTR" || name == "REGEXP_SUBSTR_ALL")
6059 && f.args.len() == 6
6060 {
6061 // Snowflake identity: strip trailing group=0
6062 Action::RegexpSubstrSnowflakeIdentity
6063 } else if matches!(source, DialectType::Snowflake)
6064 && matches!(target, DialectType::DuckDB)
6065 && name == "REGEXP_SUBSTR_ALL"
6066 {
6067 // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
6068 Action::RegexpSubstrAllSnowflakeToDuckDB
6069 } else if matches!(source, DialectType::Snowflake)
6070 && matches!(target, DialectType::DuckDB)
6071 && name == "REGEXP_COUNT"
6072 {
6073 // Snowflake REGEXP_COUNT -> DuckDB LENGTH(REGEXP_EXTRACT_ALL(...))
6074 Action::RegexpCountSnowflakeToDuckDB
6075 } else if matches!(source, DialectType::Snowflake)
6076 && matches!(target, DialectType::DuckDB)
6077 && name == "REGEXP_INSTR"
6078 {
6079 // Snowflake REGEXP_INSTR -> DuckDB complex CASE expression
6080 Action::RegexpInstrSnowflakeToDuckDB
6081 } else if matches!(source, DialectType::BigQuery)
6082 && matches!(target, DialectType::Snowflake)
6083 && name == "REGEXP_EXTRACT_ALL"
6084 {
6085 // BigQuery REGEXP_EXTRACT_ALL -> Snowflake REGEXP_SUBSTR_ALL
6086 Action::RegexpExtractAllToSnowflake
6087 } else if name == "_BQ_TO_HEX" {
6088 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
6089 Action::BigQueryToHexBare
6090 } else if matches!(source, DialectType::BigQuery)
6091 && !matches!(target, DialectType::BigQuery)
6092 {
6093 // BigQuery-specific functions that need to be converted to standard forms
6094 match name.as_str() {
6095 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
6096 | "DATE_DIFF"
6097 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
6098 | "DATETIME_ADD" | "DATETIME_SUB"
6099 | "TIME_ADD" | "TIME_SUB"
6100 | "DATE_ADD" | "DATE_SUB"
6101 | "SAFE_DIVIDE"
6102 | "GENERATE_UUID"
6103 | "COUNTIF"
6104 | "EDIT_DISTANCE"
6105 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
6106 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
6107 | "TO_HEX"
6108 | "TO_JSON_STRING"
6109 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
6110 | "DIV"
6111 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
6112 | "LAST_DAY"
6113 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
6114 | "REGEXP_CONTAINS"
6115 | "CONTAINS_SUBSTR"
6116 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
6117 | "SAFE_CAST"
6118 | "GENERATE_DATE_ARRAY"
6119 | "PARSE_DATE" | "PARSE_TIMESTAMP"
6120 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
6121 | "ARRAY_CONCAT"
6122 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
6123 | "INSTR"
6124 | "MD5" | "SHA1" | "SHA256" | "SHA512"
6125 | "GENERATE_UUID()" // just in case
6126 | "REGEXP_EXTRACT_ALL"
6127 | "REGEXP_EXTRACT"
6128 | "INT64"
6129 | "ARRAY_CONCAT_AGG"
6130 | "DATE_DIFF(" // just in case
6131 | "TO_HEX_MD5" // internal
6132 | "MOD"
6133 | "CONCAT"
6134 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
6135 | "STRUCT"
6136 | "ROUND"
6137 | "MAKE_INTERVAL"
6138 | "ARRAY_TO_STRING"
6139 | "PERCENTILE_CONT"
6140 => Action::BigQueryFunctionNormalize,
6141 "ARRAY" if matches!(target, DialectType::Snowflake)
6142 && f.args.len() == 1
6143 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
6144 => Action::BigQueryArraySelectAsStructToSnowflake,
6145 _ => Action::None,
6146 }
6147 } else if matches!(source, DialectType::BigQuery)
6148 && matches!(target, DialectType::BigQuery)
6149 {
6150 // BigQuery -> BigQuery normalizations
6151 match name.as_str() {
6152 "TIMESTAMP_DIFF"
6153 | "DATETIME_DIFF"
6154 | "TIME_DIFF"
6155 | "DATE_DIFF"
6156 | "DATE_ADD"
6157 | "TO_HEX"
6158 | "CURRENT_TIMESTAMP"
6159 | "CURRENT_DATE"
6160 | "CURRENT_TIME"
6161 | "CURRENT_DATETIME"
6162 | "GENERATE_DATE_ARRAY"
6163 | "INSTR"
6164 | "FORMAT_DATETIME"
6165 | "DATETIME"
6166 | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
6167 _ => Action::None,
6168 }
6169 } else {
6170 // Generic function normalization for non-BigQuery sources
6171 match name.as_str() {
6172 "ARBITRARY" | "AGGREGATE"
6173 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
6174 | "STRUCT_EXTRACT"
6175 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
6176 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
6177 | "SUBSTRINGINDEX"
6178 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
6179 | "UNICODE"
6180 | "XOR"
6181 | "ARRAY_REVERSE_SORT"
6182 | "ENCODE" | "DECODE"
6183 | "QUANTILE"
6184 | "EPOCH" | "EPOCH_MS"
6185 | "HASHBYTES"
6186 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
6187 | "APPROX_DISTINCT"
6188 | "DATE_PARSE" | "FORMAT_DATETIME"
6189 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
6190 | "RLIKE"
6191 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
6192 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
6193 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
6194 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
6195 | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
6196 | "MAP" | "MAP_FROM_ENTRIES"
6197 | "COLLECT_LIST" | "COLLECT_SET"
6198 | "ISNAN" | "IS_NAN"
6199 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
6200 | "FORMAT_NUMBER"
6201 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
6202 | "ELEMENT_AT"
6203 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
6204 | "SPLIT_PART"
6205 // GENERATE_SERIES: handled separately below
6206 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
6207 | "JSON_QUERY" | "JSON_VALUE"
6208 | "JSON_SEARCH"
6209 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
6210 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
6211 | "CURDATE" | "CURTIME"
6212 | "ARRAY_TO_STRING"
6213 | "ARRAY_SORT" | "SORT_ARRAY"
6214 | "LEFT" | "RIGHT"
6215 | "MAP_FROM_ARRAYS"
6216 | "LIKE" | "ILIKE"
6217 | "ARRAY_CONCAT" | "LIST_CONCAT"
6218 | "QUANTILE_CONT" | "QUANTILE_DISC"
6219 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
6220 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
6221 | "LOCATE" | "STRPOS" | "INSTR"
6222 | "CHAR"
6223 // CONCAT: handled separately for COALESCE wrapping
6224 | "ARRAY_JOIN"
6225 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
6226 | "ISNULL"
6227 | "MONTHNAME"
6228 | "TO_TIMESTAMP"
6229 | "TO_DATE"
6230 | "TO_JSON"
6231 | "REGEXP_SPLIT"
6232 | "SPLIT"
6233 | "FORMATDATETIME"
6234 | "ARRAYJOIN"
6235 | "SPLITBYSTRING" | "SPLITBYREGEXP"
6236 | "NVL"
6237 | "TO_CHAR"
6238 | "DBMS_RANDOM.VALUE"
6239 | "REGEXP_LIKE"
6240 | "REPLICATE"
6241 | "LEN"
6242 | "COUNT_BIG"
6243 | "DATEFROMPARTS"
6244 | "DATETIMEFROMPARTS"
6245 | "CONVERT" | "TRY_CONVERT"
6246 | "STRFTIME" | "STRPTIME"
6247 | "DATE_FORMAT" | "FORMAT_DATE"
6248 | "PARSE_TIMESTAMP" | "PARSE_DATE"
6249 | "FROM_BASE64" | "TO_BASE64"
6250 | "GETDATE"
6251 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
6252 | "TO_UTF8" | "FROM_UTF8"
6253 | "STARTS_WITH" | "STARTSWITH"
6254 | "APPROX_COUNT_DISTINCT"
6255 | "JSON_FORMAT"
6256 | "SYSDATE"
6257 | "LOGICAL_OR" | "LOGICAL_AND"
6258 | "MONTHS_ADD"
6259 | "SCHEMA_NAME"
6260 | "STRTOL"
6261 | "EDITDIST3"
6262 | "FORMAT"
6263 | "LIST_CONTAINS" | "LIST_HAS"
6264 | "VARIANCE" | "STDDEV"
6265 | "ISINF"
6266 | "TO_UNIXTIME"
6267 | "FROM_UNIXTIME"
6268 | "DATEPART" | "DATE_PART"
6269 | "DATENAME"
6270 | "STRING_AGG"
6271 | "JSON_ARRAYAGG"
6272 | "APPROX_QUANTILE"
6273 | "MAKE_DATE"
6274 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
6275 | "RANGE"
6276 | "TRY_ELEMENT_AT"
6277 | "STR_TO_MAP"
6278 | "STRING"
6279 | "STR_TO_TIME"
6280 | "CURRENT_SCHEMA"
6281 | "LTRIM" | "RTRIM"
6282 | "UUID"
6283 | "FARM_FINGERPRINT"
6284 | "JSON_KEYS"
6285 | "WEEKOFYEAR"
6286 | "CONCAT_WS"
6287 | "ARRAY_SLICE"
6288 | "ARRAY_PREPEND"
6289 | "ARRAY_REMOVE"
6290 | "GENERATE_DATE_ARRAY"
6291 | "PARSE_JSON"
6292 | "JSON_REMOVE"
6293 | "JSON_SET"
6294 | "LEVENSHTEIN"
6295 | "CURRENT_VERSION"
6296 | "ARRAY_MAX"
6297 | "ARRAY_MIN"
6298 | "JAROWINKLER_SIMILARITY"
6299 | "CURRENT_SCHEMAS"
6300 | "TO_VARIANT"
6301 | "JSON_GROUP_ARRAY" | "JSON_GROUP_OBJECT"
6302 | "ARRAYS_OVERLAP" | "ARRAY_INTERSECTION"
6303 => Action::GenericFunctionNormalize,
6304 // Canonical date functions -> dialect-specific
6305 "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
6306 "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
6307 "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
6308 "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
6309 "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
6310 "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
6311 "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
6312 "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
6313 "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
6314 "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
6315 "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
6316 "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
6317 "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
6318 "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
6319 "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
6320 "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
6321 "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
6322 // STR_TO_DATE(x, fmt) -> dialect-specific
6323 "STR_TO_DATE" if f.args.len() == 2
6324 && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
6325 "STR_TO_DATE" => Action::GenericFunctionNormalize,
6326 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
6327 "TS_OR_DS_ADD" if f.args.len() == 3
6328 && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
6329 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
6330 "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
6331 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
6332 "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
6333 // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
6334 "IFNULL" if f.args.len() == 2 => Action::IfnullToCoalesce,
6335 // IS_ASCII(x) -> dialect-specific
6336 "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
6337 // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
6338 "STR_POSITION" => Action::StrPositionConvert,
6339 // ARRAY_SUM -> dialect-specific
6340 "ARRAY_SUM" => Action::ArraySumConvert,
6341 // ARRAY_SIZE -> dialect-specific (Drill only)
6342 "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
6343 // ARRAY_ANY -> dialect-specific
6344 "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
6345 // Functions needing specific cross-dialect transforms
6346 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
6347 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
6348 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
6349 "ARRAY" if matches!(source, DialectType::BigQuery)
6350 && matches!(target, DialectType::Snowflake)
6351 && f.args.len() == 1
6352 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
6353 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::Snowflake | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
6354 "TRUNC" if f.args.len() == 2 && matches!(&f.args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))) && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
6355 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 && !f.args.get(1).map_or(false, |a| matches!(a, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) => Action::GenericFunctionNormalize,
6356 // DATE_TRUNC('unit', x) from Generic source -> arg swap for BigQuery/Doris/Spark/MySQL
6357 "DATE_TRUNC" if f.args.len() == 2
6358 && matches!(source, DialectType::Generic)
6359 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
6360 | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
6361 // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
6362 "TIMESTAMP_TRUNC" if f.args.len() >= 2
6363 && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
6364 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
6365 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
6366 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
6367 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
6368 // GENERATE_SERIES with interval normalization for PG target
6369 "GENERATE_SERIES" if f.args.len() >= 3
6370 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
6371 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
6372 "GENERATE_SERIES" => Action::None, // passthrough for other cases
6373 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
6374 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
6375 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
6376 "CONCAT" => Action::GenericFunctionNormalize,
6377 // DIV(a, b) -> target-specific integer division
6378 "DIV" if f.args.len() == 2
6379 && matches!(source, DialectType::PostgreSQL)
6380 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
6381 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
6382 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
6383 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
6384 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
6385 "JSONB_EXISTS" if f.args.len() == 2
6386 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
6387 // DATE_BIN -> TIME_BUCKET for DuckDB
6388 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
6389 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
6390 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
6391 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
6392 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
6393 // ClickHouse any -> ANY_VALUE for other dialects
6394 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
6395 _ => Action::None,
6396 }
6397 }
6398 }
6399 Expression::AggregateFunction(af) => {
6400 let name = af.name.to_ascii_uppercase();
6401 match name.as_str() {
6402 "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
6403 "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
6404 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
6405 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
6406 if matches!(target, DialectType::DuckDB) =>
6407 {
6408 Action::JsonObjectAggConvert
6409 }
6410 "ARRAY_AGG"
6411 if matches!(
6412 target,
6413 DialectType::Hive
6414 | DialectType::Spark
6415 | DialectType::Databricks
6416 ) =>
6417 {
6418 Action::ArrayAggToCollectList
6419 }
6420 "MAX_BY" | "MIN_BY"
6421 if matches!(
6422 target,
6423 DialectType::ClickHouse
6424 | DialectType::Spark
6425 | DialectType::Databricks
6426 | DialectType::DuckDB
6427 ) =>
6428 {
6429 Action::MaxByMinByConvert
6430 }
6431 "COLLECT_LIST"
6432 if matches!(
6433 target,
6434 DialectType::Presto | DialectType::Trino | DialectType::DuckDB
6435 ) =>
6436 {
6437 Action::CollectListToArrayAgg
6438 }
6439 "COLLECT_SET"
6440 if matches!(
6441 target,
6442 DialectType::Presto
6443 | DialectType::Trino
6444 | DialectType::Snowflake
6445 | DialectType::DuckDB
6446 ) =>
6447 {
6448 Action::CollectSetConvert
6449 }
6450 "PERCENTILE"
6451 if matches!(
6452 target,
6453 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
6454 ) =>
6455 {
6456 Action::PercentileConvert
6457 }
6458 // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
6459 "CORR"
6460 if matches!(target, DialectType::DuckDB)
6461 && matches!(source, DialectType::Snowflake) =>
6462 {
6463 Action::CorrIsnanWrap
6464 }
6465 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
6466 "APPROX_QUANTILES"
6467 if matches!(source, DialectType::BigQuery)
6468 && matches!(target, DialectType::DuckDB) =>
6469 {
6470 Action::BigQueryApproxQuantiles
6471 }
6472 // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
6473 "PERCENTILE_CONT"
6474 if matches!(source, DialectType::BigQuery)
6475 && matches!(target, DialectType::DuckDB)
6476 && af.args.len() >= 2 =>
6477 {
6478 Action::BigQueryPercentileContToDuckDB
6479 }
6480 _ => Action::None,
6481 }
6482 }
6483 Expression::JSONArrayAgg(_) => match target {
6484 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
6485 _ => Action::None,
6486 },
6487 Expression::ToNumber(tn) => {
6488 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
6489 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
6490 match target {
6491 DialectType::Oracle
6492 | DialectType::Snowflake
6493 | DialectType::Teradata => Action::None,
6494 _ => Action::GenericFunctionNormalize,
6495 }
6496 } else {
6497 Action::None
6498 }
6499 }
6500 Expression::Nvl2(_) => {
6501 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
6502 // Keep as NVL2 for dialects that support it natively
6503 match target {
6504 DialectType::Oracle
6505 | DialectType::Snowflake
6506 | DialectType::Teradata
6507 | DialectType::Spark
6508 | DialectType::Databricks
6509 | DialectType::Redshift => Action::None,
6510 _ => Action::Nvl2Expand,
6511 }
6512 }
6513 Expression::Decode(_) | Expression::DecodeCase(_) => {
6514 // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
6515 // Keep as DECODE for Oracle/Snowflake
6516 match target {
6517 DialectType::Oracle | DialectType::Snowflake => Action::None,
6518 _ => Action::DecodeSimplify,
6519 }
6520 }
6521 Expression::Coalesce(ref cf) => {
6522 // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect
6523 // BigQuery keeps IFNULL natively when source is also BigQuery
6524 if cf.original_name.as_deref() == Some("IFNULL")
6525 && !(matches!(source, DialectType::BigQuery)
6526 && matches!(target, DialectType::BigQuery))
6527 {
6528 Action::IfnullToCoalesce
6529 } else {
6530 Action::None
6531 }
6532 }
6533 Expression::IfFunc(if_func) => {
6534 if matches!(source, DialectType::Snowflake)
6535 && matches!(
6536 target,
6537 DialectType::Presto | DialectType::Trino | DialectType::SQLite
6538 )
6539 && matches!(if_func.false_value, Some(Expression::Div(_)))
6540 {
6541 Action::Div0TypedDivision
6542 } else {
6543 Action::None
6544 }
6545 }
6546 Expression::ToJson(_) => match target {
6547 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
6548 DialectType::BigQuery => Action::ToJsonConvert,
6549 DialectType::DuckDB => Action::ToJsonConvert,
6550 _ => Action::None,
6551 },
6552 Expression::ArrayAgg(ref agg) => {
6553 if matches!(target, DialectType::MySQL | DialectType::SingleStore) {
6554 Action::ArrayAggToGroupConcat
6555 } else if matches!(
6556 target,
6557 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6558 ) {
6559 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
6560 Action::ArrayAggToCollectList
6561 } else if matches!(
6562 source,
6563 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6564 ) && matches!(target, DialectType::DuckDB)
6565 && agg.filter.is_some()
6566 {
6567 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
6568 // Need to add NOT x IS NULL to existing filter
6569 Action::ArrayAggNullFilter
6570 } else if matches!(target, DialectType::DuckDB)
6571 && agg.ignore_nulls == Some(true)
6572 && !agg.order_by.is_empty()
6573 {
6574 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
6575 Action::ArrayAggIgnoreNullsDuckDB
6576 } else if !matches!(source, DialectType::Snowflake) {
6577 Action::None
6578 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
6579 let is_array_agg = agg.name.as_deref().map_or(false, |n| n.eq_ignore_ascii_case("ARRAY_AGG"))
6580 || agg.name.is_none();
6581 if is_array_agg {
6582 Action::ArrayAggCollectList
6583 } else {
6584 Action::None
6585 }
6586 } else if matches!(
6587 target,
6588 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
6589 ) && agg.filter.is_none()
6590 {
6591 Action::ArrayAggFilter
6592 } else {
6593 Action::None
6594 }
6595 }
6596 Expression::WithinGroup(wg) => {
6597 if matches!(source, DialectType::Snowflake)
6598 && matches!(
6599 target,
6600 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
6601 )
6602 && matches!(wg.this, Expression::ArrayAgg(_))
6603 {
6604 Action::ArrayAggWithinGroupFilter
6605 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
6606 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
6607 || matches!(&wg.this, Expression::StringAgg(_))
6608 {
6609 Action::StringAggConvert
6610 } else if matches!(
6611 target,
6612 DialectType::Presto
6613 | DialectType::Trino
6614 | DialectType::Athena
6615 | DialectType::Spark
6616 | DialectType::Databricks
6617 ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
6618 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
6619 || matches!(&wg.this, Expression::PercentileCont(_)))
6620 {
6621 Action::PercentileContConvert
6622 } else {
6623 Action::None
6624 }
6625 }
6626 // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
6627 // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
6628 // DATETIME is the timezone-unaware type
6629 Expression::Cast(ref c) => {
6630 if c.format.is_some()
6631 && (matches!(source, DialectType::BigQuery)
6632 || matches!(source, DialectType::Teradata))
6633 {
6634 Action::BigQueryCastFormat
6635 } else if matches!(target, DialectType::BigQuery)
6636 && !matches!(source, DialectType::BigQuery)
6637 && matches!(
6638 c.to,
6639 DataType::Timestamp {
6640 timezone: false,
6641 ..
6642 }
6643 )
6644 {
6645 Action::CastTimestampToDatetime
6646 } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
6647 && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
6648 && matches!(
6649 c.to,
6650 DataType::Timestamp {
6651 timezone: false,
6652 ..
6653 }
6654 )
6655 {
6656 // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
6657 // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
6658 Action::CastTimestampToDatetime
6659 } else if matches!(
6660 source,
6661 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6662 ) && matches!(
6663 target,
6664 DialectType::Presto
6665 | DialectType::Trino
6666 | DialectType::Athena
6667 | DialectType::DuckDB
6668 | DialectType::Snowflake
6669 | DialectType::BigQuery
6670 | DialectType::Databricks
6671 | DialectType::TSQL
6672 ) {
6673 Action::HiveCastToTryCast
6674 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
6675 && matches!(target, DialectType::MySQL | DialectType::StarRocks)
6676 {
6677 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
6678 Action::CastTimestamptzToFunc
6679 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
6680 && matches!(
6681 target,
6682 DialectType::Hive
6683 | DialectType::Spark
6684 | DialectType::Databricks
6685 | DialectType::BigQuery
6686 )
6687 {
6688 // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
6689 Action::CastTimestampStripTz
6690 } else if matches!(&c.to, DataType::Json)
6691 && matches!(&c.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
6692 && matches!(
6693 target,
6694 DialectType::Presto
6695 | DialectType::Trino
6696 | DialectType::Athena
6697 | DialectType::Snowflake
6698 )
6699 {
6700 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
6701 // Only when the input is a string literal (JSON 'value' syntax)
6702 Action::JsonLiteralToJsonParse
6703 } else if matches!(&c.to, DataType::Json)
6704 && matches!(source, DialectType::DuckDB)
6705 && matches!(
6706 target,
6707 DialectType::Presto | DialectType::Trino | DialectType::Athena
6708 )
6709 {
6710 // DuckDB's CAST(x AS JSON) parses the string value into a JSON value.
6711 // Trino/Presto/Athena's CAST(x AS JSON) instead wraps the value as a
6712 // JSON string (no parsing) — different semantics. Use JSON_PARSE(x)
6713 // in the target to preserve DuckDB's parse semantics.
6714 Action::JsonLiteralToJsonParse
6715 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
6716 && matches!(target, DialectType::Spark | DialectType::Databricks)
6717 {
6718 // CAST(x AS JSON) -> TO_JSON(x) for Spark
6719 Action::CastToJsonForSpark
6720 } else if (matches!(
6721 &c.to,
6722 DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
6723 )) && matches!(
6724 target,
6725 DialectType::Spark | DialectType::Databricks
6726 ) && (matches!(&c.this, Expression::ParseJson(_))
6727 || matches!(
6728 &c.this,
6729 Expression::Function(f)
6730 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
6731 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
6732 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
6733 ))
6734 {
6735 // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
6736 // -> FROM_JSON(..., type_string) for Spark
6737 Action::CastJsonToFromJson
6738 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6739 && matches!(
6740 c.to,
6741 DataType::Timestamp {
6742 timezone: false,
6743 ..
6744 }
6745 )
6746 && matches!(source, DialectType::DuckDB)
6747 {
6748 Action::StrftimeCastTimestamp
6749 } else if matches!(source, DialectType::DuckDB)
6750 && matches!(
6751 c.to,
6752 DataType::Decimal {
6753 precision: None,
6754 ..
6755 }
6756 )
6757 {
6758 Action::DecimalDefaultPrecision
6759 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
6760 && matches!(c.to, DataType::Char { length: None })
6761 && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
6762 {
6763 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
6764 Action::MysqlCastCharToText
6765 } else if matches!(
6766 source,
6767 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6768 ) && matches!(
6769 target,
6770 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6771 ) && Self::has_varchar_char_type(&c.to)
6772 {
6773 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
6774 Action::SparkCastVarcharToString
6775 } else {
6776 Action::None
6777 }
6778 }
6779 Expression::SafeCast(ref c) => {
6780 if c.format.is_some()
6781 && matches!(source, DialectType::BigQuery)
6782 && !matches!(target, DialectType::BigQuery)
6783 {
6784 Action::BigQueryCastFormat
6785 } else {
6786 Action::None
6787 }
6788 }
6789 Expression::TryCast(ref c) => {
6790 if matches!(&c.to, DataType::Json)
6791 && matches!(source, DialectType::DuckDB)
6792 && matches!(
6793 target,
6794 DialectType::Presto | DialectType::Trino | DialectType::Athena
6795 )
6796 {
6797 // DuckDB's TRY_CAST(x AS JSON) tries to parse x as JSON, returning
6798 // NULL on parse failure. Trino/Presto/Athena's TRY_CAST(x AS JSON)
6799 // wraps the value as a JSON string (no parse). Emit TRY(JSON_PARSE(x))
6800 // to preserve DuckDB's parse-or-null semantics.
6801 Action::DuckDBTryCastJsonToTryJsonParse
6802 } else {
6803 Action::None
6804 }
6805 }
6806 // For DuckDB: DATE_TRUNC should preserve the input type
6807 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
6808 if matches!(source, DialectType::Snowflake)
6809 && matches!(target, DialectType::DuckDB)
6810 {
6811 Action::DateTruncWrapCast
6812 } else {
6813 Action::None
6814 }
6815 }
6816 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
6817 Expression::SetStatement(s) => {
6818 if matches!(target, DialectType::DuckDB)
6819 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
6820 && s.items.iter().any(|item| item.kind.is_none())
6821 {
6822 Action::SetToVariable
6823 } else {
6824 Action::None
6825 }
6826 }
6827 // Cross-dialect NULL ordering normalization.
6828 // When nulls_first is not specified, fill in the source dialect's implied
6829 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
6830 Expression::Ordered(o) => {
6831 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
6832 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
6833 Action::MysqlNullsOrdering
6834 } else {
6835 // Skip targets that don't support NULLS FIRST/LAST syntax
6836 let target_supports_nulls = !matches!(
6837 target,
6838 DialectType::MySQL
6839 | DialectType::TSQL
6840 | DialectType::StarRocks
6841 | DialectType::Doris
6842 );
6843 if o.nulls_first.is_none() && source != target && target_supports_nulls
6844 {
6845 Action::NullsOrdering
6846 } else {
6847 Action::None
6848 }
6849 }
6850 }
6851 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
6852 Expression::DataType(dt) => {
6853 if matches!(source, DialectType::BigQuery)
6854 && !matches!(target, DialectType::BigQuery)
6855 {
6856 match dt {
6857 DataType::Custom { ref name }
6858 if name.eq_ignore_ascii_case("INT64")
6859 || name.eq_ignore_ascii_case("FLOAT64")
6860 || name.eq_ignore_ascii_case("BOOL")
6861 || name.eq_ignore_ascii_case("BYTES")
6862 || name.eq_ignore_ascii_case("NUMERIC")
6863 || name.eq_ignore_ascii_case("STRING")
6864 || name.eq_ignore_ascii_case("DATETIME") =>
6865 {
6866 Action::BigQueryCastType
6867 }
6868 _ => Action::None,
6869 }
6870 } else if matches!(source, DialectType::TSQL) {
6871 // For TSQL source -> any target (including TSQL itself for REAL)
6872 match dt {
6873 // REAL -> FLOAT even for TSQL->TSQL
6874 DataType::Custom { ref name }
6875 if name.eq_ignore_ascii_case("REAL") =>
6876 {
6877 Action::TSQLTypeNormalize
6878 }
6879 DataType::Float {
6880 real_spelling: true,
6881 ..
6882 } => Action::TSQLTypeNormalize,
6883 // Other TSQL type normalizations only for non-TSQL targets
6884 DataType::Custom { ref name }
6885 if !matches!(target, DialectType::TSQL)
6886 && (name.eq_ignore_ascii_case("MONEY")
6887 || name.eq_ignore_ascii_case("SMALLMONEY")
6888 || name.eq_ignore_ascii_case("DATETIME2")
6889 || name.eq_ignore_ascii_case("IMAGE")
6890 || name.eq_ignore_ascii_case("BIT")
6891 || name.eq_ignore_ascii_case("ROWVERSION")
6892 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
6893 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
6894 || (name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC"))
6895 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("DATETIME2("))
6896 || (name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME("))) =>
6897 {
6898 Action::TSQLTypeNormalize
6899 }
6900 DataType::Float {
6901 precision: Some(_), ..
6902 } if !matches!(target, DialectType::TSQL) => {
6903 Action::TSQLTypeNormalize
6904 }
6905 DataType::TinyInt { .. }
6906 if !matches!(target, DialectType::TSQL) =>
6907 {
6908 Action::TSQLTypeNormalize
6909 }
6910 // INTEGER -> INT for Databricks/Spark targets
6911 DataType::Int {
6912 integer_spelling: true,
6913 ..
6914 } if matches!(
6915 target,
6916 DialectType::Databricks | DialectType::Spark
6917 ) =>
6918 {
6919 Action::TSQLTypeNormalize
6920 }
6921 _ => Action::None,
6922 }
6923 } else if (matches!(source, DialectType::Oracle)
6924 || matches!(source, DialectType::Generic))
6925 && !matches!(target, DialectType::Oracle)
6926 {
6927 match dt {
6928 DataType::Custom { ref name }
6929 if (name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2("))
6930 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2("))
6931 || name.eq_ignore_ascii_case("VARCHAR2")
6932 || name.eq_ignore_ascii_case("NVARCHAR2") =>
6933 {
6934 Action::OracleVarchar2ToVarchar
6935 }
6936 _ => Action::None,
6937 }
6938 } else if matches!(target, DialectType::Snowflake)
6939 && !matches!(source, DialectType::Snowflake)
6940 {
6941 // When target is Snowflake but source is NOT Snowflake,
6942 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
6943 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
6944 // should keep their FLOAT spelling.
6945 match dt {
6946 DataType::Float { .. } => Action::SnowflakeFloatProtect,
6947 _ => Action::None,
6948 }
6949 } else {
6950 Action::None
6951 }
6952 }
6953 // LOWER patterns from BigQuery TO_HEX conversions:
6954 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
6955 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
6956 Expression::Lower(uf) => {
6957 if matches!(source, DialectType::BigQuery) {
6958 match &uf.this {
6959 Expression::Lower(_) => Action::BigQueryToHexLower,
6960 Expression::Function(f)
6961 if f.name == "TO_HEX"
6962 && matches!(target, DialectType::BigQuery) =>
6963 {
6964 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
6965 Action::BigQueryToHexLower
6966 }
6967 _ => Action::None,
6968 }
6969 } else {
6970 Action::None
6971 }
6972 }
6973 // UPPER patterns from BigQuery TO_HEX conversions:
6974 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
6975 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
6976 Expression::Upper(uf) => {
6977 if matches!(source, DialectType::BigQuery) {
6978 match &uf.this {
6979 Expression::Lower(_) => Action::BigQueryToHexUpper,
6980 _ => Action::None,
6981 }
6982 } else {
6983 Action::None
6984 }
6985 }
6986 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
6987 // Snowflake supports LAST_DAY with unit, so keep it there
6988 Expression::LastDay(ld) => {
6989 if matches!(source, DialectType::BigQuery)
6990 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
6991 && ld.unit.is_some()
6992 {
6993 Action::BigQueryLastDayStripUnit
6994 } else {
6995 Action::None
6996 }
6997 }
6998 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
6999 Expression::SafeDivide(_) => {
7000 if matches!(source, DialectType::BigQuery)
7001 && !matches!(target, DialectType::BigQuery)
7002 {
7003 Action::BigQuerySafeDivide
7004 } else {
7005 Action::None
7006 }
7007 }
7008 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
7009 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
7010 Expression::AnyValue(ref agg) => {
7011 if matches!(source, DialectType::BigQuery)
7012 && matches!(target, DialectType::DuckDB)
7013 && agg.having_max.is_some()
7014 {
7015 Action::BigQueryAnyValueHaving
7016 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
7017 && !matches!(source, DialectType::Spark | DialectType::Databricks)
7018 && agg.ignore_nulls.is_none()
7019 {
7020 Action::AnyValueIgnoreNulls
7021 } else {
7022 Action::None
7023 }
7024 }
7025 Expression::Any(ref q) => {
7026 if matches!(source, DialectType::PostgreSQL)
7027 && matches!(
7028 target,
7029 DialectType::Spark | DialectType::Databricks | DialectType::Hive
7030 )
7031 && q.op.is_some()
7032 && !matches!(
7033 q.subquery,
7034 Expression::Select(_) | Expression::Subquery(_)
7035 )
7036 {
7037 Action::AnyToExists
7038 } else {
7039 Action::None
7040 }
7041 }
// Snowflake RLIKE does full-string match; DuckDB REGEXP_FULL_MATCH also does full-string match
7044 Expression::RegexpLike(_)
7045 if matches!(source, DialectType::Snowflake)
7046 && matches!(target, DialectType::DuckDB) =>
7047 {
7048 Action::RlikeSnowflakeToDuckDB
7049 }
7050 // RegexpLike from non-DuckDB/non-Snowflake sources -> REGEXP_MATCHES for DuckDB target
7051 Expression::RegexpLike(_)
7052 if !matches!(source, DialectType::DuckDB)
7053 && matches!(target, DialectType::DuckDB) =>
7054 {
7055 Action::RegexpLikeToDuckDB
7056 }
// RegexpLike -> Exasol: wrap the pattern as `.*<pattern>.*` so partial-match
// semantics survive Exasol's full-string REGEXP_LIKE matching
7058 Expression::RegexpLike(_)
7059 if matches!(target, DialectType::Exasol) =>
7060 {
7061 Action::RegexpLikeExasolAnchor
7062 }
7063 // Safe-division source -> non-safe target: NULLIF wrapping and/or CAST
7064 // Safe-division dialects: MySQL, DuckDB, SingleStore, TiDB, ClickHouse, Doris
7065 Expression::Div(ref op)
7066 if matches!(
7067 source,
7068 DialectType::MySQL
7069 | DialectType::DuckDB
7070 | DialectType::SingleStore
7071 | DialectType::TiDB
7072 | DialectType::ClickHouse
7073 | DialectType::Doris
7074 ) && matches!(
7075 target,
7076 DialectType::PostgreSQL
7077 | DialectType::Redshift
7078 | DialectType::Drill
7079 | DialectType::Trino
7080 | DialectType::Presto
7081 | DialectType::Athena
7082 | DialectType::TSQL
7083 | DialectType::Teradata
7084 | DialectType::SQLite
7085 | DialectType::BigQuery
7086 | DialectType::Snowflake
7087 | DialectType::Databricks
7088 | DialectType::Oracle
7089 | DialectType::Materialize
7090 | DialectType::RisingWave
7091 ) =>
7092 {
7093 // Only wrap if RHS is not already NULLIF
7094 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
7095 {
7096 Action::MySQLSafeDivide
7097 } else {
7098 Action::None
7099 }
7100 }
7101 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
7102 // For TSQL/Fabric, convert to sp_rename instead
7103 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
7104 if let Some(crate::expressions::AlterTableAction::RenameTable(
7105 ref new_tbl,
7106 )) = at.actions.first()
7107 {
7108 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
7109 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
7110 Action::AlterTableToSpRename
7111 } else if new_tbl.schema.is_some()
7112 && matches!(
7113 target,
7114 DialectType::BigQuery
7115 | DialectType::Doris
7116 | DialectType::StarRocks
7117 | DialectType::DuckDB
7118 | DialectType::PostgreSQL
7119 | DialectType::Redshift
7120 )
7121 {
7122 Action::AlterTableRenameStripSchema
7123 } else {
7124 Action::None
7125 }
7126 } else {
7127 Action::None
7128 }
7129 }
7130 // EPOCH(x) expression -> target-specific epoch conversion
7131 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
7132 Action::EpochConvert
7133 }
7134 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
7135 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
7136 Action::EpochMsConvert
7137 }
7138 // STRING_AGG -> GROUP_CONCAT for MySQL/SQLite
7139 Expression::StringAgg(_) => {
7140 if matches!(
7141 target,
7142 DialectType::MySQL
7143 | DialectType::SingleStore
7144 | DialectType::Doris
7145 | DialectType::StarRocks
7146 | DialectType::SQLite
7147 ) {
7148 Action::StringAggConvert
7149 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
7150 Action::StringAggConvert
7151 } else {
7152 Action::None
7153 }
7154 }
7155 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
7156 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
7157 Expression::GroupConcat(_) => Action::GroupConcatConvert,
7158 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
7159 // DuckDB CARDINALITY -> keep as CARDINALITY for DuckDB target (used for maps)
7160 Expression::Cardinality(_)
7161 if matches!(source, DialectType::DuckDB)
7162 && matches!(target, DialectType::DuckDB) =>
7163 {
7164 Action::None
7165 }
7166 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
7167 Action::ArrayLengthConvert
7168 }
7169 Expression::ArraySize(_) => {
7170 if matches!(target, DialectType::Drill) {
7171 Action::ArraySizeDrill
7172 } else {
7173 Action::ArrayLengthConvert
7174 }
7175 }
7176 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
7177 Expression::ArrayRemove(_) => match target {
7178 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
7179 Action::ArrayRemoveConvert
7180 }
7181 _ => Action::None,
7182 },
7183 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
7184 Expression::ArrayReverse(_) => match target {
7185 DialectType::ClickHouse => Action::ArrayReverseConvert,
7186 _ => Action::None,
7187 },
7188 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
7189 Expression::JsonKeys(_) => match target {
7190 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
7191 Action::JsonKeysConvert
7192 }
7193 _ => Action::None,
7194 },
7195 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
7196 Expression::ParseJson(_) => match target {
7197 DialectType::SQLite
7198 | DialectType::Doris
7199 | DialectType::MySQL
7200 | DialectType::StarRocks => Action::ParseJsonStrip,
7201 _ => Action::None,
7202 },
7203 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
7204 Expression::WeekOfYear(_)
7205 if matches!(target, DialectType::Snowflake)
7206 && !matches!(source, DialectType::Snowflake) =>
7207 {
7208 Action::WeekOfYearToWeekIso
7209 }
7210 // NVL: clear original_name so generator uses dialect-specific function names
7211 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
7212 // XOR: expand for dialects that don't support the XOR keyword
7213 Expression::Xor(_) => {
7214 let target_supports_xor = matches!(
7215 target,
7216 DialectType::MySQL
7217 | DialectType::SingleStore
7218 | DialectType::Doris
7219 | DialectType::StarRocks
7220 );
7221 if !target_supports_xor {
7222 Action::XorExpand
7223 } else {
7224 Action::None
7225 }
7226 }
7227 // TSQL #table -> temp table normalization (CREATE TABLE)
7228 Expression::CreateTable(ct)
7229 if matches!(source, DialectType::TSQL | DialectType::Fabric)
7230 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
7231 && ct.name.name.name.starts_with('#') =>
7232 {
7233 Action::TempTableHash
7234 }
7235 // TSQL #table -> strip # from table references in SELECT/etc.
7236 Expression::Table(tr)
7237 if matches!(source, DialectType::TSQL | DialectType::Fabric)
7238 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
7239 && tr.name.name.starts_with('#') =>
7240 {
7241 Action::TempTableHash
7242 }
7243 // TSQL #table -> strip # from DROP TABLE names
7244 Expression::DropTable(ref dt)
7245 if matches!(source, DialectType::TSQL | DialectType::Fabric)
7246 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
7247 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
7248 {
7249 Action::TempTableHash
7250 }
7251 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
7252 Expression::JsonExtract(_)
7253 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
7254 {
7255 Action::JsonExtractToTsql
7256 }
7257 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
7258 Expression::JsonExtractScalar(_)
7259 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
7260 {
7261 Action::JsonExtractToTsql
7262 }
7263 // JSON_EXTRACT -> JSONExtractString for ClickHouse
7264 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
7265 Action::JsonExtractToClickHouse
7266 }
7267 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
7268 Expression::JsonExtractScalar(_)
7269 if matches!(target, DialectType::ClickHouse) =>
7270 {
7271 Action::JsonExtractToClickHouse
7272 }
7273 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
7274 Expression::JsonExtract(ref f)
7275 if !f.arrow_syntax
7276 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
7277 {
7278 Action::JsonExtractToArrow
7279 }
7280 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
7281 Expression::JsonExtract(ref f)
7282 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
7283 && !matches!(
7284 source,
7285 DialectType::PostgreSQL
7286 | DialectType::Redshift
7287 | DialectType::Materialize
7288 )
7289 && matches!(&f.path, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$'))) =>
7290 {
7291 Action::JsonExtractToGetJsonObject
7292 }
7293 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
7294 Expression::JsonExtract(_)
7295 if matches!(
7296 target,
7297 DialectType::Hive | DialectType::Spark | DialectType::Databricks
7298 ) =>
7299 {
7300 Action::JsonExtractToGetJsonObject
7301 }
7302 // JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Snowflake, SQLite
7303 // Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
7304 Expression::JsonExtractScalar(ref f)
7305 if !f.arrow_syntax
7306 && !f.hash_arrow_syntax
7307 && matches!(
7308 target,
7309 DialectType::PostgreSQL
7310 | DialectType::Redshift
7311 | DialectType::Snowflake
7312 | DialectType::SQLite
7313 | DialectType::DuckDB
7314 ) =>
7315 {
7316 Action::JsonExtractScalarConvert
7317 }
7318 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
7319 Expression::JsonExtractScalar(_)
7320 if matches!(
7321 target,
7322 DialectType::Hive | DialectType::Spark | DialectType::Databricks
7323 ) =>
7324 {
7325 Action::JsonExtractScalarToGetJsonObject
7326 }
7327 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
7328 Expression::JsonExtract(ref f)
7329 if !f.arrow_syntax
7330 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
7331 {
7332 Action::JsonPathNormalize
7333 }
7334 // JsonQuery (parsed JSON_QUERY) -> target-specific
7335 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
7336 // JsonValue (parsed JSON_VALUE) -> target-specific
7337 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
7338 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
7339 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
7340 Expression::AtTimeZone(_)
7341 if matches!(
7342 target,
7343 DialectType::Presto
7344 | DialectType::Trino
7345 | DialectType::Athena
7346 | DialectType::Spark
7347 | DialectType::Databricks
7348 | DialectType::BigQuery
7349 | DialectType::Snowflake
7350 ) =>
7351 {
7352 Action::AtTimeZoneConvert
7353 }
7354 // DAY_OF_WEEK -> dialect-specific
7355 Expression::DayOfWeek(_)
7356 if matches!(
7357 target,
7358 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
7359 ) =>
7360 {
7361 Action::DayOfWeekConvert
7362 }
7363 // CURRENT_USER -> CURRENT_USER() for Snowflake
7364 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
7365 Action::CurrentUserParens
7366 }
7367 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
7368 Expression::ElementAt(_)
7369 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
7370 {
7371 Action::ElementAtConvert
7372 }
7373 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
7374 Expression::ArrayFunc(ref arr)
7375 if !arr.bracket_notation
7376 && matches!(
7377 target,
7378 DialectType::Spark
7379 | DialectType::Databricks
7380 | DialectType::Hive
7381 | DialectType::BigQuery
7382 | DialectType::DuckDB
7383 | DialectType::Snowflake
7384 | DialectType::Presto
7385 | DialectType::Trino
7386 | DialectType::Athena
7387 | DialectType::ClickHouse
7388 | DialectType::StarRocks
7389 ) =>
7390 {
7391 Action::ArraySyntaxConvert
7392 }
7393 // VARIANCE expression -> varSamp for ClickHouse
7394 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
7395 Action::VarianceToClickHouse
7396 }
7397 // STDDEV expression -> stddevSamp for ClickHouse
7398 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
7399 Action::StddevToClickHouse
7400 }
7401 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
7402 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
7403 Action::ApproxQuantileConvert
7404 }
7405 // MonthsBetween -> target-specific
7406 Expression::MonthsBetween(_)
7407 if !matches!(
7408 target,
7409 DialectType::Spark | DialectType::Databricks | DialectType::Hive
7410 ) =>
7411 {
7412 Action::MonthsBetweenConvert
7413 }
7414 // AddMonths -> target-specific DATEADD/DATE_ADD
7415 Expression::AddMonths(_) => Action::AddMonthsConvert,
7416 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
7417 Expression::MapFromArrays(_)
7418 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
7419 {
7420 Action::MapFromArraysConvert
7421 }
7422 // CURRENT_USER -> CURRENT_USER() for Spark
7423 Expression::CurrentUser(_)
7424 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
7425 {
7426 Action::CurrentUserSparkParens
7427 }
7428 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
7429 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
7430 if matches!(
7431 source,
7432 DialectType::Spark | DialectType::Databricks | DialectType::Hive
7433 ) && matches!(&f.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
7434 && matches!(
7435 target,
7436 DialectType::DuckDB
7437 | DialectType::Presto
7438 | DialectType::Trino
7439 | DialectType::Athena
7440 | DialectType::PostgreSQL
7441 | DialectType::Redshift
7442 ) =>
7443 {
7444 Action::SparkDateFuncCast
7445 }
7446 // $parameter -> @parameter for BigQuery
7447 Expression::Parameter(ref p)
7448 if matches!(target, DialectType::BigQuery)
7449 && matches!(source, DialectType::DuckDB)
7450 && (p.style == crate::expressions::ParameterStyle::Dollar
7451 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
7452 {
7453 Action::DollarParamConvert
7454 }
7455 // EscapeString literal: normalize literal newlines to \n
7456 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::EscapeString(ref s) if s.contains('\n') || s.contains('\r') || s.contains('\t'))
7457 =>
7458 {
7459 Action::EscapeStringNormalize
7460 }
7461 // straight_join: keep lowercase for DuckDB, quote for MySQL
7462 Expression::Column(ref col)
7463 if col.name.name == "STRAIGHT_JOIN"
7464 && col.table.is_none()
7465 && matches!(source, DialectType::DuckDB)
7466 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
7467 {
7468 Action::StraightJoinCase
7469 }
7470 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
7471 // Snowflake INTERVAL format: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
7472 Expression::Interval(ref iv)
7473 if matches!(
7474 target,
7475 DialectType::Snowflake
7476 | DialectType::PostgreSQL
7477 | DialectType::Redshift
7478 ) && iv.unit.is_some()
7479 && iv.this.as_ref().map_or(false, |t| matches!(t, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) =>
7480 {
7481 Action::SnowflakeIntervalFormat
7482 }
7483 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
7484 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
7485 if let Some(ref sample) = ts.sample {
7486 if !sample.explicit_method {
7487 Action::TablesampleReservoir
7488 } else {
7489 Action::None
7490 }
7491 } else {
7492 Action::None
7493 }
7494 }
7495 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
7496 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
7497 Expression::TableSample(ref ts)
7498 if matches!(target, DialectType::Snowflake)
7499 && !matches!(source, DialectType::Snowflake)
7500 && ts.sample.is_some() =>
7501 {
7502 if let Some(ref sample) = ts.sample {
7503 if !sample.explicit_method {
7504 Action::TablesampleSnowflakeStrip
7505 } else {
7506 Action::None
7507 }
7508 } else {
7509 Action::None
7510 }
7511 }
7512 Expression::Table(ref t)
7513 if matches!(target, DialectType::Snowflake)
7514 && !matches!(source, DialectType::Snowflake)
7515 && t.table_sample.is_some() =>
7516 {
7517 if let Some(ref sample) = t.table_sample {
7518 if !sample.explicit_method {
7519 Action::TablesampleSnowflakeStrip
7520 } else {
7521 Action::None
7522 }
7523 } else {
7524 Action::None
7525 }
7526 }
7527 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
7528 Expression::AlterTable(ref at)
7529 if matches!(target, DialectType::TSQL | DialectType::Fabric)
7530 && !at.actions.is_empty()
7531 && matches!(
7532 at.actions.first(),
7533 Some(crate::expressions::AlterTableAction::RenameTable(_))
7534 ) =>
7535 {
7536 Action::AlterTableToSpRename
7537 }
7538 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
7539 Expression::Subscript(ref sub)
7540 if matches!(
7541 target,
7542 DialectType::BigQuery
7543 | DialectType::Hive
7544 | DialectType::Spark
7545 | DialectType::Databricks
7546 ) && matches!(
7547 source,
7548 DialectType::DuckDB
7549 | DialectType::PostgreSQL
7550 | DialectType::Presto
7551 | DialectType::Trino
7552 | DialectType::Redshift
7553 | DialectType::ClickHouse
7554 ) && matches!(&sub.index, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(ref n) if n.parse::<i64>().unwrap_or(0) > 0)) =>
7555 {
7556 Action::ArrayIndexConvert
7557 }
7558 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
7559 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
7560 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
7561 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
7562 Expression::WindowFunction(ref wf) => {
7563 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
7564 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
7565 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
7566 if matches!(target, DialectType::BigQuery)
7567 && !is_row_number
7568 && !wf.over.order_by.is_empty()
7569 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
7570 {
7571 Action::BigQueryNullsOrdering
7572 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
7573 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
7574 } else {
7575 let source_nulls_last = matches!(source, DialectType::DuckDB);
7576 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
7577 matches!(
7578 f.kind,
7579 crate::expressions::WindowFrameKind::Range
7580 | crate::expressions::WindowFrameKind::Groups
7581 )
7582 });
7583 if source_nulls_last
7584 && matches!(target, DialectType::MySQL)
7585 && !wf.over.order_by.is_empty()
7586 && wf.over.order_by.iter().any(|o| !o.desc)
7587 && !has_range_frame
7588 {
7589 Action::MysqlNullsLastRewrite
7590 } else {
7591 // Check for Snowflake window frame handling for FIRST_VALUE/LAST_VALUE/NTH_VALUE
7592 let is_ranking_window_func = matches!(
7593 &wf.this,
7594 Expression::FirstValue(_)
7595 | Expression::LastValue(_)
7596 | Expression::NthValue(_)
7597 );
7598 let has_full_unbounded_frame = wf.over.frame.as_ref().map_or(false, |f| {
7599 matches!(f.kind, crate::expressions::WindowFrameKind::Rows)
7600 && matches!(f.start, crate::expressions::WindowFrameBound::UnboundedPreceding)
7601 && matches!(f.end, Some(crate::expressions::WindowFrameBound::UnboundedFollowing))
7602 && f.exclude.is_none()
7603 });
7604 if is_ranking_window_func && matches!(source, DialectType::Snowflake) {
7605 if has_full_unbounded_frame && matches!(target, DialectType::Snowflake) {
7606 // Strip the default frame for Snowflake target
7607 Action::SnowflakeWindowFrameStrip
7608 } else if !has_full_unbounded_frame && wf.over.frame.is_none() && !matches!(target, DialectType::Snowflake) {
7609 // Add default frame for non-Snowflake target
7610 Action::SnowflakeWindowFrameAdd
7611 } else {
7612 match &wf.this {
7613 Expression::FirstValue(ref vf)
7614 | Expression::LastValue(ref vf)
7615 if vf.ignore_nulls == Some(false) =>
7616 {
7617 match target {
7618 DialectType::SQLite => Action::RespectNullsConvert,
7619 _ => Action::None,
7620 }
7621 }
7622 _ => Action::None,
7623 }
7624 }
7625 } else {
7626 match &wf.this {
7627 Expression::FirstValue(ref vf)
7628 | Expression::LastValue(ref vf)
7629 if vf.ignore_nulls == Some(false) =>
7630 {
7631 // RESPECT NULLS
7632 match target {
7633 DialectType::SQLite | DialectType::PostgreSQL => {
7634 Action::RespectNullsConvert
7635 }
7636 _ => Action::None,
7637 }
7638 }
7639 _ => Action::None,
7640 }
7641 }
7642 }
7643 }
7644 }
7645 // CREATE TABLE a LIKE b -> dialect-specific transformations
7646 Expression::CreateTable(ref ct)
7647 if ct.columns.is_empty()
7648 && ct.constraints.iter().any(|c| {
7649 matches!(c, crate::expressions::TableConstraint::Like { .. })
7650 })
7651 && matches!(
7652 target,
7653 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
7654 ) =>
7655 {
7656 Action::CreateTableLikeToCtas
7657 }
7658 Expression::CreateTable(ref ct)
7659 if ct.columns.is_empty()
7660 && ct.constraints.iter().any(|c| {
7661 matches!(c, crate::expressions::TableConstraint::Like { .. })
7662 })
7663 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
7664 {
7665 Action::CreateTableLikeToSelectInto
7666 }
7667 Expression::CreateTable(ref ct)
7668 if ct.columns.is_empty()
7669 && ct.constraints.iter().any(|c| {
7670 matches!(c, crate::expressions::TableConstraint::Like { .. })
7671 })
7672 && matches!(target, DialectType::ClickHouse) =>
7673 {
7674 Action::CreateTableLikeToAs
7675 }
7676 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
7677 Expression::CreateTable(ref ct)
7678 if matches!(target, DialectType::DuckDB)
7679 && matches!(
7680 source,
7681 DialectType::DuckDB
7682 | DialectType::Spark
7683 | DialectType::Databricks
7684 | DialectType::Hive
7685 ) =>
7686 {
7687 let has_comment = ct.columns.iter().any(|c| {
7688 c.comment.is_some()
7689 || c.constraints.iter().any(|con| {
7690 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
7691 })
7692 });
7693 let has_props = !ct.properties.is_empty();
7694 if has_comment || has_props {
7695 Action::CreateTableStripComment
7696 } else {
7697 Action::None
7698 }
7699 }
7700 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
7701 Expression::Array(_)
7702 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
7703 {
7704 Action::ArrayConcatBracketConvert
7705 }
7706 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
7707 Expression::ArrayFunc(ref arr)
7708 if arr.bracket_notation
7709 && matches!(source, DialectType::BigQuery)
7710 && matches!(target, DialectType::Redshift) =>
7711 {
7712 Action::ArrayConcatBracketConvert
7713 }
7714 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
7715 Expression::BitwiseOrAgg(ref f)
7716 | Expression::BitwiseAndAgg(ref f)
7717 | Expression::BitwiseXorAgg(ref f) => {
7718 if matches!(target, DialectType::DuckDB) {
7719 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
7720 if let Expression::Cast(ref c) = f.this {
7721 match &c.to {
7722 DataType::Float { .. }
7723 | DataType::Double { .. }
7724 | DataType::Decimal { .. } => Action::BitAggFloatCast,
7725 DataType::Custom { ref name }
7726 if name.eq_ignore_ascii_case("REAL") =>
7727 {
7728 Action::BitAggFloatCast
7729 }
7730 _ => Action::None,
7731 }
7732 } else {
7733 Action::None
7734 }
7735 } else if matches!(target, DialectType::Snowflake) {
7736 Action::BitAggSnowflakeRename
7737 } else {
7738 Action::None
7739 }
7740 }
7741 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
7742 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
7743 Action::FilterToIff
7744 }
7745 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
7746 Expression::Avg(ref f)
7747 | Expression::Sum(ref f)
7748 | Expression::Min(ref f)
7749 | Expression::Max(ref f)
7750 | Expression::CountIf(ref f)
7751 | Expression::Stddev(ref f)
7752 | Expression::StddevPop(ref f)
7753 | Expression::StddevSamp(ref f)
7754 | Expression::Variance(ref f)
7755 | Expression::VarPop(ref f)
7756 | Expression::VarSamp(ref f)
7757 | Expression::Median(ref f)
7758 | Expression::Mode(ref f)
7759 | Expression::First(ref f)
7760 | Expression::Last(ref f)
7761 | Expression::ApproxDistinct(ref f)
7762 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
7763 {
7764 Action::AggFilterToIff
7765 }
7766 Expression::Count(ref c)
7767 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
7768 {
7769 Action::AggFilterToIff
7770 }
7771 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
7772 Expression::Count(ref c)
7773 if c.distinct
7774 && matches!(&c.this, Some(Expression::Tuple(_)))
7775 && matches!(
7776 target,
7777 DialectType::Presto
7778 | DialectType::Trino
7779 | DialectType::DuckDB
7780 | DialectType::PostgreSQL
7781 ) =>
7782 {
7783 Action::CountDistinctMultiArg
7784 }
7785 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
7786 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
7787 Action::JsonToGetPath
7788 }
7789 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
7790 Expression::Struct(_)
7791 if matches!(
7792 target,
7793 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7794 ) && matches!(source, DialectType::DuckDB) =>
7795 {
7796 Action::StructToRow
7797 }
7798 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
7799 Expression::MapFunc(ref m)
7800 if m.curly_brace_syntax
7801 && matches!(
7802 target,
7803 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7804 )
7805 && matches!(source, DialectType::DuckDB) =>
7806 {
7807 Action::StructToRow
7808 }
7809 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
7810 Expression::ApproxCountDistinct(_)
7811 if matches!(
7812 target,
7813 DialectType::Presto | DialectType::Trino | DialectType::Athena
7814 ) =>
7815 {
7816 Action::ApproxCountDistinctToApproxDistinct
7817 }
7818 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
7819 Expression::ArrayContains(_)
7820 if matches!(
7821 target,
7822 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
7823 ) && !(matches!(source, DialectType::Snowflake) && matches!(target, DialectType::Snowflake)) =>
7824 {
7825 Action::ArrayContainsConvert
7826 }
7827 // ARRAY_CONTAINS -> DuckDB NULL-aware CASE (from Snowflake source with check_null semantics)
7828 Expression::ArrayContains(_)
7829 if matches!(target, DialectType::DuckDB)
7830 && matches!(source, DialectType::Snowflake) =>
7831 {
7832 Action::ArrayContainsDuckDBConvert
7833 }
7834 // ARRAY_EXCEPT -> target-specific conversion
7835 Expression::ArrayExcept(_)
7836 if matches!(
7837 target,
7838 DialectType::DuckDB | DialectType::Snowflake | DialectType::Presto | DialectType::Trino | DialectType::Athena
7839 ) =>
7840 {
7841 Action::ArrayExceptConvert
7842 }
7843 // ARRAY_POSITION -> swap args for Snowflake target (only when source is not Snowflake)
7844 Expression::ArrayPosition(_)
7845 if matches!(target, DialectType::Snowflake)
7846 && !matches!(source, DialectType::Snowflake) =>
7847 {
7848 Action::ArrayPositionSnowflakeSwap
7849 }
7850 // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB from Snowflake source
7851 Expression::ArrayPosition(_)
7852 if matches!(target, DialectType::DuckDB)
7853 && matches!(source, DialectType::Snowflake) =>
7854 {
7855 Action::SnowflakeArrayPositionToDuckDB
7856 }
7857 // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
7858 Expression::ArrayDistinct(_)
7859 if matches!(target, DialectType::ClickHouse) =>
7860 {
7861 Action::ArrayDistinctClickHouse
7862 }
7863 // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
7864 Expression::ArrayDistinct(_)
7865 if matches!(target, DialectType::DuckDB)
7866 && matches!(source, DialectType::Snowflake) =>
7867 {
7868 Action::ArrayDistinctConvert
7869 }
7870 // StrPosition with position -> complex expansion for Presto/DuckDB
7871 // STRPOS doesn't support a position arg in these dialects
7872 Expression::StrPosition(ref sp)
7873 if sp.position.is_some()
7874 && matches!(
7875 target,
7876 DialectType::Presto
7877 | DialectType::Trino
7878 | DialectType::Athena
7879 | DialectType::DuckDB
7880 ) =>
7881 {
7882 Action::StrPositionExpand
7883 }
7884 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
7885 Expression::First(ref f)
7886 if f.ignore_nulls == Some(true)
7887 && matches!(target, DialectType::DuckDB) =>
7888 {
7889 Action::FirstToAnyValue
7890 }
7891 // BEGIN -> START TRANSACTION for Presto/Trino
7892 Expression::Command(ref cmd)
7893 if cmd.this.eq_ignore_ascii_case("BEGIN")
7894 && matches!(
7895 target,
7896 DialectType::Presto | DialectType::Trino | DialectType::Athena
7897 ) =>
7898 {
7899 // Handled inline below
7900 Action::None // We'll handle it directly
7901 }
7902 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
7903 // PostgreSQL # is parsed as BitwiseXor (which is correct).
7904 // a || b (Concat operator) -> CONCAT function for Presto/Trino
7905 Expression::Concat(ref _op)
7906 if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7907 && matches!(target, DialectType::Presto | DialectType::Trino) =>
7908 {
7909 Action::PipeConcatToConcat
7910 }
7911 _ => Action::None,
7912 }
7913 };
7914
7915 match action {
7916 Action::None => {
7917 // Handle inline transforms that don't need a dedicated action
7918
7919 // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
7920 if let Expression::Between(ref b) = e {
7921 if let Some(sym) = b.symmetric {
7922 let keeps_symmetric =
7923 matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
7924 if !keeps_symmetric {
7925 if sym {
7926 // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
7927 let b = if let Expression::Between(b) = e {
7928 *b
7929 } else {
7930 unreachable!()
7931 };
7932 let between1 = Expression::Between(Box::new(
7933 crate::expressions::Between {
7934 this: b.this.clone(),
7935 low: b.low.clone(),
7936 high: b.high.clone(),
7937 not: b.not,
7938 symmetric: None,
7939 },
7940 ));
7941 let between2 = Expression::Between(Box::new(
7942 crate::expressions::Between {
7943 this: b.this,
7944 low: b.high,
7945 high: b.low,
7946 not: b.not,
7947 symmetric: None,
7948 },
7949 ));
7950 return Ok(Expression::Paren(Box::new(
7951 crate::expressions::Paren {
7952 this: Expression::Or(Box::new(
7953 crate::expressions::BinaryOp::new(
7954 between1, between2,
7955 ),
7956 )),
7957 trailing_comments: vec![],
7958 },
7959 )));
7960 } else {
7961 // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
7962 let b = if let Expression::Between(b) = e {
7963 *b
7964 } else {
7965 unreachable!()
7966 };
7967 return Ok(Expression::Between(Box::new(
7968 crate::expressions::Between {
7969 this: b.this,
7970 low: b.low,
7971 high: b.high,
7972 not: b.not,
7973 symmetric: None,
7974 },
7975 )));
7976 }
7977 }
7978 }
7979 }
7980
7981 // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
7982 if let Expression::ILike(ref _like) = e {
7983 if matches!(target, DialectType::StarRocks | DialectType::Doris) {
7984 let like = if let Expression::ILike(l) = e {
7985 *l
7986 } else {
7987 unreachable!()
7988 };
7989 let lower_left = Expression::Function(Box::new(Function::new(
7990 "LOWER".to_string(),
7991 vec![like.left],
7992 )));
7993 let lower_right = Expression::Function(Box::new(Function::new(
7994 "LOWER".to_string(),
7995 vec![like.right],
7996 )));
7997 return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
7998 left: lower_left,
7999 right: lower_right,
8000 escape: like.escape,
8001 quantifier: like.quantifier,
8002 inferred_type: None,
8003 })));
8004 }
8005 }
8006
8007 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
8008 if let Expression::MethodCall(ref mc) = e {
8009 if matches!(source, DialectType::Oracle)
8010 && mc.method.name.eq_ignore_ascii_case("VALUE")
8011 && mc.args.is_empty()
8012 {
8013 let is_dbms_random = match &mc.this {
8014 Expression::Identifier(id) => {
8015 id.name.eq_ignore_ascii_case("DBMS_RANDOM")
8016 }
8017 Expression::Column(col) => {
8018 col.table.is_none()
8019 && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
8020 }
8021 _ => false,
8022 };
8023 if is_dbms_random {
8024 let func_name = match target {
8025 DialectType::PostgreSQL
8026 | DialectType::Redshift
8027 | DialectType::DuckDB
8028 | DialectType::SQLite => "RANDOM",
8029 DialectType::Oracle => "DBMS_RANDOM.VALUE",
8030 _ => "RAND",
8031 };
8032 return Ok(Expression::Function(Box::new(Function::new(
8033 func_name.to_string(),
8034 vec![],
8035 ))));
8036 }
8037 }
8038 }
8039 // TRIM without explicit position -> add BOTH for ClickHouse
8040 if let Expression::Trim(ref trim) = e {
8041 if matches!(target, DialectType::ClickHouse)
8042 && trim.sql_standard_syntax
8043 && trim.characters.is_some()
8044 && !trim.position_explicit
8045 {
8046 let mut new_trim = (**trim).clone();
8047 new_trim.position_explicit = true;
8048 return Ok(Expression::Trim(Box::new(new_trim)));
8049 }
8050 }
8051 // BEGIN -> START TRANSACTION for Presto/Trino
8052 if let Expression::Transaction(ref txn) = e {
8053 if matches!(
8054 target,
8055 DialectType::Presto | DialectType::Trino | DialectType::Athena
8056 ) {
8057 // Convert BEGIN to START TRANSACTION by setting mark to "START"
8058 let mut txn = txn.clone();
8059 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
8060 "START".to_string(),
8061 ))));
8062 return Ok(Expression::Transaction(Box::new(*txn)));
8063 }
8064 }
8065 // IS TRUE/FALSE -> simplified forms for Presto/Trino
8066 if matches!(
8067 target,
8068 DialectType::Presto | DialectType::Trino | DialectType::Athena
8069 ) {
8070 match &e {
8071 Expression::IsTrue(itf) if !itf.not => {
8072 // x IS TRUE -> x
8073 return Ok(itf.this.clone());
8074 }
8075 Expression::IsTrue(itf) if itf.not => {
8076 // x IS NOT TRUE -> NOT x
8077 return Ok(Expression::Not(Box::new(
8078 crate::expressions::UnaryOp {
8079 this: itf.this.clone(),
8080 inferred_type: None,
8081 },
8082 )));
8083 }
8084 Expression::IsFalse(itf) if !itf.not => {
8085 // x IS FALSE -> NOT x
8086 return Ok(Expression::Not(Box::new(
8087 crate::expressions::UnaryOp {
8088 this: itf.this.clone(),
8089 inferred_type: None,
8090 },
8091 )));
8092 }
8093 Expression::IsFalse(itf) if itf.not => {
8094 // x IS NOT FALSE -> NOT NOT x
8095 let not_x =
8096 Expression::Not(Box::new(crate::expressions::UnaryOp {
8097 this: itf.this.clone(),
8098 inferred_type: None,
8099 }));
8100 return Ok(Expression::Not(Box::new(
8101 crate::expressions::UnaryOp {
8102 this: not_x,
8103 inferred_type: None,
8104 },
8105 )));
8106 }
8107 _ => {}
8108 }
8109 }
8110 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
8111 if matches!(target, DialectType::Redshift) {
8112 if let Expression::IsFalse(ref itf) = e {
8113 if itf.not {
8114 return Ok(Expression::Not(Box::new(
8115 crate::expressions::UnaryOp {
8116 this: Expression::IsFalse(Box::new(
8117 crate::expressions::IsTrueFalse {
8118 this: itf.this.clone(),
8119 not: false,
8120 },
8121 )),
8122 inferred_type: None,
8123 },
8124 )));
8125 }
8126 }
8127 }
8128 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
8129 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
8130 if let Expression::Function(ref f) = e {
8131 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
8132 && matches!(source, DialectType::Snowflake)
8133 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
8134 {
8135 if f.args.len() == 3 {
8136 let mut args = f.args.clone();
8137 args.push(Expression::string("g"));
8138 return Ok(Expression::Function(Box::new(Function::new(
8139 "REGEXP_REPLACE".to_string(),
8140 args,
8141 ))));
8142 } else if f.args.len() == 4 {
8143 // 4th arg might be position, add 'g' as 5th
8144 let mut args = f.args.clone();
8145 args.push(Expression::string("g"));
8146 return Ok(Expression::Function(Box::new(Function::new(
8147 "REGEXP_REPLACE".to_string(),
8148 args,
8149 ))));
8150 }
8151 }
8152 }
8153 Ok(e)
8154 }
8155
            Action::GreatestLeastNull => {
                // Wrap a GREATEST/LEAST-style call so that a NULL in ANY argument
                // makes the whole expression NULL:
                //   CASE WHEN a IS NULL OR b IS NULL ... THEN NULL ELSE f(a, b, ...) END
                let f = if let Expression::Function(f) = e {
                    *f
                } else {
                    unreachable!("action only triggered for Function expressions")
                };
                // One `arg IS NULL` check per argument, in argument order.
                let mut null_checks: Vec<Expression> = f
                    .args
                    .iter()
                    .map(|a| {
                        Expression::IsNull(Box::new(IsNull {
                            this: a.clone(),
                            not: false,
                            postfix_form: false,
                        }))
                    })
                    .collect();
                // OR the checks together (left fold keeps left-to-right order).
                // NOTE(review): assumes at least one argument — `remove(0)` would
                // panic on an empty call; presumably guaranteed by the trigger.
                let condition = if null_checks.len() == 1 {
                    null_checks.remove(0)
                } else {
                    let first = null_checks.remove(0);
                    null_checks.into_iter().fold(first, |acc, check| {
                        Expression::Or(Box::new(BinaryOp::new(acc, check)))
                    })
                };
                Ok(Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(condition, Expression::Null(Null))],
                    // ELSE: the original function call, rebuilt unchanged.
                    else_: Some(Expression::Function(Box::new(Function::new(
                        f.name, f.args,
                    )))),
                    comments: Vec::new(),
                    inferred_type: None,
                })))
            }
8191
8192 Action::ArrayGenerateRange => {
8193 let f = if let Expression::Function(f) = e {
8194 *f
8195 } else {
8196 unreachable!("action only triggered for Function expressions")
8197 };
8198 let start = f.args[0].clone();
8199 let end = f.args[1].clone();
8200 let step = f.args.get(2).cloned();
8201
8202 // Helper: compute end - 1 for converting exclusive→inclusive end.
8203 // When end is a literal number, simplify to a computed literal.
8204 fn exclusive_to_inclusive_end(end: &Expression) -> Expression {
8205 // Try to simplify literal numbers
8206 match end {
8207 Expression::Literal(lit)
8208 if matches!(lit.as_ref(), Literal::Number(_)) =>
8209 {
8210 let Literal::Number(n) = lit.as_ref() else {
8211 unreachable!()
8212 };
8213 if let Ok(val) = n.parse::<i64>() {
8214 return Expression::number(val - 1);
8215 }
8216 }
8217 Expression::Neg(u) => {
8218 if let Expression::Literal(lit) = &u.this {
8219 if let Literal::Number(n) = lit.as_ref() {
8220 if let Ok(val) = n.parse::<i64>() {
8221 return Expression::number(-val - 1);
8222 }
8223 }
8224 }
8225 }
8226 _ => {}
8227 }
8228 // Non-literal: produce end - 1 expression
8229 Expression::Sub(Box::new(BinaryOp::new(end.clone(), Expression::number(1))))
8230 }
8231
8232 match target {
8233 // Snowflake ARRAY_GENERATE_RANGE and DuckDB RANGE both use exclusive end,
8234 // so no adjustment needed — just rename the function.
8235 DialectType::Snowflake => {
8236 let mut args = vec![start, end];
8237 if let Some(s) = step {
8238 args.push(s);
8239 }
8240 Ok(Expression::Function(Box::new(Function::new(
8241 "ARRAY_GENERATE_RANGE".to_string(),
8242 args,
8243 ))))
8244 }
8245 DialectType::DuckDB => {
8246 let mut args = vec![start, end];
8247 if let Some(s) = step {
8248 args.push(s);
8249 }
8250 Ok(Expression::Function(Box::new(Function::new(
8251 "RANGE".to_string(),
8252 args,
8253 ))))
8254 }
8255 // These dialects use inclusive end, so convert exclusive→inclusive.
8256 // Presto/Trino: simplify literal numbers (3 → 2).
8257 DialectType::Presto | DialectType::Trino => {
8258 let end_inclusive = exclusive_to_inclusive_end(&end);
8259 let mut args = vec![start, end_inclusive];
8260 if let Some(s) = step {
8261 args.push(s);
8262 }
8263 Ok(Expression::Function(Box::new(Function::new(
8264 "SEQUENCE".to_string(),
8265 args,
8266 ))))
8267 }
8268 // PostgreSQL, Redshift, BigQuery: keep as end - 1 expression form.
8269 DialectType::PostgreSQL | DialectType::Redshift => {
8270 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
8271 end.clone(),
8272 Expression::number(1),
8273 )));
8274 let mut args = vec![start, end_minus_1];
8275 if let Some(s) = step {
8276 args.push(s);
8277 }
8278 Ok(Expression::Function(Box::new(Function::new(
8279 "GENERATE_SERIES".to_string(),
8280 args,
8281 ))))
8282 }
8283 DialectType::BigQuery => {
8284 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
8285 end.clone(),
8286 Expression::number(1),
8287 )));
8288 let mut args = vec![start, end_minus_1];
8289 if let Some(s) = step {
8290 args.push(s);
8291 }
8292 Ok(Expression::Function(Box::new(Function::new(
8293 "GENERATE_ARRAY".to_string(),
8294 args,
8295 ))))
8296 }
8297 _ => Ok(Expression::Function(Box::new(Function::new(
8298 f.name, f.args,
8299 )))),
8300 }
8301 }
8302
            Action::Div0TypedDivision => {
                // DIV0-style rewrites arrive as IF(cond, true_val, left / right).
                // Cast the division's left operand to a floating-point type
                // (REAL for SQLite, DOUBLE otherwise) so the target does not
                // perform the division with a different numeric type.
                let if_func = if let Expression::IfFunc(f) = e {
                    *f
                } else {
                    unreachable!("action only triggered for IfFunc expressions")
                };
                if let Some(Expression::Div(div)) = if_func.false_value {
                    // SQLite spells the type REAL; other targets use DOUBLE.
                    let cast_type = if matches!(target, DialectType::SQLite) {
                        DataType::Float {
                            precision: None,
                            scale: None,
                            real_spelling: true,
                        }
                    } else {
                        DataType::Double {
                            precision: None,
                            scale: None,
                        }
                    };
                    let casted_left = Expression::Cast(Box::new(Cast {
                        this: div.left,
                        to: cast_type,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    // Rebuild the IF with only the false branch's division changed.
                    Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                        condition: if_func.condition,
                        true_value: if_func.true_value,
                        false_value: Some(Expression::Div(Box::new(BinaryOp::new(
                            casted_left,
                            div.right,
                        )))),
                        original_name: if_func.original_name,
                        inferred_type: None,
                    })))
                } else {
                    // False branch is not a division: nothing to retype.
                    Ok(Expression::IfFunc(Box::new(if_func)))
                }
            }
8346
8347 Action::ArrayAggCollectList => {
8348 let agg = if let Expression::ArrayAgg(a) = e {
8349 *a
8350 } else {
8351 unreachable!("action only triggered for ArrayAgg expressions")
8352 };
8353 Ok(Expression::ArrayAgg(Box::new(AggFunc {
8354 name: Some("COLLECT_LIST".to_string()),
8355 ..agg
8356 })))
8357 }
8358
8359 Action::ArrayAggToGroupConcat => {
8360 let agg = if let Expression::ArrayAgg(a) = e {
8361 *a
8362 } else {
8363 unreachable!("action only triggered for ArrayAgg expressions")
8364 };
8365 Ok(Expression::ArrayAgg(Box::new(AggFunc {
8366 name: Some("GROUP_CONCAT".to_string()),
8367 ..agg
8368 })))
8369 }
8370
            Action::ArrayAggWithinGroupFilter => {
                // ARRAY_AGG(x) WITHIN GROUP (ORDER BY ...) ->
                // ARRAY_AGG(x ORDER BY ...) FILTER (WHERE x IS NOT NULL)
                // Moves the WITHIN GROUP ordering into the aggregate itself and
                // adds a NOT NULL filter on the aggregated column.
                let wg = if let Expression::WithinGroup(w) = e {
                    *w
                } else {
                    unreachable!("action only triggered for WithinGroup expressions")
                };
                if let Expression::ArrayAgg(inner_agg) = wg.this {
                    let col = inner_agg.this.clone();
                    // FILTER (WHERE col IS NOT NULL)
                    let filter = Expression::IsNull(Box::new(IsNull {
                        this: col,
                        not: true,
                        postfix_form: false,
                    }));
                    // For DuckDB, add explicit NULLS FIRST for DESC ordering
                    // (only when the key has no explicit nulls placement already).
                    let order_by = if matches!(target, DialectType::DuckDB) {
                        wg.order_by
                            .into_iter()
                            .map(|mut o| {
                                if o.desc && o.nulls_first.is_none() {
                                    o.nulls_first = Some(true);
                                }
                                o
                            })
                            .collect()
                    } else {
                        wg.order_by
                    };
                    Ok(Expression::ArrayAgg(Box::new(AggFunc {
                        this: inner_agg.this,
                        distinct: inner_agg.distinct,
                        filter: Some(filter),
                        order_by,
                        name: inner_agg.name,
                        ignore_nulls: inner_agg.ignore_nulls,
                        having_max: inner_agg.having_max,
                        limit: inner_agg.limit,
                        inferred_type: None,
                    })))
                } else {
                    // WITHIN GROUP over something other than ARRAY_AGG: untouched.
                    Ok(Expression::WithinGroup(Box::new(wg)))
                }
            }
8413
8414 Action::ArrayAggFilter => {
8415 let agg = if let Expression::ArrayAgg(a) = e {
8416 *a
8417 } else {
8418 unreachable!("action only triggered for ArrayAgg expressions")
8419 };
8420 let col = agg.this.clone();
8421 let filter = Expression::IsNull(Box::new(IsNull {
8422 this: col,
8423 not: true,
8424 postfix_form: false,
8425 }));
8426 Ok(Expression::ArrayAgg(Box::new(AggFunc {
8427 filter: Some(filter),
8428 ..agg
8429 })))
8430 }
8431
            Action::ArrayAggNullFilter => {
                // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
                // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
                let agg = if let Expression::ArrayAgg(a) = e {
                    *a
                } else {
                    unreachable!("action only triggered for ArrayAgg expressions")
                };
                let col = agg.this.clone();
                let not_null = Expression::IsNull(Box::new(IsNull {
                    this: col,
                    not: true,
                    postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
                }));
                // Conjoin with any pre-existing FILTER rather than replacing it.
                let new_filter = if let Some(existing_filter) = agg.filter {
                    // AND the NOT IS NULL with existing filter
                    Expression::And(Box::new(crate::expressions::BinaryOp::new(
                        existing_filter,
                        not_null,
                    )))
                } else {
                    not_null
                };
                Ok(Expression::ArrayAgg(Box::new(AggFunc {
                    filter: Some(new_filter),
                    ..agg
                })))
            }
8460
8461 Action::BigQueryArraySelectAsStructToSnowflake => {
8462 // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
8463 // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
8464 if let Expression::Function(mut f) = e {
8465 let is_match = f.args.len() == 1
8466 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
8467 if is_match {
8468 let inner_select = match f.args.remove(0) {
8469 Expression::Select(s) => *s,
8470 _ => unreachable!(
8471 "argument already verified to be a Select expression"
8472 ),
8473 };
8474 // Build OBJECT_CONSTRUCT args from SELECT expressions
8475 let mut oc_args = Vec::new();
8476 for expr in &inner_select.expressions {
8477 match expr {
8478 Expression::Alias(a) => {
8479 let key = Expression::Literal(Box::new(Literal::String(
8480 a.alias.name.clone(),
8481 )));
8482 let value = a.this.clone();
8483 oc_args.push(key);
8484 oc_args.push(value);
8485 }
8486 Expression::Column(c) => {
8487 let key = Expression::Literal(Box::new(Literal::String(
8488 c.name.name.clone(),
8489 )));
8490 oc_args.push(key);
8491 oc_args.push(expr.clone());
8492 }
8493 _ => {
8494 oc_args.push(expr.clone());
8495 }
8496 }
8497 }
8498 let object_construct = Expression::Function(Box::new(Function::new(
8499 "OBJECT_CONSTRUCT".to_string(),
8500 oc_args,
8501 )));
8502 let array_agg = Expression::Function(Box::new(Function::new(
8503 "ARRAY_AGG".to_string(),
8504 vec![object_construct],
8505 )));
8506 let mut new_select = crate::expressions::Select::new();
8507 new_select.expressions = vec![array_agg];
8508 new_select.from = inner_select.from.clone();
8509 new_select.where_clause = inner_select.where_clause.clone();
8510 new_select.group_by = inner_select.group_by.clone();
8511 new_select.having = inner_select.having.clone();
8512 new_select.joins = inner_select.joins.clone();
8513 Ok(Expression::Subquery(Box::new(
8514 crate::expressions::Subquery {
8515 this: Expression::Select(Box::new(new_select)),
8516 alias: None,
8517 column_aliases: Vec::new(),
8518 order_by: None,
8519 limit: None,
8520 offset: None,
8521 distribute_by: None,
8522 sort_by: None,
8523 cluster_by: None,
8524 lateral: false,
8525 modifiers_inside: false,
8526 trailing_comments: Vec::new(),
8527 inferred_type: None,
8528 },
8529 )))
8530 } else {
8531 Ok(Expression::Function(f))
8532 }
8533 } else {
8534 Ok(e)
8535 }
8536 }
8537
8538 Action::BigQueryPercentileContToDuckDB => {
8539 // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
8540 if let Expression::AggregateFunction(mut af) = e {
8541 af.name = "QUANTILE_CONT".to_string();
8542 af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
8543 // Keep only first 2 args
8544 if af.args.len() > 2 {
8545 af.args.truncate(2);
8546 }
8547 Ok(Expression::AggregateFunction(af))
8548 } else {
8549 Ok(e)
8550 }
8551 }
8552
8553 Action::ArrayAggIgnoreNullsDuckDB => {
8554 // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
8555 // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
8556 let mut agg = if let Expression::ArrayAgg(a) = e {
8557 *a
8558 } else {
8559 unreachable!("action only triggered for ArrayAgg expressions")
8560 };
8561 agg.ignore_nulls = None; // Strip IGNORE NULLS
8562 if !agg.order_by.is_empty() {
8563 agg.order_by[0].nulls_first = Some(true);
8564 }
8565 Ok(Expression::ArrayAgg(Box::new(agg)))
8566 }
8567
            Action::CountDistinctMultiArg => {
                // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
                // Wraps the argument tuple in a CASE that yields NULL whenever any
                // member is NULL, so such rows are not counted by the single-arg
                // DISTINCT form.
                if let Expression::Count(c) = e {
                    if let Some(Expression::Tuple(t)) = c.this {
                        let args = t.expressions;
                        // Build CASE expression:
                        // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
                        let mut whens = Vec::new();
                        for arg in &args {
                            whens.push((
                                Expression::IsNull(Box::new(IsNull {
                                    this: arg.clone(),
                                    not: false,
                                    postfix_form: false,
                                })),
                                Expression::Null(crate::expressions::Null),
                            ));
                        }
                        // Build the tuple for ELSE
                        let tuple_expr =
                            Expression::Tuple(Box::new(crate::expressions::Tuple {
                                expressions: args,
                            }));
                        let case_expr = Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens,
                            else_: Some(tuple_expr),
                            comments: Vec::new(),
                            inferred_type: None,
                        }));
                        // Re-wrap in COUNT(DISTINCT ...), preserving FILTER and
                        // the other original COUNT properties.
                        Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
                            this: Some(case_expr),
                            star: false,
                            distinct: true,
                            filter: c.filter,
                            ignore_nulls: c.ignore_nulls,
                            original_name: c.original_name,
                            inferred_type: None,
                        })))
                    } else {
                        // Single-argument COUNT: nothing to rewrite.
                        Ok(Expression::Count(c))
                    }
                } else {
                    Ok(e)
                }
            }
8614
8615 Action::CastTimestampToDatetime => {
8616 let c = if let Expression::Cast(c) = e {
8617 *c
8618 } else {
8619 unreachable!("action only triggered for Cast expressions")
8620 };
8621 Ok(Expression::Cast(Box::new(Cast {
8622 to: DataType::Custom {
8623 name: "DATETIME".to_string(),
8624 },
8625 ..c
8626 })))
8627 }
8628
8629 Action::CastTimestampStripTz => {
8630 // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
8631 let c = if let Expression::Cast(c) = e {
8632 *c
8633 } else {
8634 unreachable!("action only triggered for Cast expressions")
8635 };
8636 Ok(Expression::Cast(Box::new(Cast {
8637 to: DataType::Timestamp {
8638 precision: None,
8639 timezone: false,
8640 },
8641 ..c
8642 })))
8643 }
8644
8645 Action::CastTimestamptzToFunc => {
8646 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
8647 let c = if let Expression::Cast(c) = e {
8648 *c
8649 } else {
8650 unreachable!("action only triggered for Cast expressions")
8651 };
8652 Ok(Expression::Function(Box::new(Function::new(
8653 "TIMESTAMP".to_string(),
8654 vec![c.this],
8655 ))))
8656 }
8657
8658 Action::ToDateToCast => {
8659 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
8660 if let Expression::Function(f) = e {
8661 let arg = f.args.into_iter().next().unwrap();
8662 Ok(Expression::Cast(Box::new(Cast {
8663 this: arg,
8664 to: DataType::Date,
8665 double_colon_syntax: false,
8666 trailing_comments: vec![],
8667 format: None,
8668 default: None,
8669 inferred_type: None,
8670 })))
8671 } else {
8672 Ok(e)
8673 }
8674 }
            Action::DateTruncWrapCast => {
                // Handle both Expression::DateTrunc/TimestampTrunc and
                // Expression::Function("DATE_TRUNC", [unit, expr]).
                // When the truncated operand is itself a CAST to some type T, wrap
                // the whole DATE_TRUNC in CAST(... AS T) so the result keeps T.
                // TIME operands get special handling: the time is first anchored
                // onto the epoch date (DATE '1970-01-01' + t), truncated, then
                // cast back to the TIME type.
                match e {
                    Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
                        // Peek at the operand's cast target type, if any.
                        let input_type = match &d.this {
                            Expression::Cast(c) => Some(c.to.clone()),
                            _ => None,
                        };
                        if let Some(cast_type) = input_type {
                            let is_time = matches!(cast_type, DataType::Time { .. });
                            if is_time {
                                // DATE '1970-01-01' anchor for the TIME value.
                                let date_expr = Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(Box::new(
                                        crate::expressions::Literal::String(
                                            "1970-01-01".to_string(),
                                        ),
                                    )),
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                let add_expr =
                                    Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
                                let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
                                    this: add_expr,
                                    unit: d.unit,
                                }));
                                // Cast the truncated result back to TIME.
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: inner,
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            } else {
                                // Non-TIME cast: just wrap the trunc in the cast type.
                                let inner = Expression::DateTrunc(Box::new(*d));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: inner,
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                        } else {
                            // Operand is not a cast: nothing to wrap.
                            Ok(Expression::DateTrunc(d))
                        }
                    }
                    Expression::Function(f) if f.args.len() == 2 => {
                        // Function-based DATE_TRUNC(unit, expr)
                        let input_type = match &f.args[1] {
                            Expression::Cast(c) => Some(c.to.clone()),
                            _ => None,
                        };
                        if let Some(cast_type) = input_type {
                            let is_time = matches!(cast_type, DataType::Time { .. });
                            if is_time {
                                // Same epoch-anchoring as the node-based path above.
                                let date_expr = Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(Box::new(
                                        crate::expressions::Literal::String(
                                            "1970-01-01".to_string(),
                                        ),
                                    )),
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                let mut args = f.args;
                                let unit_arg = args.remove(0);
                                let time_expr = args.remove(0);
                                let add_expr = Expression::Add(Box::new(BinaryOp::new(
                                    date_expr, time_expr,
                                )));
                                let inner = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![unit_arg, add_expr],
                                )));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: inner,
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            } else {
                                // Wrap the function in CAST
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: Expression::Function(f),
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                        } else {
                            Ok(Expression::Function(f))
                        }
                    }
                    // Any other node shape: leave it untouched.
                    other => Ok(other),
                }
            }
8791
            Action::RegexpReplaceSnowflakeToDuckDB => {
                // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
                // Snowflake replaces all occurrences by default; DuckDB needs the
                // 'g' flag for that. Any trailing args (position) are dropped.
                // NOTE(review): assumes at least 3 arguments — `remove(0)` panics
                // otherwise; presumably guaranteed by whatever triggers this action.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    let subject = args.remove(0);
                    let pattern = args.remove(0);
                    let replacement = args.remove(0);
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_REPLACE".to_string(),
                        vec![
                            subject,
                            pattern,
                            replacement,
                            Expression::Literal(Box::new(crate::expressions::Literal::String(
                                "g".to_string(),
                            ))),
                        ],
                    ))))
                } else {
                    Ok(e)
                }
            }
8814
8815 Action::RegexpReplacePositionSnowflakeToDuckDB => {
8816 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
8817 // pos=1, occ=1 -> REGEXP_REPLACE(s, p, r) (single replace, no 'g')
8818 // pos>1, occ=0 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r, 'g')
8819 // pos>1, occ=1 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r)
8820 // pos=1, occ=0 -> REGEXP_REPLACE(s, p, r, 'g') (replace all)
8821 if let Expression::Function(f) = e {
8822 let mut args = f.args;
8823 let subject = args.remove(0);
8824 let pattern = args.remove(0);
8825 let replacement = args.remove(0);
8826 let position = args.remove(0);
8827 let occurrence = args.remove(0);
8828
8829 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
8830 let is_occ_0 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
8831 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
8832
8833 if is_pos_1 && is_occ_1 {
8834 // REGEXP_REPLACE(s, p, r) - single replace, no flags
8835 Ok(Expression::Function(Box::new(Function::new(
8836 "REGEXP_REPLACE".to_string(),
8837 vec![subject, pattern, replacement],
8838 ))))
8839 } else if is_pos_1 && is_occ_0 {
8840 // REGEXP_REPLACE(s, p, r, 'g') - global replace
8841 Ok(Expression::Function(Box::new(Function::new(
8842 "REGEXP_REPLACE".to_string(),
8843 vec![
8844 subject,
8845 pattern,
8846 replacement,
8847 Expression::Literal(Box::new(Literal::String("g".to_string()))),
8848 ],
8849 ))))
8850 } else {
8851 // pos>1: SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r[, 'g'])
8852 // Pre-compute pos-1 when position is a numeric literal
8853 let pos_minus_1 = if let Expression::Literal(ref lit) = position {
8854 if let Literal::Number(ref n) = lit.as_ref() {
8855 if let Ok(val) = n.parse::<i64>() {
8856 Expression::number(val - 1)
8857 } else {
8858 Expression::Sub(Box::new(BinaryOp::new(
8859 position.clone(),
8860 Expression::number(1),
8861 )))
8862 }
8863 } else {
8864 position.clone()
8865 }
8866 } else {
8867 Expression::Sub(Box::new(BinaryOp::new(
8868 position.clone(),
8869 Expression::number(1),
8870 )))
8871 };
8872 let prefix = Expression::Function(Box::new(Function::new(
8873 "SUBSTRING".to_string(),
8874 vec![subject.clone(), Expression::number(1), pos_minus_1],
8875 )));
8876 let suffix_subject = Expression::Function(Box::new(Function::new(
8877 "SUBSTRING".to_string(),
8878 vec![subject, position],
8879 )));
8880 let mut replace_args = vec![suffix_subject, pattern, replacement];
8881 if is_occ_0 {
8882 replace_args.push(Expression::Literal(Box::new(Literal::String(
8883 "g".to_string(),
8884 ))));
8885 }
8886 let replace_expr = Expression::Function(Box::new(Function::new(
8887 "REGEXP_REPLACE".to_string(),
8888 replace_args,
8889 )));
8890 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
8891 this: Box::new(prefix),
8892 expression: Box::new(replace_expr),
8893 safe: None,
8894 })))
8895 }
8896 } else {
8897 Ok(e)
8898 }
8899 }
8900
            Action::RegexpSubstrSnowflakeToDuckDB => {
                // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants.
                // Dispatch on argument count; each `remove(0)` below is guarded by
                // the surrounding arm's length match, so no panic is possible.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    let arg_count = args.len();
                    match arg_count {
                        // REGEXP_SUBSTR(s, p) -> REGEXP_EXTRACT(s, p)
                        0..=2 => Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT".to_string(),
                            args,
                        )))),
                        // REGEXP_SUBSTR(s, p, pos) -> REGEXP_EXTRACT(NULLIF(SUBSTRING(s, pos), ''), p)
                        3 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let position = args.remove(0);
                            let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                            if is_pos_1 {
                                // pos=1 is the default: no SUBSTRING needed.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![subject, pattern],
                                ))))
                            } else {
                                let substring_expr =
                                    Expression::Function(Box::new(Function::new(
                                        "SUBSTRING".to_string(),
                                        vec![subject, position],
                                    )));
                                // NULLIF turns an empty suffix into NULL —
                                // NOTE(review): presumably to mimic Snowflake's
                                // NULL result when pos is past the end; confirm.
                                let nullif_expr =
                                    Expression::Function(Box::new(Function::new(
                                        "NULLIF".to_string(),
                                        vec![
                                            substring_expr,
                                            Expression::Literal(Box::new(Literal::String(
                                                String::new(),
                                            ))),
                                        ],
                                    )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![nullif_expr, pattern],
                                ))))
                            }
                        }
                        // REGEXP_SUBSTR(s, p, pos, occ) -> depends on pos and occ
                        4 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let position = args.remove(0);
                            let occurrence = args.remove(0);
                            let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                            let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));

                            // Same pos handling as the 3-arg form above.
                            let effective_subject = if is_pos_1 {
                                subject
                            } else {
                                let substring_expr =
                                    Expression::Function(Box::new(Function::new(
                                        "SUBSTRING".to_string(),
                                        vec![subject, position],
                                    )));
                                Expression::Function(Box::new(Function::new(
                                    "NULLIF".to_string(),
                                    vec![
                                        substring_expr,
                                        Expression::Literal(Box::new(Literal::String(
                                            String::new(),
                                        ))),
                                    ],
                                )))
                            };

                            if is_occ_1 {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![effective_subject, pattern],
                                ))))
                            } else {
                                // ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, p), occ)
                                let extract_all =
                                    Expression::Function(Box::new(Function::new(
                                        "REGEXP_EXTRACT_ALL".to_string(),
                                        vec![effective_subject, pattern],
                                    )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "ARRAY_EXTRACT".to_string(),
                                    vec![extract_all, occurrence],
                                ))))
                            }
                        }
                        // REGEXP_SUBSTR(s, p, 1, 1, 'e') -> REGEXP_EXTRACT(s, p)
                        5 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let _position = args.remove(0);
                            let _occurrence = args.remove(0);
                            let _flags = args.remove(0);
                            // Strip 'e' flag, convert to REGEXP_EXTRACT
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT".to_string(),
                                vec![subject, pattern],
                            ))))
                        }
                        // REGEXP_SUBSTR(s, p, 1, 1, 'e', group) -> REGEXP_EXTRACT(s, p[, group])
                        _ => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let _position = args.remove(0);
                            let _occurrence = args.remove(0);
                            let _flags = args.remove(0);
                            let group = args.remove(0);
                            let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
                            if is_group_0 {
                                // Strip group=0 (default)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![subject, pattern],
                                ))))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![subject, pattern, group],
                                ))))
                            }
                        }
                    }
                } else {
                    Ok(e)
                }
            }
9031
9032 Action::RegexpSubstrSnowflakeIdentity => {
9033 // Snowflake→Snowflake: REGEXP_SUBSTR/REGEXP_SUBSTR_ALL with 6 args
9034 // Strip trailing group=0
9035 if let Expression::Function(f) = e {
9036 let func_name = f.name.clone();
9037 let mut args = f.args;
9038 if args.len() == 6 {
9039 let is_group_0 = matches!(&args[5], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
9040 if is_group_0 {
9041 args.truncate(5);
9042 }
9043 }
9044 Ok(Expression::Function(Box::new(Function::new(
9045 func_name, args,
9046 ))))
9047 } else {
9048 Ok(e)
9049 }
9050 }
9051
            Action::RegexpSubstrAllSnowflakeToDuckDB => {
                // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants.
                // Dispatch on argument count; each `remove(0)` below is guarded by
                // the surrounding arm's length match, so no panic is possible.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    let arg_count = args.len();
                    match arg_count {
                        // REGEXP_SUBSTR_ALL(s, p) -> REGEXP_EXTRACT_ALL(s, p)
                        0..=2 => Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT_ALL".to_string(),
                            args,
                        )))),
                        // REGEXP_SUBSTR_ALL(s, p, pos) -> REGEXP_EXTRACT_ALL(SUBSTRING(s, pos), p)
                        3 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let position = args.remove(0);
                            let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                            if is_pos_1 {
                                // pos=1 is the default: no SUBSTRING needed.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![subject, pattern],
                                ))))
                            } else {
                                let substring_expr =
                                    Expression::Function(Box::new(Function::new(
                                        "SUBSTRING".to_string(),
                                        vec![subject, position],
                                    )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![substring_expr, pattern],
                                ))))
                            }
                        }
                        // REGEXP_SUBSTR_ALL(s, p, 1, occ) -> REGEXP_EXTRACT_ALL(s, p)[occ:]
                        4 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let position = args.remove(0);
                            let occurrence = args.remove(0);
                            let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                            let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));

                            let effective_subject = if is_pos_1 {
                                subject
                            } else {
                                Expression::Function(Box::new(Function::new(
                                    "SUBSTRING".to_string(),
                                    vec![subject, position],
                                )))
                            };

                            if is_occ_1 {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![effective_subject, pattern],
                                ))))
                            } else {
                                // REGEXP_EXTRACT_ALL(s, p)[occ:]
                                // occ>1 skips leading matches via an array slice.
                                let extract_all =
                                    Expression::Function(Box::new(Function::new(
                                        "REGEXP_EXTRACT_ALL".to_string(),
                                        vec![effective_subject, pattern],
                                    )));
                                Ok(Expression::ArraySlice(Box::new(
                                    crate::expressions::ArraySlice {
                                        this: extract_all,
                                        start: Some(occurrence),
                                        end: None,
                                    },
                                )))
                            }
                        }
                        // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e') -> REGEXP_EXTRACT_ALL(s, p)
                        5 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let _position = args.remove(0);
                            let _occurrence = args.remove(0);
                            let _flags = args.remove(0);
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![subject, pattern],
                            ))))
                        }
                        // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e', 0) -> REGEXP_EXTRACT_ALL(s, p)
                        _ => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let _position = args.remove(0);
                            let _occurrence = args.remove(0);
                            let _flags = args.remove(0);
                            let group = args.remove(0);
                            let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
                            if is_group_0 {
                                // group=0 is the default: drop it.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![subject, pattern],
                                ))))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![subject, pattern, group],
                                ))))
                            }
                        }
                    }
                } else {
                    Ok(e)
                }
            }
9163
Action::RegexpCountSnowflakeToDuckDB => {
    // Snowflake REGEXP_COUNT(s, p[, pos[, flags]]) ->
    // DuckDB: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
    //
    // The CASE guard reproduces Snowflake's result of 0 for an empty pattern.
    if let Expression::Function(f) = e {
        let mut args = f.args;
        let arg_count = args.len();
        let subject = args.remove(0);
        let pattern = args.remove(0);

        // Handle position arg: counting starts at the requested offset, so a
        // 3rd argument becomes SUBSTRING(subject, position).
        let effective_subject = if arg_count >= 3 {
            let position = args.remove(0);
            Expression::Function(Box::new(Function::new(
                "SUBSTRING".to_string(),
                vec![subject, position],
            )))
        } else {
            subject
        };

        // Handle flags arg -> embed as (?flags) prefix in pattern.
        // Only a non-empty string literal can be folded in; any other flags
        // expression falls through to the raw pattern.
        let effective_pattern = if arg_count >= 4 {
            let flags = args.remove(0);
            match &flags {
                Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(f_str) if !f_str.is_empty()) =>
                {
                    let Literal::String(f_str) = lit.as_ref() else {
                        unreachable!()
                    };
                    // Always use concatenation: '(?flags)' || pattern
                    let prefix = Expression::Literal(Box::new(Literal::String(
                        format!("(?{})", f_str),
                    )));
                    Expression::DPipe(Box::new(crate::expressions::DPipe {
                        this: Box::new(prefix),
                        expression: Box::new(pattern.clone()),
                        safe: None,
                    }))
                }
                _ => pattern.clone(),
            }
        } else {
            pattern.clone()
        };

        // Build: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
        let extract_all = Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT_ALL".to_string(),
            vec![effective_subject, effective_pattern.clone()],
        )));
        let length_expr =
            Expression::Length(Box::new(crate::expressions::UnaryFunc {
                this: extract_all,
                original_name: None,
                inferred_type: None,
            }));
        let condition = Expression::Eq(Box::new(BinaryOp::new(
            effective_pattern,
            Expression::Literal(Box::new(Literal::String(String::new()))),
        )));
        Ok(Expression::Case(Box::new(Case {
            operand: None,
            whens: vec![(condition, Expression::number(0))],
            else_: Some(length_expr),
            comments: vec![],
            inferred_type: None,
        })))
    } else {
        Ok(e)
    }
}
9235
9236 Action::RegexpInstrSnowflakeToDuckDB => {
9237 // Snowflake REGEXP_INSTR(s, p[, pos[, occ[, option[, flags[, group]]]]]) ->
9238 // DuckDB: CASE WHEN s IS NULL OR p IS NULL [OR ...] THEN NULL
9239 // WHEN p = '' THEN 0
9240 // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
9241 // ELSE 1 + COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(eff_s, eff_p)[1:occ], x -> LENGTH(x))), 0)
9242 // + COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(eff_s, eff_p)[1:occ - 1], x -> LENGTH(x))), 0)
9243 // + pos_offset
9244 // END
9245 if let Expression::Function(f) = e {
9246 let mut args = f.args;
9247 let subject = args.remove(0);
9248 let pattern = if !args.is_empty() {
9249 args.remove(0)
9250 } else {
9251 Expression::Literal(Box::new(Literal::String(String::new())))
9252 };
9253
9254 // Collect all original args for NULL checks
9255 let position = if !args.is_empty() {
9256 Some(args.remove(0))
9257 } else {
9258 None
9259 };
9260 let occurrence = if !args.is_empty() {
9261 Some(args.remove(0))
9262 } else {
9263 None
9264 };
9265 let option = if !args.is_empty() {
9266 Some(args.remove(0))
9267 } else {
9268 None
9269 };
9270 let flags = if !args.is_empty() {
9271 Some(args.remove(0))
9272 } else {
9273 None
9274 };
9275 let _group = if !args.is_empty() {
9276 Some(args.remove(0))
9277 } else {
9278 None
9279 };
9280
9281 let is_pos_1 = position.as_ref().map_or(true, |p| matches!(p, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1")));
9282 let occurrence_expr = occurrence.clone().unwrap_or(Expression::number(1));
9283
9284 // Build NULL check: subject IS NULL OR pattern IS NULL [OR pos IS NULL ...]
9285 let mut null_checks: Vec<Expression> = vec![
9286 Expression::Is(Box::new(BinaryOp::new(
9287 subject.clone(),
9288 Expression::Null(Null),
9289 ))),
9290 Expression::Is(Box::new(BinaryOp::new(
9291 pattern.clone(),
9292 Expression::Null(Null),
9293 ))),
9294 ];
9295 // Add NULL checks for all provided optional args
9296 for opt_arg in [&position, &occurrence, &option, &flags].iter() {
9297 if let Some(arg) = opt_arg {
9298 null_checks.push(Expression::Is(Box::new(BinaryOp::new(
9299 (*arg).clone(),
9300 Expression::Null(Null),
9301 ))));
9302 }
9303 }
9304 // Chain with OR
9305 let null_condition = null_checks
9306 .into_iter()
9307 .reduce(|a, b| Expression::Or(Box::new(BinaryOp::new(a, b))))
9308 .unwrap();
9309
9310 // Effective subject (apply position offset)
9311 let effective_subject = if is_pos_1 {
9312 subject.clone()
9313 } else {
9314 let pos = position.clone().unwrap_or(Expression::number(1));
9315 Expression::Function(Box::new(Function::new(
9316 "SUBSTRING".to_string(),
9317 vec![subject.clone(), pos],
9318 )))
9319 };
9320
9321 // Effective pattern (apply flags if present)
9322 let effective_pattern = if let Some(ref fl) = flags {
9323 if let Expression::Literal(lit) = fl {
9324 if let Literal::String(f_str) = lit.as_ref() {
9325 if !f_str.is_empty() {
9326 let prefix = Expression::Literal(Box::new(
9327 Literal::String(format!("(?{})", f_str)),
9328 ));
9329 Expression::DPipe(Box::new(crate::expressions::DPipe {
9330 this: Box::new(prefix),
9331 expression: Box::new(pattern.clone()),
9332 safe: None,
9333 }))
9334 } else {
9335 pattern.clone()
9336 }
9337 } else {
9338 fl.clone()
9339 }
9340 } else {
9341 pattern.clone()
9342 }
9343 } else {
9344 pattern.clone()
9345 };
9346
9347 // WHEN pattern = '' THEN 0
9348 let empty_pattern_check = Expression::Eq(Box::new(BinaryOp::new(
9349 effective_pattern.clone(),
9350 Expression::Literal(Box::new(Literal::String(String::new()))),
9351 )));
9352
9353 // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
9354 let match_count_check = Expression::Lt(Box::new(BinaryOp::new(
9355 Expression::Length(Box::new(crate::expressions::UnaryFunc {
9356 this: Expression::Function(Box::new(Function::new(
9357 "REGEXP_EXTRACT_ALL".to_string(),
9358 vec![effective_subject.clone(), effective_pattern.clone()],
9359 ))),
9360 original_name: None,
9361 inferred_type: None,
9362 })),
9363 occurrence_expr.clone(),
9364 )));
9365
9366 // Helper: build LENGTH lambda for LIST_TRANSFORM
9367 let make_len_lambda = || {
9368 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
9369 parameters: vec![crate::expressions::Identifier::new("x")],
9370 body: Expression::Length(Box::new(crate::expressions::UnaryFunc {
9371 this: Expression::Identifier(
9372 crate::expressions::Identifier::new("x"),
9373 ),
9374 original_name: None,
9375 inferred_type: None,
9376 })),
9377 colon: false,
9378 parameter_types: vec![],
9379 }))
9380 };
9381
9382 // COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(s, p)[1:occ], x -> LENGTH(x))), 0)
9383 let split_sliced =
9384 Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
9385 this: Expression::Function(Box::new(Function::new(
9386 "STRING_SPLIT_REGEX".to_string(),
9387 vec![effective_subject.clone(), effective_pattern.clone()],
9388 ))),
9389 start: Some(Expression::number(1)),
9390 end: Some(occurrence_expr.clone()),
9391 }));
9392 let split_sum = Expression::Function(Box::new(Function::new(
9393 "COALESCE".to_string(),
9394 vec![
9395 Expression::Function(Box::new(Function::new(
9396 "LIST_SUM".to_string(),
9397 vec![Expression::Function(Box::new(Function::new(
9398 "LIST_TRANSFORM".to_string(),
9399 vec![split_sliced, make_len_lambda()],
9400 )))],
9401 ))),
9402 Expression::number(0),
9403 ],
9404 )));
9405
9406 // COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(s, p)[1:occ - 1], x -> LENGTH(x))), 0)
9407 let extract_sliced =
9408 Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
9409 this: Expression::Function(Box::new(Function::new(
9410 "REGEXP_EXTRACT_ALL".to_string(),
9411 vec![effective_subject.clone(), effective_pattern.clone()],
9412 ))),
9413 start: Some(Expression::number(1)),
9414 end: Some(Expression::Sub(Box::new(BinaryOp::new(
9415 occurrence_expr.clone(),
9416 Expression::number(1),
9417 )))),
9418 }));
9419 let extract_sum = Expression::Function(Box::new(Function::new(
9420 "COALESCE".to_string(),
9421 vec![
9422 Expression::Function(Box::new(Function::new(
9423 "LIST_SUM".to_string(),
9424 vec![Expression::Function(Box::new(Function::new(
9425 "LIST_TRANSFORM".to_string(),
9426 vec![extract_sliced, make_len_lambda()],
9427 )))],
9428 ))),
9429 Expression::number(0),
9430 ],
9431 )));
9432
9433 // Position offset: pos - 1 when pos > 1, else 0
9434 let pos_offset: Expression = if !is_pos_1 {
9435 let pos = position.clone().unwrap_or(Expression::number(1));
9436 Expression::Sub(Box::new(BinaryOp::new(pos, Expression::number(1))))
9437 } else {
9438 Expression::number(0)
9439 };
9440
9441 // ELSE: 1 + split_sum + extract_sum + pos_offset
9442 let else_expr = Expression::Add(Box::new(BinaryOp::new(
9443 Expression::Add(Box::new(BinaryOp::new(
9444 Expression::Add(Box::new(BinaryOp::new(
9445 Expression::number(1),
9446 split_sum,
9447 ))),
9448 extract_sum,
9449 ))),
9450 pos_offset,
9451 )));
9452
9453 Ok(Expression::Case(Box::new(Case {
9454 operand: None,
9455 whens: vec![
9456 (null_condition, Expression::Null(Null)),
9457 (empty_pattern_check, Expression::number(0)),
9458 (match_count_check, Expression::number(0)),
9459 ],
9460 else_: Some(else_expr),
9461 comments: vec![],
9462 inferred_type: None,
9463 })))
9464 } else {
9465 Ok(e)
9466 }
9467 }
9468
9469 Action::RlikeSnowflakeToDuckDB => {
9470 // Snowflake RLIKE(a, b[, flags]) -> DuckDB REGEXP_FULL_MATCH(a, b[, flags])
9471 // Both do full-string matching, so no anchoring needed
9472 let (subject, pattern, flags) = match e {
9473 Expression::RegexpLike(ref rl) => {
9474 (rl.this.clone(), rl.pattern.clone(), rl.flags.clone())
9475 }
9476 Expression::Function(ref f) if f.args.len() >= 2 => {
9477 let s = f.args[0].clone();
9478 let p = f.args[1].clone();
9479 let fl = f.args.get(2).cloned();
9480 (s, p, fl)
9481 }
9482 _ => return Ok(e),
9483 };
9484
9485 let mut result_args = vec![subject, pattern];
9486 if let Some(fl) = flags {
9487 result_args.push(fl);
9488 }
9489 Ok(Expression::Function(Box::new(Function::new(
9490 "REGEXP_FULL_MATCH".to_string(),
9491 result_args,
9492 ))))
9493 }
9494
Action::RegexpExtractAllToSnowflake => {
    // BigQuery REGEXP_EXTRACT_ALL(s, p) -> Snowflake REGEXP_SUBSTR_ALL(s, p)
    // With capture group: REGEXP_SUBSTR_ALL(s, p, 1, 1, 'c', 1)
    if let Expression::Function(f) = e {
        let mut args = f.args;
        if args.len() >= 2 {
            let str_expr = args.remove(0);
            let pattern = args.remove(0);

            // NOTE(review): group detection is a plain substring test for
            // '(' and ')', so escaped parens and non-capturing groups
            // (?:...) are also treated as capture groups — confirm this is
            // acceptable for the supported inputs.
            let has_groups = match &pattern {
                Expression::Literal(lit)
                    if matches!(lit.as_ref(), Literal::String(_)) =>
                {
                    let Literal::String(s) = lit.as_ref() else {
                        unreachable!()
                    };
                    s.contains('(') && s.contains(')')
                }
                // Non-literal patterns cannot be inspected statically.
                _ => false,
            };

            if has_groups {
                // Explicit position/occurrence/flags/group args select the
                // first capture group ('c' = return capture).
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_SUBSTR_ALL".to_string(),
                    vec![
                        str_expr,
                        pattern,
                        Expression::number(1),
                        Expression::number(1),
                        Expression::Literal(Box::new(Literal::String(
                            "c".to_string(),
                        ))),
                        Expression::number(1),
                    ],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_SUBSTR_ALL".to_string(),
                    vec![str_expr, pattern],
                ))))
            }
        } else {
            // Fewer than two args: just rename the function, args untouched.
            Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_SUBSTR_ALL".to_string(),
                args,
            ))))
        }
    } else {
        Ok(e)
    }
}
9546
9547 Action::SetToVariable => {
9548 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
9549 if let Expression::SetStatement(mut s) = e {
9550 for item in &mut s.items {
9551 if item.kind.is_none() {
9552 // Check if name already has VARIABLE prefix (from DuckDB source parsing)
9553 let already_variable = match &item.name {
9554 Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
9555 _ => false,
9556 };
9557 if already_variable {
9558 // Extract the actual name and set kind
9559 if let Expression::Identifier(ref mut id) = item.name {
9560 let actual_name = id.name["VARIABLE ".len()..].to_string();
9561 id.name = actual_name;
9562 }
9563 }
9564 item.kind = Some("VARIABLE".to_string());
9565 }
9566 }
9567 Ok(Expression::SetStatement(s))
9568 } else {
9569 Ok(e)
9570 }
9571 }
9572
9573 Action::ConvertTimezoneToExpr => {
9574 // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
9575 // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
9576 if let Expression::Function(f) = e {
9577 if f.args.len() == 2 {
9578 let mut args = f.args;
9579 let target_tz = args.remove(0);
9580 let timestamp = args.remove(0);
9581 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
9582 source_tz: None,
9583 target_tz: Some(Box::new(target_tz)),
9584 timestamp: Some(Box::new(timestamp)),
9585 options: vec![],
9586 })))
9587 } else if f.args.len() == 3 {
9588 let mut args = f.args;
9589 let source_tz = args.remove(0);
9590 let target_tz = args.remove(0);
9591 let timestamp = args.remove(0);
9592 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
9593 source_tz: Some(Box::new(source_tz)),
9594 target_tz: Some(Box::new(target_tz)),
9595 timestamp: Some(Box::new(timestamp)),
9596 options: vec![],
9597 })))
9598 } else {
9599 Ok(Expression::Function(f))
9600 }
9601 } else {
9602 Ok(e)
9603 }
9604 }
9605
9606 Action::BigQueryCastType => {
9607 // Convert BigQuery types to standard SQL types
9608 if let Expression::DataType(dt) = e {
9609 match dt {
9610 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
9611 Ok(Expression::DataType(DataType::BigInt { length: None }))
9612 }
9613 DataType::Custom { ref name }
9614 if name.eq_ignore_ascii_case("FLOAT64") =>
9615 {
9616 Ok(Expression::DataType(DataType::Double {
9617 precision: None,
9618 scale: None,
9619 }))
9620 }
9621 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
9622 Ok(Expression::DataType(DataType::Boolean))
9623 }
9624 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
9625 Ok(Expression::DataType(DataType::VarBinary { length: None }))
9626 }
9627 DataType::Custom { ref name }
9628 if name.eq_ignore_ascii_case("NUMERIC") =>
9629 {
9630 // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
9631 // default precision (18, 3) being added to bare DECIMAL
9632 if matches!(target, DialectType::DuckDB) {
9633 Ok(Expression::DataType(DataType::Custom {
9634 name: "DECIMAL".to_string(),
9635 }))
9636 } else {
9637 Ok(Expression::DataType(DataType::Decimal {
9638 precision: None,
9639 scale: None,
9640 }))
9641 }
9642 }
9643 DataType::Custom { ref name }
9644 if name.eq_ignore_ascii_case("STRING") =>
9645 {
9646 Ok(Expression::DataType(DataType::String { length: None }))
9647 }
9648 DataType::Custom { ref name }
9649 if name.eq_ignore_ascii_case("DATETIME") =>
9650 {
9651 Ok(Expression::DataType(DataType::Timestamp {
9652 precision: None,
9653 timezone: false,
9654 }))
9655 }
9656 _ => Ok(Expression::DataType(dt)),
9657 }
9658 } else {
9659 Ok(e)
9660 }
9661 }
9662
9663 Action::BigQuerySafeDivide => {
9664 // Convert SafeDivide expression to IF/CASE form for most targets
9665 if let Expression::SafeDivide(sd) = e {
9666 let x = *sd.this;
9667 let y = *sd.expression;
9668 // Wrap x and y in parens if they're complex expressions
9669 let y_ref = match &y {
9670 Expression::Column(_)
9671 | Expression::Literal(_)
9672 | Expression::Identifier(_) => y.clone(),
9673 _ => Expression::Paren(Box::new(Paren {
9674 this: y.clone(),
9675 trailing_comments: vec![],
9676 })),
9677 };
9678 let x_ref = match &x {
9679 Expression::Column(_)
9680 | Expression::Literal(_)
9681 | Expression::Identifier(_) => x.clone(),
9682 _ => Expression::Paren(Box::new(Paren {
9683 this: x.clone(),
9684 trailing_comments: vec![],
9685 })),
9686 };
9687 let condition = Expression::Neq(Box::new(BinaryOp::new(
9688 y_ref.clone(),
9689 Expression::number(0),
9690 )));
9691 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
9692
9693 if matches!(target, DialectType::Presto | DialectType::Trino) {
9694 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
9695 let cast_x = Expression::Cast(Box::new(Cast {
9696 this: match &x {
9697 Expression::Column(_)
9698 | Expression::Literal(_)
9699 | Expression::Identifier(_) => x,
9700 _ => Expression::Paren(Box::new(Paren {
9701 this: x,
9702 trailing_comments: vec![],
9703 })),
9704 },
9705 to: DataType::Double {
9706 precision: None,
9707 scale: None,
9708 },
9709 trailing_comments: vec![],
9710 double_colon_syntax: false,
9711 format: None,
9712 default: None,
9713 inferred_type: None,
9714 }));
9715 let cast_div = Expression::Div(Box::new(BinaryOp::new(
9716 cast_x,
9717 match &y {
9718 Expression::Column(_)
9719 | Expression::Literal(_)
9720 | Expression::Identifier(_) => y,
9721 _ => Expression::Paren(Box::new(Paren {
9722 this: y,
9723 trailing_comments: vec![],
9724 })),
9725 },
9726 )));
9727 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9728 condition,
9729 true_value: cast_div,
9730 false_value: Some(Expression::Null(Null)),
9731 original_name: None,
9732 inferred_type: None,
9733 })))
9734 } else if matches!(target, DialectType::PostgreSQL) {
9735 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
9736 let cast_x = Expression::Cast(Box::new(Cast {
9737 this: match &x {
9738 Expression::Column(_)
9739 | Expression::Literal(_)
9740 | Expression::Identifier(_) => x,
9741 _ => Expression::Paren(Box::new(Paren {
9742 this: x,
9743 trailing_comments: vec![],
9744 })),
9745 },
9746 to: DataType::Custom {
9747 name: "DOUBLE PRECISION".to_string(),
9748 },
9749 trailing_comments: vec![],
9750 double_colon_syntax: false,
9751 format: None,
9752 default: None,
9753 inferred_type: None,
9754 }));
9755 let y_paren = match &y {
9756 Expression::Column(_)
9757 | Expression::Literal(_)
9758 | Expression::Identifier(_) => y,
9759 _ => Expression::Paren(Box::new(Paren {
9760 this: y,
9761 trailing_comments: vec![],
9762 })),
9763 };
9764 let cast_div =
9765 Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
9766 Ok(Expression::Case(Box::new(Case {
9767 operand: None,
9768 whens: vec![(condition, cast_div)],
9769 else_: Some(Expression::Null(Null)),
9770 comments: Vec::new(),
9771 inferred_type: None,
9772 })))
9773 } else if matches!(target, DialectType::DuckDB) {
9774 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
9775 Ok(Expression::Case(Box::new(Case {
9776 operand: None,
9777 whens: vec![(condition, div_expr)],
9778 else_: Some(Expression::Null(Null)),
9779 comments: Vec::new(),
9780 inferred_type: None,
9781 })))
9782 } else if matches!(target, DialectType::Snowflake) {
9783 // Snowflake: IFF(y <> 0, x / y, NULL)
9784 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9785 condition,
9786 true_value: div_expr,
9787 false_value: Some(Expression::Null(Null)),
9788 original_name: Some("IFF".to_string()),
9789 inferred_type: None,
9790 })))
9791 } else {
9792 // All others: IF(y <> 0, x / y, NULL)
9793 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9794 condition,
9795 true_value: div_expr,
9796 false_value: Some(Expression::Null(Null)),
9797 original_name: None,
9798 inferred_type: None,
9799 })))
9800 }
9801 } else {
9802 Ok(e)
9803 }
9804 }
9805
Action::BigQueryLastDayStripUnit => {
    // BigQuery LAST_DAY(date[, unit]): drop the explicit unit (MONTH is the
    // default) and rewrite for targets without a native LAST_DAY.
    if let Expression::LastDay(mut ld) = e {
        ld.unit = None; // Strip the unit (MONTH is default)
        match target {
            DialectType::PostgreSQL => {
                // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
                let date_trunc = Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    vec![
                        Expression::Literal(Box::new(
                            crate::expressions::Literal::String(
                                "MONTH".to_string(),
                            ),
                        )),
                        ld.this.clone(),
                    ],
                )));
                // First day of the month, advanced by one month...
                let plus_month =
                    Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                        date_trunc,
                        Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(
                                    crate::expressions::Literal::String(
                                        "1 MONTH".to_string(),
                                    ),
                                ))),
                                unit: None,
                            },
                        )),
                    )));
                // ...then stepped back one day to land on the last day.
                let minus_day =
                    Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                        plus_month,
                        Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(
                                    crate::expressions::Literal::String(
                                        "1 DAY".to_string(),
                                    ),
                                ))),
                                unit: None,
                            },
                        )),
                    )));
                Ok(Expression::Cast(Box::new(Cast {
                    this: minus_day,
                    to: DataType::Date,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })))
            }
            DialectType::Presto => {
                // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
                Ok(Expression::Function(Box::new(Function::new(
                    "LAST_DAY_OF_MONTH".to_string(),
                    vec![ld.this],
                ))))
            }
            DialectType::ClickHouse => {
                // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
                // Need to wrap the DATE type in Nullable
                let nullable_date = match ld.this {
                    Expression::Cast(mut c) => {
                        c.to = DataType::Nullable {
                            inner: Box::new(DataType::Date),
                        };
                        Expression::Cast(c)
                    }
                    // Non-cast operands are passed through unchanged.
                    other => other,
                };
                ld.this = nullable_date;
                Ok(Expression::LastDay(ld))
            }
            _ => Ok(Expression::LastDay(ld)),
        }
    } else {
        Ok(e)
    }
}
9889
Action::BigQueryCastFormat => {
    // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
    // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
    // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
    //
    // Only Cast/SafeCast nodes that carry a FORMAT clause are rewritten;
    // everything else is returned unchanged.
    let (this, to, format_expr, is_safe) = match e {
        Expression::Cast(ref c) if c.format.is_some() => (
            c.this.clone(),
            c.to.clone(),
            c.format.as_ref().unwrap().as_ref().clone(),
            false,
        ),
        Expression::SafeCast(ref c) if c.format.is_some() => (
            c.this.clone(),
            c.to.clone(),
            c.format.as_ref().unwrap().as_ref().clone(),
            true,
        ),
        _ => return Ok(e),
    };
    // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
    if matches!(target, DialectType::BigQuery) {
        match &to {
            DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
                // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
                return Ok(e);
            }
            _ => {}
        }
    }
    // Extract timezone from format if AT TIME ZONE is present
    let (actual_format_expr, timezone) = match &format_expr {
        Expression::AtTimeZone(ref atz) => {
            (atz.this.clone(), Some(atz.zone.clone()))
        }
        _ => (format_expr.clone(), None),
    };
    // Translate the BigQuery FORMAT model string into strftime syntax.
    let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
    match target {
        DialectType::BigQuery => {
            // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
            // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
            let func_name = match &to {
                DataType::Date => "PARSE_DATE",
                DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
                DataType::Time { .. } => "PARSE_TIMESTAMP",
                _ => "PARSE_TIMESTAMP",
            };
            let mut func_args = vec![strftime_fmt, this];
            if let Some(tz) = timezone {
                func_args.push(tz);
            }
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                func_args,
            ))))
        }
        DialectType::DuckDB => {
            // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
            // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
            let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
            let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
            let parse_call = Expression::Function(Box::new(Function::new(
                parse_fn_name.to_string(),
                vec![this, duck_fmt],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: parse_call,
                to,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        // Other targets keep the original CAST ... FORMAT expression.
        _ => Ok(e),
    }
}
9968
Action::BigQueryFunctionNormalize => {
    // Delegate to the shared BigQuery function-normalization routine.
    Self::normalize_bigquery_function(e, source, target)
}

Action::BigQueryToHexBare => {
    // Not used anymore - handled directly in normalize_bigquery_function
    Ok(e)
}
9977
Action::BigQueryToHexLower => {
    // Simplify LOWER(...) wrappers around hex-conversion calls.
    if let Expression::Lower(uf) = e {
        match uf.this {
            // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
            Expression::Function(f)
                if matches!(target, DialectType::BigQuery)
                    && f.name == "TO_HEX" =>
            {
                Ok(Expression::Function(f))
            }
            // LOWER(LOWER(HEX/TO_HEX(x))) patterns
            Expression::Lower(inner_uf) => {
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: extract TO_HEX
                    if let Expression::Function(f) = inner_uf.this {
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_HEX".to_string(),
                            f.args,
                        ))))
                    } else {
                        // Inner value is not a function call: keep one LOWER.
                        Ok(Expression::Lower(inner_uf))
                    }
                } else {
                    // Flatten: LOWER(LOWER(x)) -> LOWER(x)
                    Ok(Expression::Lower(inner_uf))
                }
            }
            // Anything else: rebuild the LOWER node unchanged.
            other => {
                Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
                    this: other,
                    original_name: None,
                    inferred_type: None,
                })))
            }
        }
    } else {
        Ok(e)
    }
}
10017
Action::BigQueryToHexUpper => {
    // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
    // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
    if let Expression::Upper(uf) = e {
        if let Expression::Lower(inner_uf) = uf.this {
            // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
            if matches!(target, DialectType::BigQuery) {
                // Restore TO_HEX name in inner function
                if let Expression::Function(f) = inner_uf.this {
                    let restored = Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        f.args,
                    )));
                    Ok(Expression::Upper(Box::new(
                        crate::expressions::UnaryFunc::new(restored),
                    )))
                } else {
                    // Not a function call: keep the UPPER around whatever
                    // the LOWER wrapped.
                    Ok(Expression::Upper(inner_uf))
                }
            } else {
                // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
                Ok(inner_uf.this)
            }
        } else {
            // No LOWER wrapper: leave the UPPER expression untouched.
            Ok(Expression::Upper(uf))
        }
    } else {
        Ok(e)
    }
}
10048
10049 Action::BigQueryAnyValueHaving => {
10050 // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
10051 // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
10052 if let Expression::AnyValue(agg) = e {
10053 if let Some((having_expr, is_max)) = agg.having_max {
10054 let func_name = if is_max {
10055 "ARG_MAX_NULL"
10056 } else {
10057 "ARG_MIN_NULL"
10058 };
10059 Ok(Expression::Function(Box::new(Function::new(
10060 func_name.to_string(),
10061 vec![agg.this, *having_expr],
10062 ))))
10063 } else {
10064 Ok(Expression::AnyValue(agg))
10065 }
10066 } else {
10067 Ok(e)
10068 }
10069 }
10070
10071 Action::BigQueryApproxQuantiles => {
10072 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
10073 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
10074 if let Expression::AggregateFunction(agg) = e {
10075 if agg.args.len() >= 2 {
10076 let x_expr = agg.args[0].clone();
10077 let n_expr = &agg.args[1];
10078
10079 // Extract the numeric value from n_expr
10080 let n = match n_expr {
10081 Expression::Literal(lit)
10082 if matches!(
10083 lit.as_ref(),
10084 crate::expressions::Literal::Number(_)
10085 ) =>
10086 {
10087 let crate::expressions::Literal::Number(s) = lit.as_ref()
10088 else {
10089 unreachable!()
10090 };
10091 s.parse::<usize>().unwrap_or(2)
10092 }
10093 _ => 2,
10094 };
10095
10096 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
10097 let mut quantiles = Vec::new();
10098 for i in 0..=n {
10099 let q = i as f64 / n as f64;
10100 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
10101 if q == 0.0 {
10102 quantiles.push(Expression::number(0));
10103 } else if q == 1.0 {
10104 quantiles.push(Expression::number(1));
10105 } else {
10106 quantiles.push(Expression::Literal(Box::new(
10107 crate::expressions::Literal::Number(format!("{}", q)),
10108 )));
10109 }
10110 }
10111
10112 let array_expr =
10113 Expression::Array(Box::new(crate::expressions::Array {
10114 expressions: quantiles,
10115 }));
10116
10117 // Preserve DISTINCT modifier
10118 let mut new_func = Function::new(
10119 "APPROX_QUANTILE".to_string(),
10120 vec![x_expr, array_expr],
10121 );
10122 new_func.distinct = agg.distinct;
10123 Ok(Expression::Function(Box::new(new_func)))
10124 } else {
10125 Ok(Expression::AggregateFunction(agg))
10126 }
10127 } else {
10128 Ok(e)
10129 }
10130 }
10131
10132 Action::GenericFunctionNormalize => {
// Helper function: wrap `arg` in the aggregate the target dialect uses in
// place of ARBITRARY (the mapping below is the full table).
fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
    let name = match target {
        // ClickHouse maps to its lowercase `any` aggregate.
        DialectType::ClickHouse => "any",
        // TSQL and SQLite map to MAX as the stand-in.
        DialectType::TSQL | DialectType::SQLite => "MAX",
        DialectType::Hive => "FIRST",
        // These engines keep ARBITRARY by name.
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            "ARBITRARY"
        }
        _ => "ANY_VALUE",
    };
    Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
}
10146
10147 if let Expression::Function(f) = e {
10148 let name = f.name.to_ascii_uppercase();
10149 match name.as_str() {
// ARBITRARY(x): single-argument "pick any value" aggregate; renamed per
// target dialect via convert_arbitrary.
"ARBITRARY" if f.args.len() == 1 => {
    let arg = f.args.into_iter().next().unwrap();
    Ok(convert_arbitrary(arg, target))
}
// TO_NUMBER(x): Oracle/Snowflake keep the native function; every other
// target receives CAST(x AS DOUBLE) instead.
"TO_NUMBER" if f.args.len() == 1 => {
    let arg = f.args.into_iter().next().unwrap();
    match target {
        DialectType::Oracle | DialectType::Snowflake => {
            Ok(Expression::Function(Box::new(Function::new(
                "TO_NUMBER".to_string(),
                vec![arg],
            ))))
        }
        _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
            this: arg,
            // Unparameterized DOUBLE (no precision/scale).
            to: crate::expressions::DataType::Double {
                precision: None,
                scale: None,
            },
            double_colon_syntax: false,
            trailing_comments: Vec::new(),
            format: None,
            default: None,
            inferred_type: None,
        }))),
    }
}
// AGGREGATE(arr, init, lambda, ...): rename to REDUCE for the targets
// listed below; any other target keeps the call untouched.
"AGGREGATE" if f.args.len() >= 3 => match target {
    DialectType::DuckDB
    | DialectType::Hive
    | DialectType::Presto
    | DialectType::Trino => Ok(Expression::Function(Box::new(
        Function::new("REDUCE".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
10186 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep as-is for DuckDB
10187 "REGEXP_MATCHES" if f.args.len() >= 2 => {
10188 if matches!(target, DialectType::DuckDB) {
10189 Ok(Expression::Function(f))
10190 } else {
10191 let mut args = f.args;
10192 let this = args.remove(0);
10193 let pattern = args.remove(0);
10194 let flags = if args.is_empty() {
10195 None
10196 } else {
10197 Some(args.remove(0))
10198 };
10199 Ok(Expression::RegexpLike(Box::new(
10200 crate::expressions::RegexpFunc {
10201 this,
10202 pattern,
10203 flags,
10204 },
10205 )))
10206 }
10207 }
10208 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
10209 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
10210 if matches!(target, DialectType::DuckDB) {
10211 Ok(Expression::Function(f))
10212 } else {
10213 let mut args = f.args;
10214 let this = args.remove(0);
10215 let pattern = args.remove(0);
10216 let flags = if args.is_empty() {
10217 None
10218 } else {
10219 Some(args.remove(0))
10220 };
10221 Ok(Expression::RegexpLike(Box::new(
10222 crate::expressions::RegexpFunc {
10223 this,
10224 pattern,
10225 flags,
10226 },
10227 )))
10228 }
10229 }
// STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
"STRUCT_EXTRACT" if f.args.len() == 2 => {
    let mut args = f.args;
    let this = args.remove(0);
    let field_expr = args.remove(0);
    // Extract string literal to get field name; a bare identifier is also
    // accepted. Anything else (e.g. a computed expression) cannot become
    // a static field access, so the original call is kept verbatim.
    let field_name = match &field_expr {
        Expression::Literal(lit)
            if matches!(
                lit.as_ref(),
                crate::expressions::Literal::String(_)
            ) =>
        {
            // Guard above guarantees the String variant here.
            let crate::expressions::Literal::String(s) = lit.as_ref()
            else {
                unreachable!()
            };
            s.clone()
        }
        Expression::Identifier(id) => id.name.clone(),
        _ => {
            return Ok(Expression::Function(Box::new(Function::new(
                "STRUCT_EXTRACT".to_string(),
                vec![this, field_expr],
            ))))
        }
    };
    Ok(Expression::StructExtract(Box::new(
        crate::expressions::StructExtractFunc {
            this,
            field: crate::expressions::Identifier::new(field_name),
        },
    )))
}
10264 // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
10265 "LIST_FILTER" if f.args.len() == 2 => {
10266 let name = match target {
10267 DialectType::DuckDB => "LIST_FILTER",
10268 _ => "FILTER",
10269 };
10270 Ok(Expression::Function(Box::new(Function::new(
10271 name.to_string(),
10272 f.args,
10273 ))))
10274 }
10275 // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
10276 "LIST_TRANSFORM" if f.args.len() == 2 => {
10277 let name = match target {
10278 DialectType::DuckDB => "LIST_TRANSFORM",
10279 _ => "TRANSFORM",
10280 };
10281 Ok(Expression::Function(Box::new(Function::new(
10282 name.to_string(),
10283 f.args,
10284 ))))
10285 }
10286 // LIST_SORT(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, SORT_ARRAY(x) for others
10287 "LIST_SORT" if f.args.len() >= 1 => {
10288 let name = match target {
10289 DialectType::DuckDB => "LIST_SORT",
10290 DialectType::Presto | DialectType::Trino => "ARRAY_SORT",
10291 _ => "SORT_ARRAY",
10292 };
10293 Ok(Expression::Function(Box::new(Function::new(
10294 name.to_string(),
10295 f.args,
10296 ))))
10297 }
// LIST_REVERSE_SORT(x): descending array sort.
//   DuckDB          -> ARRAY_REVERSE_SORT(x)
//   Spark/Hive      -> SORT_ARRAY(x, FALSE)
//   Presto family   -> ARRAY_SORT(x, comparator lambda)
//   anything else   -> kept unchanged
"LIST_REVERSE_SORT" if f.args.len() >= 1 => {
    match target {
        DialectType::DuckDB => Ok(Expression::Function(Box::new(
            Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
        ))),
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            // Extra FALSE argument is emitted as a bare identifier, not
            // a boolean literal, so it renders as the keyword FALSE.
            let mut args = f.args;
            args.push(Expression::Identifier(
                crate::expressions::Identifier::new("FALSE"),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "SORT_ARRAY".to_string(),
                args,
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
            // The inverted comparator (smaller element ranked later)
            // yields a descending order.
            let arr = f.args.into_iter().next().unwrap();
            let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                parameters: vec![
                    crate::expressions::Identifier::new("a"),
                    crate::expressions::Identifier::new("b"),
                ],
                body: Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![
                        (
                            // a < b => 1
                            Expression::Lt(Box::new(BinaryOp::new(
                                Expression::Identifier(crate::expressions::Identifier::new("a")),
                                Expression::Identifier(crate::expressions::Identifier::new("b")),
                            ))),
                            Expression::number(1),
                        ),
                        (
                            // a > b => -1 (as a "-1" number literal)
                            Expression::Gt(Box::new(BinaryOp::new(
                                Expression::Identifier(crate::expressions::Identifier::new("a")),
                                Expression::Identifier(crate::expressions::Identifier::new("b")),
                            ))),
                            Expression::Literal(Box::new(Literal::Number("-1".to_string()))),
                        ),
                    ],
                    else_: Some(Expression::number(0)),
                    comments: Vec::new(),
                    inferred_type: None,
                })),
                colon: false,
                parameter_types: Vec::new(),
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_SORT".to_string(),
                vec![arr, lambda],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "LIST_REVERSE_SORT".to_string(),
            f.args,
        )))),
    }
}
10362 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
10363 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
10364 let mut args = f.args;
10365 args.push(Expression::string(","));
10366 let name = match target {
10367 DialectType::DuckDB => "STR_SPLIT",
10368 DialectType::Presto | DialectType::Trino => "SPLIT",
10369 DialectType::Spark
10370 | DialectType::Databricks
10371 | DialectType::Hive => "SPLIT",
10372 DialectType::PostgreSQL => "STRING_TO_ARRAY",
10373 DialectType::Redshift => "SPLIT_TO_ARRAY",
10374 _ => "SPLIT",
10375 };
10376 Ok(Expression::Function(Box::new(Function::new(
10377 name.to_string(),
10378 args,
10379 ))))
10380 }
10381 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
10382 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
10383 let name = match target {
10384 DialectType::DuckDB => "STR_SPLIT",
10385 DialectType::Presto | DialectType::Trino => "SPLIT",
10386 DialectType::Spark
10387 | DialectType::Databricks
10388 | DialectType::Hive => "SPLIT",
10389 DialectType::PostgreSQL => "STRING_TO_ARRAY",
10390 DialectType::Redshift => "SPLIT_TO_ARRAY",
10391 _ => "SPLIT",
10392 };
10393 Ok(Expression::Function(Box::new(Function::new(
10394 name.to_string(),
10395 f.args,
10396 ))))
10397 }
// STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
"STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
    let name = match target {
        DialectType::DuckDB => "STR_SPLIT",
        DialectType::Presto | DialectType::Trino => "SPLIT",
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "SPLIT",
        DialectType::Doris | DialectType::StarRocks => {
            "SPLIT_BY_STRING"
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            "STRING_TO_ARRAY"
        }
        _ => "SPLIT",
    };
    // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
    if matches!(
        target,
        DialectType::Spark
            | DialectType::Databricks
            | DialectType::Hive
    ) {
        let mut args = f.args;
        let x = args.remove(0);
        let sep = args.remove(0);
        // Wrap separator in CONCAT('\\Q', sep, '\\E') so it matches
        // literally even when it contains regex metacharacters.
        let escaped_sep =
            Expression::Function(Box::new(Function::new(
                "CONCAT".to_string(),
                vec![
                    Expression::string("\\Q"),
                    sep,
                    Expression::string("\\E"),
                ],
            )));
        Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            vec![x, escaped_sep],
        ))))
    } else {
        Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            f.args,
        ))))
    }
}
10445 // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
10446 "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
10447 let name = match target {
10448 DialectType::DuckDB => "STR_SPLIT_REGEX",
10449 DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
10450 DialectType::Spark
10451 | DialectType::Databricks
10452 | DialectType::Hive => "SPLIT",
10453 _ => "REGEXP_SPLIT",
10454 };
10455 Ok(Expression::Function(Box::new(Function::new(
10456 name.to_string(),
10457 f.args,
10458 ))))
10459 }
// SPLIT(str, delim) from Snowflake -> DuckDB with CASE wrapper
"SPLIT"
    if f.args.len() == 2
        && matches!(source, DialectType::Snowflake)
        && matches!(target, DialectType::DuckDB) =>
{
    let mut args = f.args;
    let str_arg = args.remove(0);
    let delim_arg = args.remove(0);

    // STR_SPLIT(str, delim) as the base
    let base_func = Expression::Function(Box::new(Function::new(
        "STR_SPLIT".to_string(),
        vec![str_arg.clone(), delim_arg.clone()],
    )));

    // [str] - array with single element
    let array_with_input =
        Expression::Array(Box::new(crate::expressions::Array {
            expressions: vec![str_arg],
        }));

    // CASE
    //   WHEN delim IS NULL THEN NULL
    //   WHEN delim = '' THEN [str]
    //   ELSE STR_SPLIT(str, delim)
    // END
    // The wrapper reproduces the source's NULL/empty-delimiter edge
    // cases that a bare STR_SPLIT would not cover.
    Ok(Expression::Case(Box::new(Case {
        operand: None,
        whens: vec![
            (
                Expression::Is(Box::new(BinaryOp {
                    left: delim_arg.clone(),
                    right: Expression::Null(Null),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
                Expression::Null(Null),
            ),
            (
                Expression::Eq(Box::new(BinaryOp {
                    left: delim_arg,
                    right: Expression::string(""),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
                array_with_input,
            ),
        ],
        else_: Some(base_func),
        comments: vec![],
        inferred_type: None,
    })))
}
// SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark
"SPLIT"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
                | DialectType::StarRocks
                | DialectType::Doris
        )
        && matches!(
            target,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex,
    // so the separator is wrapped in \Q...\E to force literal matching.
    let mut args = f.args;
    let x = args.remove(0);
    let sep = args.remove(0);
    let escaped_sep = Expression::Function(Box::new(Function::new(
        "CONCAT".to_string(),
        vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
    )));
    Ok(Expression::Function(Box::new(Function::new(
        "SPLIT".to_string(),
        vec![x, escaped_sep],
    ))))
}
10549 // SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
10550 // For ClickHouse target, preserve original name to maintain camelCase
10551 "SUBSTRINGINDEX" => {
10552 let name = if matches!(target, DialectType::ClickHouse) {
10553 f.name.clone()
10554 } else {
10555 "SUBSTRING_INDEX".to_string()
10556 };
10557 Ok(Expression::Function(Box::new(Function::new(name, f.args))))
10558 }
// ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
"ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
    // DuckDB source CARDINALITY -> DuckDB target: keep as CARDINALITY (used for maps)
    if name == "CARDINALITY"
        && matches!(source, DialectType::DuckDB)
        && matches!(target, DialectType::DuckDB)
    {
        return Ok(Expression::Function(f));
    }
    // Get the array argument (first arg, drop dimension args)
    let mut args = f.args;
    let arr = if args.is_empty() {
        // Zero-argument call: nothing to normalize; keep it unchanged.
        return Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            args,
        ))));
    } else {
        args.remove(0)
    };
    // Shadowed `name`: now the target-side function name. DuckDB and
    // PostgreSQL/Redshift return early because they preserve the extra
    // dimension arguments; all remaining targets take only the array.
    let name =
        match target {
            DialectType::Spark
            | DialectType::Databricks
            | DialectType::Hive => "SIZE",
            DialectType::Presto | DialectType::Trino => "CARDINALITY",
            DialectType::BigQuery => "ARRAY_LENGTH",
            DialectType::DuckDB => {
                // DuckDB: use ARRAY_LENGTH with all args
                let mut all_args = vec![arr];
                all_args.extend(args);
                return Ok(Expression::Function(Box::new(
                    Function::new("ARRAY_LENGTH".to_string(), all_args),
                )));
            }
            DialectType::PostgreSQL | DialectType::Redshift => {
                // Keep ARRAY_LENGTH with dimension arg
                let mut all_args = vec![arr];
                all_args.extend(args);
                return Ok(Expression::Function(Box::new(
                    Function::new("ARRAY_LENGTH".to_string(), all_args),
                )));
            }
            DialectType::ClickHouse => "LENGTH",
            _ => "ARRAY_LENGTH",
        };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        vec![arr],
    ))))
}
// TO_VARIANT(x) -> CAST(x AS VARIANT) for DuckDB
"TO_VARIANT" if f.args.len() == 1 => match target {
    DialectType::DuckDB => {
        let arg = f.args.into_iter().next().unwrap();
        // VARIANT is not a builtin DataType variant, so it is carried
        // through as a custom type name.
        Ok(Expression::Cast(Box::new(Cast {
            this: arg,
            to: DataType::Custom {
                name: "VARIANT".to_string(),
            },
            double_colon_syntax: false,
            trailing_comments: Vec::new(),
            format: None,
            default: None,
            inferred_type: None,
        })))
    }
    _ => Ok(Expression::Function(f)),
},
// JSON_GROUP_ARRAY(x) -> JSON_AGG(x) for PostgreSQL
"JSON_GROUP_ARRAY" if f.args.len() == 1 => match target {
    DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
        Function::new("JSON_AGG".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// JSON_GROUP_OBJECT(key, value) -> JSON_OBJECT_AGG(key, value) for PostgreSQL
"JSON_GROUP_OBJECT" if f.args.len() == 2 => match target {
    DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
        Function::new("JSON_OBJECT_AGG".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// UNICODE(x) -> target-specific codepoint function
"UNICODE" if f.args.len() == 1 => {
    match target {
        // SQLite and DuckDB keep the UNICODE spelling unchanged.
        DialectType::SQLite | DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new(
                "UNICODE".to_string(),
                f.args,
            ))))
        }
        DialectType::Oracle => {
            // ASCII(UNISTR(x))
            let inner = Expression::Function(Box::new(Function::new(
                "UNISTR".to_string(),
                f.args,
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ASCII".to_string(),
                vec![inner],
            ))))
        }
        DialectType::MySQL => {
            // ORD(CONVERT(x USING utf32))
            let arg = f.args.into_iter().next().unwrap();
            let convert_expr = Expression::ConvertToCharset(Box::new(
                crate::expressions::ConvertToCharset {
                    this: Box::new(arg),
                    // Destination charset emitted as a bare identifier.
                    dest: Some(Box::new(Expression::Identifier(
                        crate::expressions::Identifier::new("utf32"),
                    ))),
                    source: None,
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "ORD".to_string(),
                vec![convert_expr],
            ))))
        }
        // Fallback for all other targets: ASCII(x).
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ASCII".to_string(),
            f.args,
        )))),
    }
}
10684 // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
10685 "XOR" if f.args.len() >= 2 => {
10686 match target {
10687 DialectType::ClickHouse => {
10688 // ClickHouse: keep as xor() function with lowercase name
10689 Ok(Expression::Function(Box::new(Function::new(
10690 "xor".to_string(),
10691 f.args,
10692 ))))
10693 }
10694 DialectType::Presto | DialectType::Trino => {
10695 if f.args.len() == 2 {
10696 Ok(Expression::Function(Box::new(Function::new(
10697 "BITWISE_XOR".to_string(),
10698 f.args,
10699 ))))
10700 } else {
10701 // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
10702 let mut args = f.args;
10703 let first = args.remove(0);
10704 let second = args.remove(0);
10705 let mut result =
10706 Expression::Function(Box::new(Function::new(
10707 "BITWISE_XOR".to_string(),
10708 vec![first, second],
10709 )));
10710 for arg in args {
10711 result =
10712 Expression::Function(Box::new(Function::new(
10713 "BITWISE_XOR".to_string(),
10714 vec![result, arg],
10715 )));
10716 }
10717 Ok(result)
10718 }
10719 }
10720 DialectType::MySQL
10721 | DialectType::SingleStore
10722 | DialectType::Doris
10723 | DialectType::StarRocks => {
10724 // Convert XOR(a, b, c) -> Expression::Xor with expressions list
10725 let args = f.args;
10726 Ok(Expression::Xor(Box::new(crate::expressions::Xor {
10727 this: None,
10728 expression: None,
10729 expressions: args,
10730 })))
10731 }
10732 DialectType::PostgreSQL | DialectType::Redshift => {
10733 // PostgreSQL: a # b (hash operator for XOR)
10734 let mut args = f.args;
10735 let first = args.remove(0);
10736 let second = args.remove(0);
10737 let mut result = Expression::BitwiseXor(Box::new(
10738 BinaryOp::new(first, second),
10739 ));
10740 for arg in args {
10741 result = Expression::BitwiseXor(Box::new(
10742 BinaryOp::new(result, arg),
10743 ));
10744 }
10745 Ok(result)
10746 }
10747 DialectType::DuckDB => {
10748 // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
10749 Ok(Expression::Function(Box::new(Function::new(
10750 "XOR".to_string(),
10751 f.args,
10752 ))))
10753 }
10754 DialectType::BigQuery => {
10755 // BigQuery: a ^ b (caret operator for XOR)
10756 let mut args = f.args;
10757 let first = args.remove(0);
10758 let second = args.remove(0);
10759 let mut result = Expression::BitwiseXor(Box::new(
10760 BinaryOp::new(first, second),
10761 ));
10762 for arg in args {
10763 result = Expression::BitwiseXor(Box::new(
10764 BinaryOp::new(result, arg),
10765 ));
10766 }
10767 Ok(result)
10768 }
10769 _ => Ok(Expression::Function(Box::new(Function::new(
10770 "XOR".to_string(),
10771 f.args,
10772 )))),
10773 }
10774 }
// ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
"ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
    match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            // Extra FALSE argument is emitted as a bare identifier so it
            // renders as the keyword FALSE.
            let mut args = f.args;
            args.push(Expression::Identifier(
                crate::expressions::Identifier::new("FALSE"),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "SORT_ARRAY".to_string(),
                args,
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
            // The inverted comparator yields a descending sort.
            let arr = f.args.into_iter().next().unwrap();
            let lambda = Expression::Lambda(Box::new(
                crate::expressions::LambdaExpr {
                    parameters: vec![
                        Identifier::new("a"),
                        Identifier::new("b"),
                    ],
                    colon: false,
                    parameter_types: Vec::new(),
                    body: Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![
                            (
                                // a < b => 1
                                Expression::Lt(Box::new(
                                    BinaryOp::new(
                                        Expression::Identifier(
                                            Identifier::new("a"),
                                        ),
                                        Expression::Identifier(
                                            Identifier::new("b"),
                                        ),
                                    ),
                                )),
                                Expression::number(1),
                            ),
                            (
                                // a > b => -1 (unary negation of 1)
                                Expression::Gt(Box::new(
                                    BinaryOp::new(
                                        Expression::Identifier(
                                            Identifier::new("a"),
                                        ),
                                        Expression::Identifier(
                                            Identifier::new("b"),
                                        ),
                                    ),
                                )),
                                Expression::Neg(Box::new(
                                    crate::expressions::UnaryOp {
                                        this: Expression::number(1),
                                        inferred_type: None,
                                    },
                                )),
                            ),
                        ],
                        else_: Some(Expression::number(0)),
                        comments: Vec::new(),
                        inferred_type: None,
                    })),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_SORT".to_string(),
                vec![arr, lambda],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ARRAY_REVERSE_SORT".to_string(),
            f.args,
        )))),
    }
}
// ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
"ENCODE" if f.args.len() == 1 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => {
        // Make the implicit utf-8 charset explicit for these targets.
        let mut args = f.args;
        args.push(Expression::string("utf-8"));
        Ok(Expression::Function(Box::new(Function::new(
            "ENCODE".to_string(),
            args,
        ))))
    }
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        Ok(Expression::Function(Box::new(Function::new(
            "TO_UTF8".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(Box::new(Function::new(
        "ENCODE".to_string(),
        f.args,
    )))),
},
// DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
// (exact mirror of the ENCODE arm, for the decoding direction)
"DECODE" if f.args.len() == 1 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => {
        // Make the implicit utf-8 charset explicit for these targets.
        let mut args = f.args;
        args.push(Expression::string("utf-8"));
        Ok(Expression::Function(Box::new(Function::new(
            "DECODE".to_string(),
            args,
        ))))
    }
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        Ok(Expression::Function(Box::new(Function::new(
            "FROM_UTF8".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(Box::new(Function::new(
        "DECODE".to_string(),
        f.args,
    )))),
},
10901 // QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive
10902 "QUANTILE" if f.args.len() == 2 => {
10903 let name = match target {
10904 DialectType::Spark
10905 | DialectType::Databricks
10906 | DialectType::Hive => "PERCENTILE",
10907 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
10908 DialectType::BigQuery => "PERCENTILE_CONT",
10909 _ => "QUANTILE",
10910 };
10911 Ok(Expression::Function(Box::new(Function::new(
10912 name.to_string(),
10913 f.args,
10914 ))))
10915 }
// QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
"QUANTILE_CONT" if f.args.len() == 2 => {
    let mut args = f.args;
    let column = args.remove(0);
    let quantile = args.remove(0);
    match target {
        DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new(
                "QUANTILE_CONT".to_string(),
                vec![column, quantile],
            ))))
        }
        DialectType::PostgreSQL
        | DialectType::Redshift
        | DialectType::Snowflake => {
            // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
            // The column appears both inside the percentile node and in
            // the WITHIN GROUP ordering, hence the clone.
            let inner = Expression::PercentileCont(Box::new(
                crate::expressions::PercentileFunc {
                    this: column.clone(),
                    percentile: quantile,
                    order_by: None,
                    filter: None,
                },
            ));
            Ok(Expression::WithinGroup(Box::new(
                crate::expressions::WithinGroup {
                    this: inner,
                    order_by: vec![crate::expressions::Ordered {
                        this: column,
                        desc: false,
                        nulls_first: None,
                        explicit_asc: false,
                        with_fill: None,
                    }],
                },
            )))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "QUANTILE_CONT".to_string(),
            vec![column, quantile],
        )))),
    }
}
// QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
// (discrete variant; identical structure to the QUANTILE_CONT arm)
"QUANTILE_DISC" if f.args.len() == 2 => {
    let mut args = f.args;
    let column = args.remove(0);
    let quantile = args.remove(0);
    match target {
        DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new(
                "QUANTILE_DISC".to_string(),
                vec![column, quantile],
            ))))
        }
        DialectType::PostgreSQL
        | DialectType::Redshift
        | DialectType::Snowflake => {
            // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
            let inner = Expression::PercentileDisc(Box::new(
                crate::expressions::PercentileFunc {
                    this: column.clone(),
                    percentile: quantile,
                    order_by: None,
                    filter: None,
                },
            ));
            Ok(Expression::WithinGroup(Box::new(
                crate::expressions::WithinGroup {
                    this: inner,
                    order_by: vec![crate::expressions::Ordered {
                        this: column,
                        desc: false,
                        nulls_first: None,
                        explicit_asc: false,
                        with_fill: None,
                    }],
                },
            )))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "QUANTILE_DISC".to_string(),
            vec![column, quantile],
        )))),
    }
}
// PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
"PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
    let name = match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => "APPROX_PERCENTILE",
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "PERCENTILE_APPROX",
        DialectType::DuckDB => "APPROX_QUANTILE",
        DialectType::PostgreSQL | DialectType::Redshift => {
            "PERCENTILE_CONT"
        }
        // Any other target keeps whichever spelling the source used.
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
"EPOCH" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "UNIX_TIMESTAMP",
        DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
        _ => "EPOCH",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
11036 // EPOCH_MS(x) -> target-specific epoch milliseconds conversion
11037 "EPOCH_MS" if f.args.len() == 1 => {
11038 match target {
11039 DialectType::Spark | DialectType::Databricks => {
11040 Ok(Expression::Function(Box::new(Function::new(
11041 "TIMESTAMP_MILLIS".to_string(),
11042 f.args,
11043 ))))
11044 }
11045 DialectType::Hive => {
11046 // Hive: FROM_UNIXTIME(x / 1000)
11047 let arg = f.args.into_iter().next().unwrap();
11048 let div_expr = Expression::Div(Box::new(
11049 crate::expressions::BinaryOp::new(
11050 arg,
11051 Expression::number(1000),
11052 ),
11053 ));
11054 Ok(Expression::Function(Box::new(Function::new(
11055 "FROM_UNIXTIME".to_string(),
11056 vec![div_expr],
11057 ))))
11058 }
11059 DialectType::Presto | DialectType::Trino => {
11060 Ok(Expression::Function(Box::new(Function::new(
11061 "FROM_UNIXTIME".to_string(),
11062 vec![Expression::Div(Box::new(
11063 crate::expressions::BinaryOp::new(
11064 f.args.into_iter().next().unwrap(),
11065 Expression::number(1000),
11066 ),
11067 ))],
11068 ))))
11069 }
11070 _ => Ok(Expression::Function(Box::new(Function::new(
11071 "EPOCH_MS".to_string(),
11072 f.args,
11073 )))),
11074 }
11075 }
// HASHBYTES('algorithm', x) -> target-specific hash function
"HASHBYTES" if f.args.len() == 2 => {
    // Keep HASHBYTES as-is for TSQL target
    if matches!(target, DialectType::TSQL) {
        return Ok(Expression::Function(f));
    }
    // The algorithm selector must be a string literal; otherwise the
    // call cannot be normalized and is kept verbatim.
    let algo_expr = &f.args[0];
    let algo = match algo_expr {
        Expression::Literal(lit)
            if matches!(
                lit.as_ref(),
                crate::expressions::Literal::String(_)
            ) =>
        {
            // Guard above guarantees the String variant here.
            let crate::expressions::Literal::String(s) = lit.as_ref()
            else {
                unreachable!()
            };
            s.to_ascii_uppercase()
        }
        _ => return Ok(Expression::Function(f)),
    };
    let data_arg = f.args.into_iter().nth(1).unwrap();
    match algo.as_str() {
        "SHA1" => {
            let name = match target {
                DialectType::Spark | DialectType::Databricks => "SHA",
                DialectType::Hive => "SHA1",
                _ => "SHA1",
            };
            Ok(Expression::Function(Box::new(Function::new(
                name.to_string(),
                vec![data_arg],
            ))))
        }
        // SHA2_256 / SHA2_512 map onto SHA2(x, bits).
        "SHA2_256" => {
            Ok(Expression::Function(Box::new(Function::new(
                "SHA2".to_string(),
                vec![data_arg, Expression::number(256)],
            ))))
        }
        "SHA2_512" => {
            Ok(Expression::Function(Box::new(Function::new(
                "SHA2".to_string(),
                vec![data_arg, Expression::number(512)],
            ))))
        }
        "MD5" => Ok(Expression::Function(Box::new(Function::new(
            "MD5".to_string(),
            vec![data_arg],
        )))),
        // Unknown algorithm: re-emit HASHBYTES with the upper-cased
        // algorithm string.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "HASHBYTES".to_string(),
            vec![Expression::string(&algo), data_arg],
        )))),
    }
}
// JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
"JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
    // The _TEXT variant extracts a scalar/text value; the plain variant
    // keeps the JSON fragment.
    let is_text = name == "JSON_EXTRACT_PATH_TEXT";
    let mut args = f.args;
    let json_expr = args.remove(0);
    // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
    let mut json_path = "$".to_string();
    for a in &args {
        match a {
            Expression::Literal(lit)
                if matches!(
                    lit.as_ref(),
                    crate::expressions::Literal::String(_)
                ) =>
            {
                let crate::expressions::Literal::String(s) =
                    lit.as_ref()
                else {
                    unreachable!()
                };
                // Numeric string keys become array indices: [0]
                if s.chars().all(|c| c.is_ascii_digit()) {
                    json_path.push('[');
                    json_path.push_str(s);
                    json_path.push(']');
                } else {
                    json_path.push('.');
                    json_path.push_str(s);
                }
            }
            _ => {
                // Non-literal key: emit a placeholder path segment.
                json_path.push_str(".?");
            }
        }
    }
    match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::BigQuery | DialectType::MySQL => {
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::PostgreSQL | DialectType::Materialize => {
            // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
            // (keys are passed as separate arguments, so the built path
            // string is not used by this branch).
            let func_name = if is_text {
                "JSON_EXTRACT_PATH_TEXT"
            } else {
                "JSON_EXTRACT_PATH"
            };
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                new_args,
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
            if is_text {
                Ok(Expression::JsonExtractScalar(Box::new(
                    crate::expressions::JsonExtractFunc {
                        this: json_expr,
                        path: Expression::string(&json_path),
                        returning: None,
                        arrow_syntax: true,
                        hash_arrow_syntax: false,
                        wrapper_option: None,
                        quotes_option: None,
                        on_scalar_string: false,
                        on_error: None,
                    },
                )))
            } else {
                Ok(Expression::JsonExtract(Box::new(
                    crate::expressions::JsonExtractFunc {
                        this: json_expr,
                        path: Expression::string(&json_path),
                        returning: None,
                        arrow_syntax: true,
                        hash_arrow_syntax: false,
                        wrapper_option: None,
                        quotes_option: None,
                        on_scalar_string: false,
                        on_error: None,
                    },
                )))
            }
        }
        DialectType::Redshift => {
            // Keep as JSON_EXTRACT_PATH_TEXT for Redshift
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXTRACT_PATH_TEXT".to_string(),
                new_args,
            ))))
        }
        DialectType::TSQL => {
            // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
            // ISNULL picks whichever of the two produced a value.
            let jq = Expression::Function(Box::new(Function::new(
                "JSON_QUERY".to_string(),
                vec![json_expr.clone(), Expression::string(&json_path)],
            )));
            let jv = Expression::Function(Box::new(Function::new(
                "JSON_VALUE".to_string(),
                vec![json_expr, Expression::string(&json_path)],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ISNULL".to_string(),
                vec![jq, jv],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse takes the keys as separate arguments.
            let func_name = if is_text {
                "JSONExtractString"
            } else {
                "JSONExtractRaw"
            };
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                new_args,
            ))))
        }
        _ => {
            // Default: JSON_EXTRACT(_SCALAR)(json, '$.path').
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
    }
}
11295 // APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
11296 "APPROX_DISTINCT" if f.args.len() >= 1 => {
11297 let name = match target {
11298 DialectType::Spark
11299 | DialectType::Databricks
11300 | DialectType::Hive
11301 | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
11302 _ => "APPROX_DISTINCT",
11303 };
11304 let mut args = f.args;
11305 // Hive doesn't support the accuracy parameter
11306 if name == "APPROX_COUNT_DISTINCT"
11307 && matches!(target, DialectType::Hive)
11308 {
11309 args.truncate(1);
11310 }
11311 Ok(Expression::Function(Box::new(Function::new(
11312 name.to_string(),
11313 args,
11314 ))))
11315 }
11316 // REGEXP_EXTRACT(x, pattern) - normalize default group index
11317 "REGEXP_EXTRACT" if f.args.len() == 2 => {
11318 // Determine source default group index
11319 let source_default = match source {
11320 DialectType::Presto
11321 | DialectType::Trino
11322 | DialectType::DuckDB => 0,
11323 _ => 1, // Hive/Spark/Databricks default = 1
11324 };
11325 // Determine target default group index
11326 let target_default = match target {
11327 DialectType::Presto
11328 | DialectType::Trino
11329 | DialectType::DuckDB
11330 | DialectType::BigQuery => 0,
11331 DialectType::Snowflake => {
11332 // Snowflake uses REGEXP_SUBSTR
11333 return Ok(Expression::Function(Box::new(Function::new(
11334 "REGEXP_SUBSTR".to_string(),
11335 f.args,
11336 ))));
11337 }
11338 _ => 1, // Hive/Spark/Databricks default = 1
11339 };
11340 if source_default != target_default {
11341 let mut args = f.args;
11342 args.push(Expression::number(source_default));
11343 Ok(Expression::Function(Box::new(Function::new(
11344 "REGEXP_EXTRACT".to_string(),
11345 args,
11346 ))))
11347 } else {
11348 Ok(Expression::Function(Box::new(Function::new(
11349 "REGEXP_EXTRACT".to_string(),
11350 f.args,
11351 ))))
11352 }
11353 }
11354 // RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
11355 "RLIKE" if f.args.len() == 2 => {
11356 let mut args = f.args;
11357 let str_expr = args.remove(0);
11358 let pattern = args.remove(0);
11359 match target {
11360 DialectType::DuckDB => {
11361 // REGEXP_MATCHES(str, pattern)
11362 Ok(Expression::Function(Box::new(Function::new(
11363 "REGEXP_MATCHES".to_string(),
11364 vec![str_expr, pattern],
11365 ))))
11366 }
11367 _ => {
11368 // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
11369 Ok(Expression::RegexpLike(Box::new(
11370 crate::expressions::RegexpFunc {
11371 this: str_expr,
11372 pattern,
11373 flags: None,
11374 },
11375 )))
11376 }
11377 }
11378 }
11379 // EOMONTH(date[, month_offset]) -> target-specific
11380 "EOMONTH" if f.args.len() >= 1 => {
11381 let mut args = f.args;
11382 let date_arg = args.remove(0);
11383 let month_offset = if !args.is_empty() {
11384 Some(args.remove(0))
11385 } else {
11386 None
11387 };
11388
11389 // Helper: wrap date in CAST to DATE
11390 let cast_to_date = |e: Expression| -> Expression {
11391 Expression::Cast(Box::new(Cast {
11392 this: e,
11393 to: DataType::Date,
11394 trailing_comments: vec![],
11395 double_colon_syntax: false,
11396 format: None,
11397 default: None,
11398 inferred_type: None,
11399 }))
11400 };
11401
11402 match target {
11403 DialectType::TSQL | DialectType::Fabric => {
11404 // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
11405 let date = cast_to_date(date_arg);
11406 let date = if let Some(offset) = month_offset {
11407 Expression::Function(Box::new(Function::new(
11408 "DATEADD".to_string(),
11409 vec![
11410 Expression::Identifier(Identifier::new(
11411 "MONTH",
11412 )),
11413 offset,
11414 date,
11415 ],
11416 )))
11417 } else {
11418 date
11419 };
11420 Ok(Expression::Function(Box::new(Function::new(
11421 "EOMONTH".to_string(),
11422 vec![date],
11423 ))))
11424 }
11425 DialectType::Presto
11426 | DialectType::Trino
11427 | DialectType::Athena => {
11428 // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
11429 // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
11430 let cast_ts = Expression::Cast(Box::new(Cast {
11431 this: date_arg,
11432 to: DataType::Timestamp {
11433 timezone: false,
11434 precision: None,
11435 },
11436 trailing_comments: vec![],
11437 double_colon_syntax: false,
11438 format: None,
11439 default: None,
11440 inferred_type: None,
11441 }));
11442 let date = cast_to_date(cast_ts);
11443 let date = if let Some(offset) = month_offset {
11444 Expression::Function(Box::new(Function::new(
11445 "DATE_ADD".to_string(),
11446 vec![Expression::string("MONTH"), offset, date],
11447 )))
11448 } else {
11449 date
11450 };
11451 Ok(Expression::Function(Box::new(Function::new(
11452 "LAST_DAY_OF_MONTH".to_string(),
11453 vec![date],
11454 ))))
11455 }
11456 DialectType::PostgreSQL => {
11457 // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
11458 let date = cast_to_date(date_arg);
11459 let date = if let Some(offset) = month_offset {
11460 let interval_str = format!(
11461 "{} MONTH",
11462 Self::expr_to_string_static(&offset)
11463 );
11464 Expression::Add(Box::new(
11465 crate::expressions::BinaryOp::new(
11466 date,
11467 Expression::Interval(Box::new(
11468 crate::expressions::Interval {
11469 this: Some(Expression::string(
11470 &interval_str,
11471 )),
11472 unit: None,
11473 },
11474 )),
11475 ),
11476 ))
11477 } else {
11478 date
11479 };
11480 let truncated =
11481 Expression::Function(Box::new(Function::new(
11482 "DATE_TRUNC".to_string(),
11483 vec![Expression::string("MONTH"), date],
11484 )));
11485 let plus_month = Expression::Add(Box::new(
11486 crate::expressions::BinaryOp::new(
11487 truncated,
11488 Expression::Interval(Box::new(
11489 crate::expressions::Interval {
11490 this: Some(Expression::string("1 MONTH")),
11491 unit: None,
11492 },
11493 )),
11494 ),
11495 ));
11496 let minus_day = Expression::Sub(Box::new(
11497 crate::expressions::BinaryOp::new(
11498 plus_month,
11499 Expression::Interval(Box::new(
11500 crate::expressions::Interval {
11501 this: Some(Expression::string("1 DAY")),
11502 unit: None,
11503 },
11504 )),
11505 ),
11506 ));
11507 Ok(Expression::Cast(Box::new(Cast {
11508 this: minus_day,
11509 to: DataType::Date,
11510 trailing_comments: vec![],
11511 double_colon_syntax: false,
11512 format: None,
11513 default: None,
11514 inferred_type: None,
11515 })))
11516 }
11517 DialectType::DuckDB => {
11518 // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
11519 let date = cast_to_date(date_arg);
11520 let date = if let Some(offset) = month_offset {
11521 // Wrap negative numbers in parentheses for DuckDB INTERVAL
11522 let interval_val =
11523 if matches!(&offset, Expression::Neg(_)) {
11524 Expression::Paren(Box::new(
11525 crate::expressions::Paren {
11526 this: offset,
11527 trailing_comments: Vec::new(),
11528 },
11529 ))
11530 } else {
11531 offset
11532 };
11533 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
11534 date,
11535 Expression::Interval(Box::new(crate::expressions::Interval {
11536 this: Some(interval_val),
11537 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
11538 unit: crate::expressions::IntervalUnit::Month,
11539 use_plural: false,
11540 }),
11541 })),
11542 )))
11543 } else {
11544 date
11545 };
11546 Ok(Expression::Function(Box::new(Function::new(
11547 "LAST_DAY".to_string(),
11548 vec![date],
11549 ))))
11550 }
11551 DialectType::Snowflake | DialectType::Redshift => {
11552 // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
11553 // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
11554 let date = if matches!(target, DialectType::Snowflake) {
11555 Expression::Function(Box::new(Function::new(
11556 "TO_DATE".to_string(),
11557 vec![date_arg],
11558 )))
11559 } else {
11560 cast_to_date(date_arg)
11561 };
11562 let date = if let Some(offset) = month_offset {
11563 Expression::Function(Box::new(Function::new(
11564 "DATEADD".to_string(),
11565 vec![
11566 Expression::Identifier(Identifier::new(
11567 "MONTH",
11568 )),
11569 offset,
11570 date,
11571 ],
11572 )))
11573 } else {
11574 date
11575 };
11576 Ok(Expression::Function(Box::new(Function::new(
11577 "LAST_DAY".to_string(),
11578 vec![date],
11579 ))))
11580 }
11581 DialectType::Spark | DialectType::Databricks => {
11582 // Spark: LAST_DAY(TO_DATE(date))
11583 // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
11584 let date = Expression::Function(Box::new(Function::new(
11585 "TO_DATE".to_string(),
11586 vec![date_arg],
11587 )));
11588 let date = if let Some(offset) = month_offset {
11589 Expression::Function(Box::new(Function::new(
11590 "ADD_MONTHS".to_string(),
11591 vec![date, offset],
11592 )))
11593 } else {
11594 date
11595 };
11596 Ok(Expression::Function(Box::new(Function::new(
11597 "LAST_DAY".to_string(),
11598 vec![date],
11599 ))))
11600 }
11601 DialectType::MySQL => {
11602 // MySQL: LAST_DAY(DATE(date)) - no offset
11603 // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
11604 let date = if let Some(offset) = month_offset {
11605 let iu = crate::expressions::IntervalUnit::Month;
11606 Expression::DateAdd(Box::new(
11607 crate::expressions::DateAddFunc {
11608 this: date_arg,
11609 interval: offset,
11610 unit: iu,
11611 },
11612 ))
11613 } else {
11614 Expression::Function(Box::new(Function::new(
11615 "DATE".to_string(),
11616 vec![date_arg],
11617 )))
11618 };
11619 Ok(Expression::Function(Box::new(Function::new(
11620 "LAST_DAY".to_string(),
11621 vec![date],
11622 ))))
11623 }
11624 DialectType::BigQuery => {
11625 // BigQuery: LAST_DAY(CAST(date AS DATE))
11626 // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
11627 let date = cast_to_date(date_arg);
11628 let date = if let Some(offset) = month_offset {
11629 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
11630 this: Some(offset),
11631 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
11632 unit: crate::expressions::IntervalUnit::Month,
11633 use_plural: false,
11634 }),
11635 }));
11636 Expression::Function(Box::new(Function::new(
11637 "DATE_ADD".to_string(),
11638 vec![date, interval],
11639 )))
11640 } else {
11641 date
11642 };
11643 Ok(Expression::Function(Box::new(Function::new(
11644 "LAST_DAY".to_string(),
11645 vec![date],
11646 ))))
11647 }
11648 DialectType::ClickHouse => {
11649 // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
11650 let date = Expression::Cast(Box::new(Cast {
11651 this: date_arg,
11652 to: DataType::Nullable {
11653 inner: Box::new(DataType::Date),
11654 },
11655 trailing_comments: vec![],
11656 double_colon_syntax: false,
11657 format: None,
11658 default: None,
11659 inferred_type: None,
11660 }));
11661 let date = if let Some(offset) = month_offset {
11662 Expression::Function(Box::new(Function::new(
11663 "DATE_ADD".to_string(),
11664 vec![
11665 Expression::Identifier(Identifier::new(
11666 "MONTH",
11667 )),
11668 offset,
11669 date,
11670 ],
11671 )))
11672 } else {
11673 date
11674 };
11675 Ok(Expression::Function(Box::new(Function::new(
11676 "LAST_DAY".to_string(),
11677 vec![date],
11678 ))))
11679 }
11680 DialectType::Hive => {
11681 // Hive: LAST_DAY(date)
11682 let date = if let Some(offset) = month_offset {
11683 Expression::Function(Box::new(Function::new(
11684 "ADD_MONTHS".to_string(),
11685 vec![date_arg, offset],
11686 )))
11687 } else {
11688 date_arg
11689 };
11690 Ok(Expression::Function(Box::new(Function::new(
11691 "LAST_DAY".to_string(),
11692 vec![date],
11693 ))))
11694 }
11695 _ => {
11696 // Default: LAST_DAY(date)
11697 let date = if let Some(offset) = month_offset {
11698 let unit =
11699 Expression::Identifier(Identifier::new("MONTH"));
11700 Expression::Function(Box::new(Function::new(
11701 "DATEADD".to_string(),
11702 vec![unit, offset, date_arg],
11703 )))
11704 } else {
11705 date_arg
11706 };
11707 Ok(Expression::Function(Box::new(Function::new(
11708 "LAST_DAY".to_string(),
11709 vec![date],
11710 ))))
11711 }
11712 }
11713 }
11714 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
11715 "LAST_DAY" | "LAST_DAY_OF_MONTH"
11716 if !matches!(source, DialectType::BigQuery)
11717 && f.args.len() >= 1 =>
11718 {
11719 let first_arg = f.args.into_iter().next().unwrap();
11720 match target {
11721 DialectType::TSQL | DialectType::Fabric => {
11722 Ok(Expression::Function(Box::new(Function::new(
11723 "EOMONTH".to_string(),
11724 vec![first_arg],
11725 ))))
11726 }
11727 DialectType::Presto
11728 | DialectType::Trino
11729 | DialectType::Athena => {
11730 Ok(Expression::Function(Box::new(Function::new(
11731 "LAST_DAY_OF_MONTH".to_string(),
11732 vec![first_arg],
11733 ))))
11734 }
11735 _ => Ok(Expression::Function(Box::new(Function::new(
11736 "LAST_DAY".to_string(),
11737 vec![first_arg],
11738 )))),
11739 }
11740 }
11741 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
11742 "MAP"
11743 if f.args.len() == 2
11744 && matches!(
11745 source,
11746 DialectType::Presto
11747 | DialectType::Trino
11748 | DialectType::Athena
11749 ) =>
11750 {
11751 let keys_arg = f.args[0].clone();
11752 let vals_arg = f.args[1].clone();
11753
11754 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
11755 fn extract_array_elements(
11756 expr: &Expression,
11757 ) -> Option<&Vec<Expression>> {
11758 match expr {
11759 Expression::Array(arr) => Some(&arr.expressions),
11760 Expression::ArrayFunc(arr) => Some(&arr.expressions),
11761 Expression::Function(f)
11762 if f.name.eq_ignore_ascii_case("ARRAY") =>
11763 {
11764 Some(&f.args)
11765 }
11766 _ => None,
11767 }
11768 }
11769
11770 match target {
11771 DialectType::Spark | DialectType::Databricks => {
11772 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
11773 Ok(Expression::Function(Box::new(Function::new(
11774 "MAP_FROM_ARRAYS".to_string(),
11775 f.args,
11776 ))))
11777 }
11778 DialectType::Hive => {
11779 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
11780 if let (Some(keys), Some(vals)) = (
11781 extract_array_elements(&keys_arg),
11782 extract_array_elements(&vals_arg),
11783 ) {
11784 if keys.len() == vals.len() {
11785 let mut interleaved = Vec::new();
11786 for (k, v) in keys.iter().zip(vals.iter()) {
11787 interleaved.push(k.clone());
11788 interleaved.push(v.clone());
11789 }
11790 Ok(Expression::Function(Box::new(Function::new(
11791 "MAP".to_string(),
11792 interleaved,
11793 ))))
11794 } else {
11795 Ok(Expression::Function(Box::new(Function::new(
11796 "MAP".to_string(),
11797 f.args,
11798 ))))
11799 }
11800 } else {
11801 Ok(Expression::Function(Box::new(Function::new(
11802 "MAP".to_string(),
11803 f.args,
11804 ))))
11805 }
11806 }
11807 DialectType::Snowflake => {
11808 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
11809 if let (Some(keys), Some(vals)) = (
11810 extract_array_elements(&keys_arg),
11811 extract_array_elements(&vals_arg),
11812 ) {
11813 if keys.len() == vals.len() {
11814 let mut interleaved = Vec::new();
11815 for (k, v) in keys.iter().zip(vals.iter()) {
11816 interleaved.push(k.clone());
11817 interleaved.push(v.clone());
11818 }
11819 Ok(Expression::Function(Box::new(Function::new(
11820 "OBJECT_CONSTRUCT".to_string(),
11821 interleaved,
11822 ))))
11823 } else {
11824 Ok(Expression::Function(Box::new(Function::new(
11825 "MAP".to_string(),
11826 f.args,
11827 ))))
11828 }
11829 } else {
11830 Ok(Expression::Function(Box::new(Function::new(
11831 "MAP".to_string(),
11832 f.args,
11833 ))))
11834 }
11835 }
11836 _ => Ok(Expression::Function(f)),
11837 }
11838 }
11839 // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
11840 "MAP"
11841 if f.args.is_empty()
11842 && matches!(
11843 source,
11844 DialectType::Hive
11845 | DialectType::Spark
11846 | DialectType::Databricks
11847 )
11848 && matches!(
11849 target,
11850 DialectType::Presto
11851 | DialectType::Trino
11852 | DialectType::Athena
11853 ) =>
11854 {
11855 let empty_keys =
11856 Expression::Array(Box::new(crate::expressions::Array {
11857 expressions: vec![],
11858 }));
11859 let empty_vals =
11860 Expression::Array(Box::new(crate::expressions::Array {
11861 expressions: vec![],
11862 }));
11863 Ok(Expression::Function(Box::new(Function::new(
11864 "MAP".to_string(),
11865 vec![empty_keys, empty_vals],
11866 ))))
11867 }
11868 // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
11869 "MAP"
11870 if f.args.len() >= 2
11871 && f.args.len() % 2 == 0
11872 && matches!(
11873 source,
11874 DialectType::Hive
11875 | DialectType::Spark
11876 | DialectType::Databricks
11877 | DialectType::ClickHouse
11878 ) =>
11879 {
11880 let args = f.args;
11881 match target {
11882 DialectType::DuckDB => {
11883 // MAP([k1, k2], [v1, v2])
11884 let mut keys = Vec::new();
11885 let mut vals = Vec::new();
11886 for (i, arg) in args.into_iter().enumerate() {
11887 if i % 2 == 0 {
11888 keys.push(arg);
11889 } else {
11890 vals.push(arg);
11891 }
11892 }
11893 let keys_arr = Expression::Array(Box::new(
11894 crate::expressions::Array { expressions: keys },
11895 ));
11896 let vals_arr = Expression::Array(Box::new(
11897 crate::expressions::Array { expressions: vals },
11898 ));
11899 Ok(Expression::Function(Box::new(Function::new(
11900 "MAP".to_string(),
11901 vec![keys_arr, vals_arr],
11902 ))))
11903 }
11904 DialectType::Presto | DialectType::Trino => {
11905 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
11906 let mut keys = Vec::new();
11907 let mut vals = Vec::new();
11908 for (i, arg) in args.into_iter().enumerate() {
11909 if i % 2 == 0 {
11910 keys.push(arg);
11911 } else {
11912 vals.push(arg);
11913 }
11914 }
11915 let keys_arr = Expression::Array(Box::new(
11916 crate::expressions::Array { expressions: keys },
11917 ));
11918 let vals_arr = Expression::Array(Box::new(
11919 crate::expressions::Array { expressions: vals },
11920 ));
11921 Ok(Expression::Function(Box::new(Function::new(
11922 "MAP".to_string(),
11923 vec![keys_arr, vals_arr],
11924 ))))
11925 }
11926 DialectType::Snowflake => Ok(Expression::Function(Box::new(
11927 Function::new("OBJECT_CONSTRUCT".to_string(), args),
11928 ))),
11929 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
11930 Function::new("map".to_string(), args),
11931 ))),
11932 _ => Ok(Expression::Function(Box::new(Function::new(
11933 "MAP".to_string(),
11934 args,
11935 )))),
11936 }
11937 }
11938 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
11939 "COLLECT_LIST" if f.args.len() >= 1 => {
11940 let name = match target {
11941 DialectType::Spark
11942 | DialectType::Databricks
11943 | DialectType::Hive => "COLLECT_LIST",
11944 DialectType::DuckDB
11945 | DialectType::PostgreSQL
11946 | DialectType::Redshift
11947 | DialectType::Snowflake
11948 | DialectType::BigQuery => "ARRAY_AGG",
11949 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
11950 _ => "ARRAY_AGG",
11951 };
11952 Ok(Expression::Function(Box::new(Function::new(
11953 name.to_string(),
11954 f.args,
11955 ))))
11956 }
11957 // COLLECT_SET(x) -> target-specific distinct array aggregation
11958 "COLLECT_SET" if f.args.len() >= 1 => {
11959 let name = match target {
11960 DialectType::Spark
11961 | DialectType::Databricks
11962 | DialectType::Hive => "COLLECT_SET",
11963 DialectType::Presto
11964 | DialectType::Trino
11965 | DialectType::Athena => "SET_AGG",
11966 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
11967 _ => "ARRAY_AGG",
11968 };
11969 Ok(Expression::Function(Box::new(Function::new(
11970 name.to_string(),
11971 f.args,
11972 ))))
11973 }
11974 // ISNAN(x) / IS_NAN(x) - normalize
11975 "ISNAN" | "IS_NAN" => {
11976 let name = match target {
11977 DialectType::Spark
11978 | DialectType::Databricks
11979 | DialectType::Hive => "ISNAN",
11980 DialectType::Presto
11981 | DialectType::Trino
11982 | DialectType::Athena => "IS_NAN",
11983 DialectType::BigQuery
11984 | DialectType::PostgreSQL
11985 | DialectType::Redshift => "IS_NAN",
11986 DialectType::ClickHouse => "IS_NAN",
11987 _ => "ISNAN",
11988 };
11989 Ok(Expression::Function(Box::new(Function::new(
11990 name.to_string(),
11991 f.args,
11992 ))))
11993 }
11994 // SPLIT_PART(str, delim, index) -> target-specific
11995 "SPLIT_PART" if f.args.len() == 3 => {
11996 match target {
11997 DialectType::Spark | DialectType::Databricks => {
11998 // Keep as SPLIT_PART (Spark 3.4+)
11999 Ok(Expression::Function(Box::new(Function::new(
12000 "SPLIT_PART".to_string(),
12001 f.args,
12002 ))))
12003 }
12004 DialectType::DuckDB
12005 if matches!(source, DialectType::Snowflake) =>
12006 {
12007 // Snowflake SPLIT_PART -> DuckDB with CASE wrapper:
12008 // - part_index 0 treated as 1
12009 // - empty delimiter: return whole string if index 1 or -1, else ''
12010 let mut args = f.args;
12011 let str_arg = args.remove(0);
12012 let delim_arg = args.remove(0);
12013 let idx_arg = args.remove(0);
12014
12015 // (CASE WHEN idx = 0 THEN 1 ELSE idx END)
12016 let adjusted_idx = Expression::Paren(Box::new(Paren {
12017 this: Expression::Case(Box::new(Case {
12018 operand: None,
12019 whens: vec![(
12020 Expression::Eq(Box::new(BinaryOp {
12021 left: idx_arg.clone(),
12022 right: Expression::number(0),
12023 left_comments: vec![],
12024 operator_comments: vec![],
12025 trailing_comments: vec![],
12026 inferred_type: None,
12027 })),
12028 Expression::number(1),
12029 )],
12030 else_: Some(idx_arg.clone()),
12031 comments: vec![],
12032 inferred_type: None,
12033 })),
12034 trailing_comments: vec![],
12035 }));
12036
12037 // SPLIT_PART(str, delim, adjusted_idx)
12038 let base_func =
12039 Expression::Function(Box::new(Function::new(
12040 "SPLIT_PART".to_string(),
12041 vec![
12042 str_arg.clone(),
12043 delim_arg.clone(),
12044 adjusted_idx.clone(),
12045 ],
12046 )));
12047
12048 // (CASE WHEN adjusted_idx = 1 OR adjusted_idx = -1 THEN str ELSE '' END)
12049 let empty_delim_case = Expression::Paren(Box::new(Paren {
12050 this: Expression::Case(Box::new(Case {
12051 operand: None,
12052 whens: vec![(
12053 Expression::Or(Box::new(BinaryOp {
12054 left: Expression::Eq(Box::new(BinaryOp {
12055 left: adjusted_idx.clone(),
12056 right: Expression::number(1),
12057 left_comments: vec![],
12058 operator_comments: vec![],
12059 trailing_comments: vec![],
12060 inferred_type: None,
12061 })),
12062 right: Expression::Eq(Box::new(BinaryOp {
12063 left: adjusted_idx,
12064 right: Expression::number(-1),
12065 left_comments: vec![],
12066 operator_comments: vec![],
12067 trailing_comments: vec![],
12068 inferred_type: None,
12069 })),
12070 left_comments: vec![],
12071 operator_comments: vec![],
12072 trailing_comments: vec![],
12073 inferred_type: None,
12074 })),
12075 str_arg,
12076 )],
12077 else_: Some(Expression::string("")),
12078 comments: vec![],
12079 inferred_type: None,
12080 })),
12081 trailing_comments: vec![],
12082 }));
12083
12084 // CASE WHEN delim = '' THEN (empty case) ELSE SPLIT_PART(...) END
12085 Ok(Expression::Case(Box::new(Case {
12086 operand: None,
12087 whens: vec![(
12088 Expression::Eq(Box::new(BinaryOp {
12089 left: delim_arg,
12090 right: Expression::string(""),
12091 left_comments: vec![],
12092 operator_comments: vec![],
12093 trailing_comments: vec![],
12094 inferred_type: None,
12095 })),
12096 empty_delim_case,
12097 )],
12098 else_: Some(base_func),
12099 comments: vec![],
12100 inferred_type: None,
12101 })))
12102 }
12103 DialectType::DuckDB
12104 | DialectType::PostgreSQL
12105 | DialectType::Snowflake
12106 | DialectType::Redshift
12107 | DialectType::Trino
12108 | DialectType::Presto => Ok(Expression::Function(Box::new(
12109 Function::new("SPLIT_PART".to_string(), f.args),
12110 ))),
12111 DialectType::Hive => {
12112 // SPLIT(str, delim)[index]
12113 // Complex conversion, just keep as-is for now
12114 Ok(Expression::Function(Box::new(Function::new(
12115 "SPLIT_PART".to_string(),
12116 f.args,
12117 ))))
12118 }
12119 _ => Ok(Expression::Function(Box::new(Function::new(
12120 "SPLIT_PART".to_string(),
12121 f.args,
12122 )))),
12123 }
12124 }
12125 // JSON_EXTRACT(json, path) -> target-specific JSON extraction
12126 "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
12127 let is_scalar = name == "JSON_EXTRACT_SCALAR";
12128 match target {
12129 DialectType::Spark
12130 | DialectType::Databricks
12131 | DialectType::Hive => {
12132 let mut args = f.args;
12133 // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
12134 // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
12135 if let Some(Expression::Function(inner)) = args.first() {
12136 if inner.name.eq_ignore_ascii_case("TRY")
12137 && inner.args.len() == 1
12138 {
12139 let mut inner_args = inner.args.clone();
12140 args[0] = inner_args.remove(0);
12141 }
12142 }
12143 Ok(Expression::Function(Box::new(Function::new(
12144 "GET_JSON_OBJECT".to_string(),
12145 args,
12146 ))))
12147 }
12148 DialectType::DuckDB | DialectType::SQLite => {
12149 // json -> path syntax
12150 let mut args = f.args;
12151 let json_expr = args.remove(0);
12152 let path = args.remove(0);
12153 Ok(Expression::JsonExtract(Box::new(
12154 crate::expressions::JsonExtractFunc {
12155 this: json_expr,
12156 path,
12157 returning: None,
12158 arrow_syntax: true,
12159 hash_arrow_syntax: false,
12160 wrapper_option: None,
12161 quotes_option: None,
12162 on_scalar_string: false,
12163 on_error: None,
12164 },
12165 )))
12166 }
12167 DialectType::TSQL => {
12168 let func_name = if is_scalar {
12169 "JSON_VALUE"
12170 } else {
12171 "JSON_QUERY"
12172 };
12173 Ok(Expression::Function(Box::new(Function::new(
12174 func_name.to_string(),
12175 f.args,
12176 ))))
12177 }
12178 DialectType::PostgreSQL | DialectType::Redshift => {
12179 let func_name = if is_scalar {
12180 "JSON_EXTRACT_PATH_TEXT"
12181 } else {
12182 "JSON_EXTRACT_PATH"
12183 };
12184 Ok(Expression::Function(Box::new(Function::new(
12185 func_name.to_string(),
12186 f.args,
12187 ))))
12188 }
12189 _ => Ok(Expression::Function(Box::new(Function::new(
12190 name.to_string(),
12191 f.args,
12192 )))),
12193 }
12194 }
12195 // MySQL JSON_SEARCH(json_doc, mode, search[, escape_char[, path]]) -> DuckDB json_tree-based lookup
12196 "JSON_SEARCH"
12197 if matches!(target, DialectType::DuckDB)
12198 && (3..=5).contains(&f.args.len()) =>
12199 {
12200 let args = &f.args;
12201
12202 // Only rewrite deterministic modes and NULL/no escape-char variant.
12203 let mode = match &args[1] {
12204 Expression::Literal(lit)
12205 if matches!(
12206 lit.as_ref(),
12207 crate::expressions::Literal::String(_)
12208 ) =>
12209 {
12210 let crate::expressions::Literal::String(s) = lit.as_ref()
12211 else {
12212 unreachable!()
12213 };
12214 s.to_ascii_lowercase()
12215 }
12216 _ => return Ok(Expression::Function(f)),
12217 };
12218 if mode != "one" && mode != "all" {
12219 return Ok(Expression::Function(f));
12220 }
12221 if args.len() >= 4 && !matches!(&args[3], Expression::Null(_)) {
12222 return Ok(Expression::Function(f));
12223 }
12224
12225 let json_doc_sql = match Generator::sql(&args[0]) {
12226 Ok(sql) => sql,
12227 Err(_) => return Ok(Expression::Function(f)),
12228 };
12229 let search_sql = match Generator::sql(&args[2]) {
12230 Ok(sql) => sql,
12231 Err(_) => return Ok(Expression::Function(f)),
12232 };
12233 let path_sql = if args.len() == 5 {
12234 match Generator::sql(&args[4]) {
12235 Ok(sql) => sql,
12236 Err(_) => return Ok(Expression::Function(f)),
12237 }
12238 } else {
12239 "'$'".to_string()
12240 };
12241
12242 let rewrite_sql = if mode == "all" {
12243 format!(
12244 "(SELECT TO_JSON(LIST(__jt.fullkey)) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}))",
12245 json_doc_sql, path_sql, search_sql
12246 )
12247 } else {
12248 format!(
12249 "(SELECT TO_JSON(__jt.fullkey) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}) ORDER BY __jt.id LIMIT 1)",
12250 json_doc_sql, path_sql, search_sql
12251 )
12252 };
12253
12254 Ok(Expression::Raw(crate::expressions::Raw {
12255 sql: rewrite_sql,
12256 }))
12257 }
12258 // SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
12259 // BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
12260 "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
12261 if f.args.len() >= 2
12262 && matches!(source, DialectType::SingleStore) =>
12263 {
12264 let is_bson = name == "BSON_EXTRACT_BSON";
12265 let mut args = f.args;
12266 let json_expr = args.remove(0);
12267
12268 // Build JSONPath from remaining arguments
12269 let mut path = String::from("$");
12270 for arg in &args {
12271 if let Expression::Literal(lit) = arg {
12272 if let crate::expressions::Literal::String(s) = lit.as_ref()
12273 {
12274 // Check if it's a numeric string (array index)
12275 if s.parse::<i64>().is_ok() {
12276 path.push('[');
12277 path.push_str(s);
12278 path.push(']');
12279 } else {
12280 path.push('.');
12281 path.push_str(s);
12282 }
12283 }
12284 }
12285 }
12286
12287 let target_func = if is_bson {
12288 "JSONB_EXTRACT"
12289 } else {
12290 "JSON_EXTRACT"
12291 };
12292 Ok(Expression::Function(Box::new(Function::new(
12293 target_func.to_string(),
12294 vec![json_expr, Expression::string(&path)],
12295 ))))
12296 }
// ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
// (ClickHouse spells its array higher-order functions in camelCase).
"ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
    // Rebuild the Function struct field-by-field so the span and
    // inferred type are reset while all other flags are preserved.
    Ok(Expression::Function(Box::new(Function {
        name: "arraySum".to_string(),
        args: f.args,
        distinct: f.distinct,
        trailing_comments: f.trailing_comments,
        use_bracket_syntax: f.use_bracket_syntax,
        no_parens: f.no_parens,
        quoted: f.quoted,
        span: None,
        inferred_type: None,
    })))
}
// TSQL JSON_QUERY/JSON_VALUE -> target-specific
// Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
// and is handled by JsonQueryValueConvert action. This handles the case where
// TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
"JSON_QUERY" | "JSON_VALUE"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::TSQL | DialectType::Fabric
        ) =>
{
    match target {
        // Hive-family dialects extract JSON values via GET_JSON_OBJECT.
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => Ok(Expression::Function(Box::new(
            Function::new("GET_JSON_OBJECT".to_string(), f.args),
        ))),
        // All other targets keep whichever of the two names was used.
        _ => Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            f.args,
        )))),
    }
}
// UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
"UNIX_TIMESTAMP" if f.args.len() == 1 => {
    let arg = f.args.into_iter().next().unwrap();
    // Hive-family sources accept a 'yyyy-MM-dd HH:mm:ss' string here;
    // targets coming from those dialects must reproduce that parse step.
    let is_hive_source = matches!(
        source,
        DialectType::Hive
            | DialectType::Spark
            | DialectType::Databricks
    );
    match target {
        DialectType::DuckDB if is_hive_source => {
            // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
            let strptime =
                Expression::Function(Box::new(Function::new(
                    "STRPTIME".to_string(),
                    vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
                )));
            Ok(Expression::Function(Box::new(Function::new(
                "EPOCH".to_string(),
                vec![strptime],
            ))))
        }
        DialectType::Presto | DialectType::Trino if is_hive_source => {
            // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
            // First attempt: treat x as a datetime string.
            let cast_varchar =
                Expression::Cast(Box::new(crate::expressions::Cast {
                    this: arg.clone(),
                    to: DataType::VarChar {
                        length: None,
                        parenthesized_length: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
            let date_parse =
                Expression::Function(Box::new(Function::new(
                    "DATE_PARSE".to_string(),
                    vec![
                        cast_varchar,
                        Expression::string("%Y-%m-%d %T"),
                    ],
                )));
            // TRY(...) yields NULL instead of erroring when the string
            // form does not parse, letting COALESCE pick the fallback.
            let try_expr = Expression::Function(Box::new(
                Function::new("TRY".to_string(), vec![date_parse]),
            ));
            // Fallback: treat x as an actual timestamp value —
            // format it, then re-parse with PARSE_DATETIME.
            let date_format =
                Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![arg, Expression::string("%Y-%m-%d %T")],
                )));
            let parse_datetime =
                Expression::Function(Box::new(Function::new(
                    "PARSE_DATETIME".to_string(),
                    vec![
                        date_format,
                        Expression::string("yyyy-MM-dd HH:mm:ss"),
                    ],
                )));
            let coalesce =
                Expression::Function(Box::new(Function::new(
                    "COALESCE".to_string(),
                    vec![try_expr, parse_datetime],
                )));
            Ok(Expression::Function(Box::new(Function::new(
                "TO_UNIXTIME".to_string(),
                vec![coalesce],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            // Non-Hive source: a direct rename suffices.
            Ok(Expression::Function(Box::new(Function::new(
                "TO_UNIXTIME".to_string(),
                vec![arg],
            ))))
        }
        // Other targets keep the original spelling unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "UNIX_TIMESTAMP".to_string(),
            vec![arg],
        )))),
    }
}
// TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
"TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => Ok(Expression::Function(Box::new(
        Function::new("UNIX_TIMESTAMP".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "TO_UNIX_TIMESTAMP".to_string(),
        f.args,
    )))),
},
// CURDATE() -> CURRENT_DATE (dedicated AST node, rendered per target)
"CURDATE" => {
    Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
}
// CURTIME() -> CURRENT_TIME (dedicated AST node, no precision argument)
"CURTIME" => {
    Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
        precision: None,
    }))
}
// ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive, LIST_SORT for DuckDB
"ARRAY_SORT" if f.args.len() >= 1 => {
    match target {
        DialectType::Hive => {
            let mut args = f.args;
            args.truncate(1); // Drop lambda comparator
            Ok(Expression::Function(Box::new(Function::new(
                "SORT_ARRAY".to_string(),
                args,
            ))))
        }
        DialectType::DuckDB
            if matches!(source, DialectType::Snowflake) =>
        {
            // Snowflake ARRAY_SORT(arr[, asc_bool[, nulls_first_bool]]) -> DuckDB LIST_SORT(arr[, 'ASC'/'DESC'[, 'NULLS FIRST']])
            let mut args_iter = f.args.into_iter();
            let arr = args_iter.next().unwrap();
            let asc_arg = args_iter.next();
            let nulls_first_arg = args_iter.next();

            // Only boolean *literals* can be mapped onto DuckDB's
            // string-based sort modifiers; other expressions pass through.
            let is_asc_bool = asc_arg
                .as_ref()
                .map(|a| matches!(a, Expression::Boolean(_)))
                .unwrap_or(false);
            let is_nf_bool = nulls_first_arg
                .as_ref()
                .map(|a| matches!(a, Expression::Boolean(_)))
                .unwrap_or(false);

            // No boolean args: pass through as-is
            if !is_asc_bool && !is_nf_bool {
                let mut result_args = vec![arr];
                if let Some(asc) = asc_arg {
                    result_args.push(asc);
                    if let Some(nf) = nulls_first_arg {
                        result_args.push(nf);
                    }
                }
                Ok(Expression::Function(Box::new(Function::new(
                    "LIST_SORT".to_string(),
                    result_args,
                ))))
            } else {
                // Has boolean args: convert to DuckDB LIST_SORT format
                let descending = matches!(&asc_arg, Some(Expression::Boolean(b)) if !b.value);

                // Snowflake defaults: nulls_first = TRUE for DESC, FALSE for ASC
                let nulls_are_first = match &nulls_first_arg {
                    Some(Expression::Boolean(b)) => b.value,
                    None if is_asc_bool => descending, // Snowflake default
                    _ => false,
                };
                let nulls_first_sql = if nulls_are_first {
                    Some(Expression::string("NULLS FIRST"))
                } else {
                    None
                };

                if !is_asc_bool {
                    // asc is non-boolean expression, nulls_first is boolean
                    let mut result_args = vec![arr];
                    if let Some(asc) = asc_arg {
                        result_args.push(asc);
                    }
                    if let Some(nf) = nulls_first_sql {
                        result_args.push(nf);
                    }
                    Ok(Expression::Function(Box::new(Function::new(
                        "LIST_SORT".to_string(),
                        result_args,
                    ))))
                } else {
                    if !descending && !nulls_are_first {
                        // ASC, NULLS LAST (default) -> LIST_SORT(arr)
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "LIST_SORT".to_string(),
                                vec![arr],
                            ),
                        )))
                    } else if descending && !nulls_are_first {
                        // DESC, NULLS LAST -> ARRAY_REVERSE_SORT(arr)
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "ARRAY_REVERSE_SORT".to_string(),
                                vec![arr],
                            ),
                        )))
                    } else {
                        // NULLS FIRST -> LIST_SORT(arr, 'ASC'/'DESC', 'NULLS FIRST')
                        let order_str =
                            if descending { "DESC" } else { "ASC" };
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "LIST_SORT".to_string(),
                                vec![
                                    arr,
                                    Expression::string(order_str),
                                    Expression::string("NULLS FIRST"),
                                ],
                            ),
                        )))
                    }
                }
            }
        }
        DialectType::DuckDB => {
            // Non-Snowflake source: ARRAY_SORT(x, lambda) -> ARRAY_SORT(x) (drop comparator)
            let mut args = f.args;
            args.truncate(1); // Drop lambda comparator for DuckDB
            Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_SORT".to_string(),
                args,
            ))))
        }
        // Anything else keeps the call untouched.
        _ => Ok(Expression::Function(f)),
    }
}
// SORT_ARRAY(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, keep for Hive/Spark
"SORT_ARRAY" if f.args.len() == 1 => match target {
    DialectType::Hive
    | DialectType::Spark
    | DialectType::Databricks => Ok(Expression::Function(f)),
    DialectType::DuckDB => Ok(Expression::Function(Box::new(
        Function::new("LIST_SORT".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "ARRAY_SORT".to_string(),
        f.args,
    )))),
},
// SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
"SORT_ARRAY" if f.args.len() == 2 => {
    // In Hive/Spark a second argument of FALSE means "sort descending".
    let is_desc =
        matches!(&f.args[1], Expression::Boolean(b) if !b.value);
    if is_desc {
        match target {
            DialectType::DuckDB => {
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_REVERSE_SORT".to_string(),
                    vec![f.args.into_iter().next().unwrap()],
                ))))
            }
            DialectType::Presto | DialectType::Trino => {
                // Presto/Trino have no "descending" flag; synthesize a
                // comparator lambda that inverts the natural order:
                // (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END
                let arr_arg = f.args.into_iter().next().unwrap();
                let a = Expression::Column(Box::new(
                    crate::expressions::Column {
                        name: crate::expressions::Identifier::new("a"),
                        table: None,
                        join_mark: false,
                        trailing_comments: Vec::new(),
                        span: None,
                        inferred_type: None,
                    },
                ));
                let b = Expression::Column(Box::new(
                    crate::expressions::Column {
                        name: crate::expressions::Identifier::new("b"),
                        table: None,
                        join_mark: false,
                        trailing_comments: Vec::new(),
                        span: None,
                        inferred_type: None,
                    },
                ));
                let case_expr = Expression::Case(Box::new(
                    crate::expressions::Case {
                        operand: None,
                        whens: vec![
                            (
                                Expression::Lt(Box::new(
                                    BinaryOp::new(a.clone(), b.clone()),
                                )),
                                Expression::Literal(Box::new(
                                    Literal::Number("1".to_string()),
                                )),
                            ),
                            (
                                Expression::Gt(Box::new(
                                    BinaryOp::new(a.clone(), b.clone()),
                                )),
                                Expression::Literal(Box::new(
                                    Literal::Number("-1".to_string()),
                                )),
                            ),
                        ],
                        else_: Some(Expression::Literal(Box::new(
                            Literal::Number("0".to_string()),
                        ))),
                        comments: Vec::new(),
                        inferred_type: None,
                    },
                ));
                let lambda = Expression::Lambda(Box::new(
                    crate::expressions::LambdaExpr {
                        parameters: vec![
                            crate::expressions::Identifier::new("a"),
                            crate::expressions::Identifier::new("b"),
                        ],
                        body: case_expr,
                        colon: false,
                        parameter_types: Vec::new(),
                    },
                ));
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_SORT".to_string(),
                    vec![arr_arg, lambda],
                ))))
            }
            _ => Ok(Expression::Function(f)),
        }
    } else {
        // SORT_ARRAY(x, TRUE) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for others
        // NOTE(review): a non-boolean second argument also lands here and
        // is dropped for non-Hive targets — presumably intentional; verify.
        match target {
            DialectType::Hive => Ok(Expression::Function(f)),
            DialectType::DuckDB => {
                Ok(Expression::Function(Box::new(Function::new(
                    "LIST_SORT".to_string(),
                    vec![f.args.into_iter().next().unwrap()],
                ))))
            }
            _ => Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_SORT".to_string(),
                vec![f.args.into_iter().next().unwrap()],
            )))),
        }
    }
}
// LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
"LEFT" if f.args.len() == 2 => {
    match target {
        DialectType::Hive
        | DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            let x = f.args[0].clone();
            let n = f.args[1].clone();
            // LEFT(x, n) == SUBSTRING(x, 1, n)
            Ok(Expression::Function(Box::new(Function::new(
                "SUBSTRING".to_string(),
                vec![x, Expression::number(1), n],
            ))))
        }
        DialectType::Spark | DialectType::Databricks
            if matches!(
                source,
                DialectType::TSQL | DialectType::Fabric
            ) =>
        {
            // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
            // (TSQL implicitly coerces the argument to a string;
            // the CAST makes that explicit for Spark).
            let x = f.args[0].clone();
            let n = f.args[1].clone();
            let cast_x = Expression::Cast(Box::new(Cast {
                this: x,
                to: DataType::VarChar {
                    length: None,
                    parenthesized_length: false,
                },
                double_colon_syntax: false,
                trailing_comments: Vec::new(),
                format: None,
                default: None,
                inferred_type: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "LEFT".to_string(),
                vec![cast_x, n],
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
"RIGHT" if f.args.len() == 2 => {
    match target {
        DialectType::Hive
        | DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            let x = f.args[0].clone();
            let n = f.args[1].clone();
            // SUBSTRING(x, LENGTH(x) - (n - 1))
            let len_x = Expression::Function(Box::new(Function::new(
                "LENGTH".to_string(),
                vec![x.clone()],
            )));
            let n_minus_1 = Expression::Sub(Box::new(
                crate::expressions::BinaryOp::new(
                    n,
                    Expression::number(1),
                ),
            ));
            // Explicit parens keep the generated SQL's precedence:
            // LENGTH(x) - (n - 1), not LENGTH(x) - n - 1.
            let n_minus_1_paren = Expression::Paren(Box::new(
                crate::expressions::Paren {
                    this: n_minus_1,
                    trailing_comments: Vec::new(),
                },
            ));
            let offset = Expression::Sub(Box::new(
                crate::expressions::BinaryOp::new(
                    len_x,
                    n_minus_1_paren,
                ),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "SUBSTRING".to_string(),
                vec![x, offset],
            ))))
        }
        DialectType::Spark | DialectType::Databricks
            if matches!(
                source,
                DialectType::TSQL | DialectType::Fabric
            ) =>
        {
            // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
            let x = f.args[0].clone();
            let n = f.args[1].clone();
            let cast_x = Expression::Cast(Box::new(Cast {
                this: x,
                to: DataType::VarChar {
                    length: None,
                    parenthesized_length: false,
                },
                double_colon_syntax: false,
                trailing_comments: Vec::new(),
                format: None,
                default: None,
                inferred_type: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "RIGHT".to_string(),
                vec![cast_x, n],
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
"MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
    DialectType::Snowflake => Ok(Expression::Function(Box::new(
        Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
    ))),
    // Spark/Databricks already use this exact name; keep it.
    DialectType::Spark | DialectType::Databricks => {
        Ok(Expression::Function(Box::new(Function::new(
            "MAP_FROM_ARRAYS".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(Box::new(Function::new(
        "MAP".to_string(),
        f.args,
    )))),
},
// LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
// SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
"LIKE" if f.args.len() >= 2 => {
    let (this, pattern) = if matches!(source, DialectType::SQLite) {
        // SQLite: LIKE(pattern, string) -> string LIKE pattern
        (f.args[1].clone(), f.args[0].clone())
    } else {
        // Standard: LIKE(string, pattern) -> string LIKE pattern
        (f.args[0].clone(), f.args[1].clone())
    };
    // The optional third argument is the escape character in both
    // argument orders, so it is read from the same position.
    let escape = if f.args.len() >= 3 {
        Some(f.args[2].clone())
    } else {
        None
    };
    Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
        left: this,
        right: pattern,
        escape,
        quantifier: None,
        inferred_type: None,
    })))
}
// ILIKE(foo, 'pat') -> foo ILIKE 'pat' (case-insensitive LIKE node)
"ILIKE" if f.args.len() >= 2 => {
    let this = f.args[0].clone();
    let pattern = f.args[1].clone();
    let escape = if f.args.len() >= 3 {
        Some(f.args[2].clone())
    } else {
        None
    };
    Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
        left: this,
        right: pattern,
        escape,
        quantifier: None,
        inferred_type: None,
    })))
}
// CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
"CHAR" if f.args.len() == 1 => match target {
    DialectType::MySQL
    | DialectType::SingleStore
    | DialectType::TSQL => Ok(Expression::Function(f)),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "CHR".to_string(),
        f.args,
    )))),
},
// CONCAT(a, b) -> a || b for PostgreSQL
// Only applied for ClickHouse/MySQL sources; other sources keep CONCAT.
"CONCAT"
    if f.args.len() == 2
        && matches!(target, DialectType::PostgreSQL)
        && matches!(
            source,
            DialectType::ClickHouse | DialectType::MySQL
        ) =>
{
    let mut args = f.args;
    let right = args.pop().unwrap();
    let left = args.pop().unwrap();
    Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
        this: Box::new(left),
        expression: Box::new(right),
        safe: None,
    })))
}
// ARRAY_TO_STRING(arr, delim) -> target-specific
// Snowflake -> DuckDB needs Snowflake's NULL handling emulated:
//   CASE WHEN sep IS NULL THEN NULL
//        ELSE ARRAY_TO_STRING(
//               LIST_TRANSFORM(arr, x -> COALESCE(CAST(x AS TEXT), '')), sep)
//   END
"ARRAY_TO_STRING"
    if f.args.len() == 2
        && matches!(target, DialectType::DuckDB)
        && matches!(source, DialectType::Snowflake) =>
{
    let mut args = f.args;
    let arr = args.remove(0);
    let sep = args.remove(0);
    // sep IS NULL
    let sep_is_null = Expression::IsNull(Box::new(IsNull {
        this: sep.clone(),
        not: false,
        postfix_form: false,
    }));
    // COALESCE(CAST(x AS TEXT), '') — renders NULL elements as ''.
    let cast_x = Expression::Cast(Box::new(Cast {
        this: Expression::Identifier(Identifier::new("x")),
        to: DataType::Text,
        trailing_comments: Vec::new(),
        double_colon_syntax: false,
        format: None,
        default: None,
        inferred_type: None,
    }));
    let coalesce = Expression::Coalesce(Box::new(
        crate::expressions::VarArgFunc {
            original_name: None,
            expressions: vec![
                cast_x,
                Expression::Literal(Box::new(Literal::String(
                    String::new(),
                ))),
            ],
            inferred_type: None,
        },
    ));
    let lambda =
        Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
            parameters: vec![Identifier::new("x")],
            body: coalesce,
            colon: false,
            parameter_types: Vec::new(),
        }));
    let list_transform = Expression::Function(Box::new(Function::new(
        "LIST_TRANSFORM".to_string(),
        vec![arr, lambda],
    )));
    let array_to_string =
        Expression::Function(Box::new(Function::new(
            "ARRAY_TO_STRING".to_string(),
            vec![list_transform, sep],
        )));
    Ok(Expression::Case(Box::new(Case {
        operand: None,
        whens: vec![(sep_is_null, Expression::Null(Null))],
        else_: Some(array_to_string),
        comments: Vec::new(),
        inferred_type: None,
    })))
}
// ARRAY_TO_STRING for all other source/target combinations.
"ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
    DialectType::Presto | DialectType::Trino => {
        Ok(Expression::Function(Box::new(Function::new(
            "ARRAY_JOIN".to_string(),
            f.args,
        ))))
    }
    DialectType::TSQL => Ok(Expression::Function(Box::new(
        Function::new("STRING_AGG".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// ARRAY_CONCAT / LIST_CONCAT -> target-specific
"ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
    // Hive-family overloads CONCAT for arrays.
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => Ok(Expression::Function(Box::new(
        Function::new("CONCAT".to_string(), f.args),
    ))),
    DialectType::Snowflake => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CAT".to_string(), f.args),
    ))),
    DialectType::Redshift => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CONCAT".to_string(), f.args),
    ))),
    DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CAT".to_string(), f.args),
    ))),
    DialectType::DuckDB => Ok(Expression::Function(Box::new(
        Function::new("LIST_CONCAT".to_string(), f.args),
    ))),
    // Presto/Trino CONCAT also accepts arrays.
    DialectType::Presto | DialectType::Trino => {
        Ok(Expression::Function(Box::new(Function::new(
            "CONCAT".to_string(),
            f.args,
        ))))
    }
    DialectType::BigQuery => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CONCAT".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
"HAS" if f.args.len() == 2 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CONTAINS".to_string(), f.args),
    ))),
    DialectType::Presto | DialectType::Trino => {
        Ok(Expression::Function(Box::new(Function::new(
            "CONTAINS".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
// (2-arg NVL is handled elsewhere; this arm only fires for 3+ args)
"NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
    Function::new("COALESCE".to_string(), f.args),
))),
// ISNULL(x) in MySQL -> (x IS NULL)
"ISNULL"
    if f.args.len() == 1
        && matches!(source, DialectType::MySQL)
        && matches!(target, DialectType::MySQL) =>
{
    let arg = f.args.into_iter().next().unwrap();
    // Wrap in parens so the postfix IS NULL binds to the whole argument.
    Ok(Expression::Paren(Box::new(crate::expressions::Paren {
        this: Expression::IsNull(Box::new(
            crate::expressions::IsNull {
                this: arg,
                not: false,
                postfix_form: false,
            },
        )),
        trailing_comments: Vec::new(),
    })))
}
// MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL targets
// (guard checks the target only; any source dialect is accepted)
"MONTHNAME"
    if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
{
    let arg = f.args.into_iter().next().unwrap();
    Ok(Expression::Function(Box::new(Function::new(
        "DATE_FORMAT".to_string(),
        vec![arg, Expression::string("%M")],
    ))))
}
// ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
// Note the argument order flips: ClickHouse puts the separator first.
"SPLITBYSTRING" if f.args.len() == 2 => {
    let sep = f.args[0].clone();
    let str_arg = f.args[1].clone();
    match target {
        DialectType::DuckDB => Ok(Expression::Function(Box::new(
            Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
        ))),
        DialectType::Doris => {
            Ok(Expression::Function(Box::new(Function::new(
                "SPLIT_BY_STRING".to_string(),
                vec![str_arg, sep],
            ))))
        }
        DialectType::Hive
        | DialectType::Spark
        | DialectType::Databricks => {
            // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
            // \Q..\E quotes the separator so any regex metacharacters
            // in it are treated literally by the regex-based SPLIT.
            let escaped =
                Expression::Function(Box::new(Function::new(
                    "CONCAT".to_string(),
                    vec![
                        Expression::string("\\Q"),
                        sep,
                        Expression::string("\\E"),
                    ],
                )));
            Ok(Expression::Function(Box::new(Function::new(
                "SPLIT".to_string(),
                vec![str_arg, escaped],
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
// Same separator-first argument order as splitByString.
"SPLITBYREGEXP" if f.args.len() == 2 => {
    let sep = f.args[0].clone();
    let str_arg = f.args[1].clone();
    match target {
        DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new(
                "STR_SPLIT_REGEX".to_string(),
                vec![str_arg, sep],
            ))))
        }
        DialectType::Hive
        | DialectType::Spark
        | DialectType::Databricks => {
            Ok(Expression::Function(Box::new(Function::new(
                "SPLIT".to_string(),
                vec![str_arg, sep],
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
"TOMONDAY" => {
    if f.args.len() == 1 {
        let arg = f.args.into_iter().next().unwrap();
        match target {
            DialectType::Doris => {
                // Doris's DATE_TRUNC takes the unit as second argument.
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    vec![arg, Expression::string("WEEK")],
                ))))
            }
            _ => Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![Expression::string("WEEK"), arg],
            )))),
        }
    } else {
        // Unexpected arity: leave the call untouched.
        Ok(Expression::Function(f))
    }
}
// COLLECT_LIST with FILTER(WHERE x IS NOT NULL) for targets that need it
"COLLECT_LIST" if f.args.len() == 1 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => Ok(Expression::Function(f)),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "ARRAY_AGG".to_string(),
        f.args,
    )))),
},
// TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
"TO_CHAR"
    if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
{
    let arg = f.args.into_iter().next().unwrap();
    // STRING has no dedicated DataType variant, so emit it as Custom.
    Ok(Expression::Cast(Box::new(crate::expressions::Cast {
        this: arg,
        to: DataType::Custom {
            name: "STRING".to_string(),
        },
        double_colon_syntax: false,
        trailing_comments: Vec::new(),
        format: None,
        default: None,
        inferred_type: None,
    })))
}
// DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL (Oracle package call)
"DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
    DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
        Function::new("RANDOM".to_string(), vec![]),
    ))),
    _ => Ok(Expression::Function(f)),
},
// ClickHouse formatDateTime -> target-specific
"FORMATDATETIME" if f.args.len() >= 2 => match target {
    DialectType::MySQL => Ok(Expression::Function(Box::new(
        Function::new("DATE_FORMAT".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
"REPLICATE" if f.args.len() == 2 => match target {
    DialectType::TSQL => Ok(Expression::Function(f)),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "REPEAT".to_string(),
        f.args,
    )))),
},
// LEN(x) -> LENGTH(x) for non-TSQL targets
// No CAST needed when arg is already a string literal
"LEN" if f.args.len() == 1 => {
    match target {
        DialectType::TSQL => Ok(Expression::Function(f)),
        DialectType::Spark | DialectType::Databricks => {
            let arg = f.args.into_iter().next().unwrap();
            // Don't wrap string literals with CAST - they're already strings
            let is_string = matches!(
                &arg,
                Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
            );
            let final_arg = if is_string {
                arg
            } else {
                // Non-literal argument: add an explicit string CAST so
                // Spark's LENGTH matches TSQL LEN's implicit coercion.
                Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::VarChar {
                        length: None,
                        parenthesized_length: false,
                    },
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LENGTH".to_string(),
                vec![final_arg],
            ))))
        }
        // All other targets: plain rename, no CAST.
        _ => {
            let arg = f.args.into_iter().next().unwrap();
            Ok(Expression::Function(Box::new(Function::new(
                "LENGTH".to_string(),
                vec![arg],
            ))))
        }
    }
}
// COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
"COUNT_BIG" if f.args.len() == 1 => match target {
    DialectType::TSQL => Ok(Expression::Function(f)),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "COUNT".to_string(),
        f.args,
    )))),
},
// DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
"DATEFROMPARTS" if f.args.len() == 3 => match target {
    DialectType::TSQL => Ok(Expression::Function(f)),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "MAKE_DATE".to_string(),
        f.args,
    )))),
},
// REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
"REGEXP_LIKE" if f.args.len() >= 2 => {
    let str_expr = f.args[0].clone();
    let pattern = f.args[1].clone();
    // Optional third argument carries regex match flags.
    let flags = if f.args.len() >= 3 {
        Some(f.args[2].clone())
    } else {
        None
    };
    match target {
        DialectType::DuckDB => {
            let mut new_args = vec![str_expr, pattern];
            if let Some(fl) = flags {
                new_args.push(fl);
            }
            Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_MATCHES".to_string(),
                new_args,
            ))))
        }
        // Other targets get the dedicated RegexpLike AST node, which
        // the generator renders with dialect-specific syntax.
        _ => Ok(Expression::RegexpLike(Box::new(
            crate::expressions::RegexpFunc {
                this: str_expr,
                pattern,
                flags,
            },
        ))),
    }
}
// ClickHouse arrayJoin -> UNNEST for PostgreSQL
"ARRAYJOIN" if f.args.len() == 1 => match target {
    DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
        Function::new("UNNEST".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
"DATETIMEFROMPARTS" if f.args.len() == 7 => {
    match target {
        DialectType::TSQL => Ok(Expression::Function(f)),
        DialectType::DuckDB => {
            // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
            // There is no separate millisecond slot, so fold ms into
            // the fractional-seconds argument.
            let mut args = f.args;
            let ms = args.pop().unwrap();
            let s = args.pop().unwrap();
            // s + (ms / 1000.0)
            let ms_frac = Expression::Div(Box::new(BinaryOp::new(
                ms,
                Expression::Literal(Box::new(
                    crate::expressions::Literal::Number(
                        "1000.0".to_string(),
                    ),
                )),
            )));
            let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
                s,
                // Parens keep s + (ms / 1000.0) unambiguous in output.
                Expression::Paren(Box::new(Paren {
                    this: ms_frac,
                    trailing_comments: vec![],
                })),
            )));
            args.push(s_with_ms);
            Ok(Expression::Function(Box::new(Function::new(
                "MAKE_TIMESTAMP".to_string(),
                args,
            ))))
        }
        DialectType::Snowflake => {
            // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
            // The last slot is nanoseconds: 1 ms = 1_000_000 ns.
            let mut args = f.args;
            let ms = args.pop().unwrap();
            // ms * 1000000
            let ns = Expression::Mul(Box::new(BinaryOp::new(
                ms,
                Expression::number(1000000),
            )));
            args.push(ns);
            Ok(Expression::Function(Box::new(Function::new(
                "TIMESTAMP_FROM_PARTS".to_string(),
                args,
            ))))
        }
        _ => {
            // Default: keep function name for other targets
            Ok(Expression::Function(Box::new(Function::new(
                "DATETIMEFROMPARTS".to_string(),
                f.args,
            ))))
        }
    }
}
13284 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
13285 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
13286 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
13287 let is_try = name == "TRY_CONVERT";
13288 let type_expr = f.args[0].clone();
13289 let value_expr = f.args[1].clone();
13290 let style = if f.args.len() >= 3 {
13291 Some(&f.args[2])
13292 } else {
13293 None
13294 };
13295
13296 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
13297 if matches!(target, DialectType::TSQL) {
13298 let normalized_type = match &type_expr {
13299 Expression::DataType(dt) => {
13300 let new_dt = match dt {
13301 DataType::Int { .. } => DataType::Custom {
13302 name: "INTEGER".to_string(),
13303 },
13304 _ => dt.clone(),
13305 };
13306 Expression::DataType(new_dt)
13307 }
13308 Expression::Identifier(id) => {
13309 if id.name.eq_ignore_ascii_case("INT") {
13310 Expression::Identifier(
13311 crate::expressions::Identifier::new("INTEGER"),
13312 )
13313 } else {
13314 let upper = id.name.to_ascii_uppercase();
13315 Expression::Identifier(
13316 crate::expressions::Identifier::new(upper),
13317 )
13318 }
13319 }
13320 Expression::Column(col) => {
13321 if col.name.name.eq_ignore_ascii_case("INT") {
13322 Expression::Identifier(
13323 crate::expressions::Identifier::new("INTEGER"),
13324 )
13325 } else {
13326 let upper = col.name.name.to_ascii_uppercase();
13327 Expression::Identifier(
13328 crate::expressions::Identifier::new(upper),
13329 )
13330 }
13331 }
13332 _ => type_expr.clone(),
13333 };
13334 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
13335 let mut new_args = vec![normalized_type, value_expr];
13336 if let Some(s) = style {
13337 new_args.push(s.clone());
13338 }
13339 return Ok(Expression::Function(Box::new(Function::new(
13340 func_name.to_string(),
13341 new_args,
13342 ))));
13343 }
13344
13345 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
13346 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
13347 match e {
13348 Expression::DataType(dt) => {
13349 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
13350 match dt {
13351 DataType::Custom { name }
13352 if name.starts_with("NVARCHAR(")
13353 || name.starts_with("NCHAR(") =>
13354 {
13355 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
13356 let inner = &name[name.find('(').unwrap() + 1
13357 ..name.len() - 1];
13358 if inner.eq_ignore_ascii_case("MAX") {
13359 Some(DataType::Text)
13360 } else if let Ok(len) = inner.parse::<u32>() {
13361 if name.starts_with("NCHAR") {
13362 Some(DataType::Char {
13363 length: Some(len),
13364 })
13365 } else {
13366 Some(DataType::VarChar {
13367 length: Some(len),
13368 parenthesized_length: false,
13369 })
13370 }
13371 } else {
13372 Some(dt.clone())
13373 }
13374 }
13375 DataType::Custom { name } if name == "NVARCHAR" => {
13376 Some(DataType::VarChar {
13377 length: None,
13378 parenthesized_length: false,
13379 })
13380 }
13381 DataType::Custom { name } if name == "NCHAR" => {
13382 Some(DataType::Char { length: None })
13383 }
13384 DataType::Custom { name }
13385 if name == "NVARCHAR(MAX)"
13386 || name == "VARCHAR(MAX)" =>
13387 {
13388 Some(DataType::Text)
13389 }
13390 _ => Some(dt.clone()),
13391 }
13392 }
13393 Expression::Identifier(id) => {
13394 let name = id.name.to_ascii_uppercase();
13395 match name.as_str() {
13396 "INT" | "INTEGER" => Some(DataType::Int {
13397 length: None,
13398 integer_spelling: false,
13399 }),
13400 "BIGINT" => Some(DataType::BigInt { length: None }),
13401 "SMALLINT" => {
13402 Some(DataType::SmallInt { length: None })
13403 }
13404 "TINYINT" => {
13405 Some(DataType::TinyInt { length: None })
13406 }
13407 "FLOAT" => Some(DataType::Float {
13408 precision: None,
13409 scale: None,
13410 real_spelling: false,
13411 }),
13412 "REAL" => Some(DataType::Float {
13413 precision: None,
13414 scale: None,
13415 real_spelling: true,
13416 }),
13417 "DATETIME" | "DATETIME2" => {
13418 Some(DataType::Timestamp {
13419 timezone: false,
13420 precision: None,
13421 })
13422 }
13423 "DATE" => Some(DataType::Date),
13424 "BIT" => Some(DataType::Boolean),
13425 "TEXT" => Some(DataType::Text),
13426 "NUMERIC" => Some(DataType::Decimal {
13427 precision: None,
13428 scale: None,
13429 }),
13430 "MONEY" => Some(DataType::Decimal {
13431 precision: Some(15),
13432 scale: Some(4),
13433 }),
13434 "SMALLMONEY" => Some(DataType::Decimal {
13435 precision: Some(6),
13436 scale: Some(4),
13437 }),
13438 "VARCHAR" => Some(DataType::VarChar {
13439 length: None,
13440 parenthesized_length: false,
13441 }),
13442 "NVARCHAR" => Some(DataType::VarChar {
13443 length: None,
13444 parenthesized_length: false,
13445 }),
13446 "CHAR" => Some(DataType::Char { length: None }),
13447 "NCHAR" => Some(DataType::Char { length: None }),
13448 _ => Some(DataType::Custom { name }),
13449 }
13450 }
13451 Expression::Column(col) => {
13452 let name = col.name.name.to_ascii_uppercase();
13453 match name.as_str() {
13454 "INT" | "INTEGER" => Some(DataType::Int {
13455 length: None,
13456 integer_spelling: false,
13457 }),
13458 "BIGINT" => Some(DataType::BigInt { length: None }),
13459 "FLOAT" => Some(DataType::Float {
13460 precision: None,
13461 scale: None,
13462 real_spelling: false,
13463 }),
13464 "DATETIME" | "DATETIME2" => {
13465 Some(DataType::Timestamp {
13466 timezone: false,
13467 precision: None,
13468 })
13469 }
13470 "DATE" => Some(DataType::Date),
13471 "NUMERIC" => Some(DataType::Decimal {
13472 precision: None,
13473 scale: None,
13474 }),
13475 "VARCHAR" => Some(DataType::VarChar {
13476 length: None,
13477 parenthesized_length: false,
13478 }),
13479 "NVARCHAR" => Some(DataType::VarChar {
13480 length: None,
13481 parenthesized_length: false,
13482 }),
13483 "CHAR" => Some(DataType::Char { length: None }),
13484 "NCHAR" => Some(DataType::Char { length: None }),
13485 _ => Some(DataType::Custom { name }),
13486 }
13487 }
13488 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
13489 Expression::Function(f) => {
13490 let fname = f.name.to_ascii_uppercase();
13491 match fname.as_str() {
13492 "VARCHAR" | "NVARCHAR" => {
13493 let len = f.args.first().and_then(|a| {
13494 if let Expression::Literal(lit) = a
13495 {
13496 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
13497 n.parse::<u32>().ok()
13498 } else { None }
13499 } else if let Expression::Identifier(id) = a
13500 {
13501 if id.name.eq_ignore_ascii_case("MAX") {
13502 None
13503 } else {
13504 None
13505 }
13506 } else {
13507 None
13508 }
13509 });
13510 // Check for VARCHAR(MAX) -> TEXT
13511 let is_max = f.args.first().map_or(false, |a| {
13512 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
13513 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
13514 });
13515 if is_max {
13516 Some(DataType::Text)
13517 } else {
13518 Some(DataType::VarChar {
13519 length: len,
13520 parenthesized_length: false,
13521 })
13522 }
13523 }
13524 "NCHAR" | "CHAR" => {
13525 let len = f.args.first().and_then(|a| {
13526 if let Expression::Literal(lit) = a
13527 {
13528 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
13529 n.parse::<u32>().ok()
13530 } else { None }
13531 } else {
13532 None
13533 }
13534 });
13535 Some(DataType::Char { length: len })
13536 }
13537 "NUMERIC" | "DECIMAL" => {
13538 let precision = f.args.first().and_then(|a| {
13539 if let Expression::Literal(lit) = a
13540 {
13541 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
13542 n.parse::<u32>().ok()
13543 } else { None }
13544 } else {
13545 None
13546 }
13547 });
13548 let scale = f.args.get(1).and_then(|a| {
13549 if let Expression::Literal(lit) = a
13550 {
13551 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
13552 n.parse::<u32>().ok()
13553 } else { None }
13554 } else {
13555 None
13556 }
13557 });
13558 Some(DataType::Decimal { precision, scale })
13559 }
13560 _ => None,
13561 }
13562 }
13563 _ => None,
13564 }
13565 }
13566
13567 if let Some(mut dt) = expr_to_datatype(&type_expr) {
13568 // For TSQL source: VARCHAR/CHAR without length defaults to 30
13569 let is_tsql_source =
13570 matches!(source, DialectType::TSQL | DialectType::Fabric);
13571 if is_tsql_source {
13572 match &dt {
13573 DataType::VarChar { length: None, .. } => {
13574 dt = DataType::VarChar {
13575 length: Some(30),
13576 parenthesized_length: false,
13577 };
13578 }
13579 DataType::Char { length: None } => {
13580 dt = DataType::Char { length: Some(30) };
13581 }
13582 _ => {}
13583 }
13584 }
13585
13586 // Determine if this is a string type
13587 let is_string_type = matches!(
13588 dt,
13589 DataType::VarChar { .. }
13590 | DataType::Char { .. }
13591 | DataType::Text
13592 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
13593 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
13594 || name.starts_with("VARCHAR(") || name == "VARCHAR"
13595 || name == "STRING");
13596
13597 // Determine if this is a date/time type
13598 let is_datetime_type = matches!(
13599 dt,
13600 DataType::Timestamp { .. } | DataType::Date
13601 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
13602 || name == "DATETIME2" || name == "SMALLDATETIME");
13603
13604 // Check for date conversion with style
13605 if style.is_some() {
13606 let style_num = style.and_then(|s| {
13607 if let Expression::Literal(lit) = s {
13608 if let crate::expressions::Literal::Number(n) =
13609 lit.as_ref()
13610 {
13611 n.parse::<u32>().ok()
13612 } else {
13613 None
13614 }
13615 } else {
13616 None
13617 }
13618 });
13619
13620 // TSQL CONVERT date styles (Java format)
13621 let format_str = style_num.and_then(|n| match n {
13622 101 => Some("MM/dd/yyyy"),
13623 102 => Some("yyyy.MM.dd"),
13624 103 => Some("dd/MM/yyyy"),
13625 104 => Some("dd.MM.yyyy"),
13626 105 => Some("dd-MM-yyyy"),
13627 108 => Some("HH:mm:ss"),
13628 110 => Some("MM-dd-yyyy"),
13629 112 => Some("yyyyMMdd"),
13630 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
13631 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
13632 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
13633 _ => None,
13634 });
13635
13636 // Non-string, non-datetime types with style: just CAST, ignore the style
13637 if !is_string_type && !is_datetime_type {
13638 let cast_expr = if is_try {
13639 Expression::TryCast(Box::new(
13640 crate::expressions::Cast {
13641 this: value_expr,
13642 to: dt,
13643 trailing_comments: Vec::new(),
13644 double_colon_syntax: false,
13645 format: None,
13646 default: None,
13647 inferred_type: None,
13648 },
13649 ))
13650 } else {
13651 Expression::Cast(Box::new(
13652 crate::expressions::Cast {
13653 this: value_expr,
13654 to: dt,
13655 trailing_comments: Vec::new(),
13656 double_colon_syntax: false,
13657 format: None,
13658 default: None,
13659 inferred_type: None,
13660 },
13661 ))
13662 };
13663 return Ok(cast_expr);
13664 }
13665
13666 if let Some(java_fmt) = format_str {
13667 let c_fmt = java_fmt
13668 .replace("yyyy", "%Y")
13669 .replace("MM", "%m")
13670 .replace("dd", "%d")
13671 .replace("HH", "%H")
13672 .replace("mm", "%M")
13673 .replace("ss", "%S")
13674 .replace("SSSSSS", "%f")
13675 .replace("SSS", "%f")
13676 .replace("'T'", "T");
13677
13678 // For datetime target types: style is the INPUT format for parsing strings -> dates
13679 if is_datetime_type {
13680 match target {
13681 DialectType::DuckDB => {
13682 return Ok(Expression::Function(Box::new(
13683 Function::new(
13684 "STRPTIME".to_string(),
13685 vec![
13686 value_expr,
13687 Expression::string(&c_fmt),
13688 ],
13689 ),
13690 )));
13691 }
13692 DialectType::Spark
13693 | DialectType::Databricks => {
13694 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
13695 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
13696 let func_name =
13697 if matches!(dt, DataType::Date) {
13698 "TO_DATE"
13699 } else {
13700 "TO_TIMESTAMP"
13701 };
13702 return Ok(Expression::Function(Box::new(
13703 Function::new(
13704 func_name.to_string(),
13705 vec![
13706 value_expr,
13707 Expression::string(java_fmt),
13708 ],
13709 ),
13710 )));
13711 }
13712 DialectType::Hive => {
13713 return Ok(Expression::Function(Box::new(
13714 Function::new(
13715 "TO_TIMESTAMP".to_string(),
13716 vec![
13717 value_expr,
13718 Expression::string(java_fmt),
13719 ],
13720 ),
13721 )));
13722 }
13723 _ => {
13724 return Ok(Expression::Cast(Box::new(
13725 crate::expressions::Cast {
13726 this: value_expr,
13727 to: dt,
13728 trailing_comments: Vec::new(),
13729 double_colon_syntax: false,
13730 format: None,
13731 default: None,
13732 inferred_type: None,
13733 },
13734 )));
13735 }
13736 }
13737 }
13738
13739 // For string target types: style is the OUTPUT format for dates -> strings
13740 match target {
13741 DialectType::DuckDB => Ok(Expression::Function(
13742 Box::new(Function::new(
13743 "STRPTIME".to_string(),
13744 vec![
13745 value_expr,
13746 Expression::string(&c_fmt),
13747 ],
13748 )),
13749 )),
13750 DialectType::Spark | DialectType::Databricks => {
13751 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
13752 // Determine the target string type
13753 let string_dt = match &dt {
13754 DataType::VarChar {
13755 length: Some(l),
13756 ..
13757 } => DataType::VarChar {
13758 length: Some(*l),
13759 parenthesized_length: false,
13760 },
13761 DataType::Text => DataType::Custom {
13762 name: "STRING".to_string(),
13763 },
13764 _ => DataType::Custom {
13765 name: "STRING".to_string(),
13766 },
13767 };
13768 let date_format_expr = Expression::Function(
13769 Box::new(Function::new(
13770 "DATE_FORMAT".to_string(),
13771 vec![
13772 value_expr,
13773 Expression::string(java_fmt),
13774 ],
13775 )),
13776 );
13777 let cast_expr = if is_try {
13778 Expression::TryCast(Box::new(
13779 crate::expressions::Cast {
13780 this: date_format_expr,
13781 to: string_dt,
13782 trailing_comments: Vec::new(),
13783 double_colon_syntax: false,
13784 format: None,
13785 default: None,
13786 inferred_type: None,
13787 },
13788 ))
13789 } else {
13790 Expression::Cast(Box::new(
13791 crate::expressions::Cast {
13792 this: date_format_expr,
13793 to: string_dt,
13794 trailing_comments: Vec::new(),
13795 double_colon_syntax: false,
13796 format: None,
13797 default: None,
13798 inferred_type: None,
13799 },
13800 ))
13801 };
13802 Ok(cast_expr)
13803 }
13804 DialectType::MySQL | DialectType::SingleStore => {
13805 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
13806 let mysql_fmt = java_fmt
13807 .replace("yyyy", "%Y")
13808 .replace("MM", "%m")
13809 .replace("dd", "%d")
13810 .replace("HH:mm:ss.SSSSSS", "%T")
13811 .replace("HH:mm:ss", "%T")
13812 .replace("HH", "%H")
13813 .replace("mm", "%i")
13814 .replace("ss", "%S");
13815 let date_format_expr = Expression::Function(
13816 Box::new(Function::new(
13817 "DATE_FORMAT".to_string(),
13818 vec![
13819 value_expr,
13820 Expression::string(&mysql_fmt),
13821 ],
13822 )),
13823 );
13824 // MySQL uses CHAR for string casts
13825 let mysql_dt = match &dt {
13826 DataType::VarChar { length, .. } => {
13827 DataType::Char { length: *length }
13828 }
13829 _ => dt,
13830 };
13831 Ok(Expression::Cast(Box::new(
13832 crate::expressions::Cast {
13833 this: date_format_expr,
13834 to: mysql_dt,
13835 trailing_comments: Vec::new(),
13836 double_colon_syntax: false,
13837 format: None,
13838 default: None,
13839 inferred_type: None,
13840 },
13841 )))
13842 }
13843 DialectType::Hive => {
13844 let func_name = "TO_TIMESTAMP";
13845 Ok(Expression::Function(Box::new(
13846 Function::new(
13847 func_name.to_string(),
13848 vec![
13849 value_expr,
13850 Expression::string(java_fmt),
13851 ],
13852 ),
13853 )))
13854 }
13855 _ => Ok(Expression::Cast(Box::new(
13856 crate::expressions::Cast {
13857 this: value_expr,
13858 to: dt,
13859 trailing_comments: Vec::new(),
13860 double_colon_syntax: false,
13861 format: None,
13862 default: None,
13863 inferred_type: None,
13864 },
13865 ))),
13866 }
13867 } else {
13868 // Unknown style, just CAST
13869 let cast_expr = if is_try {
13870 Expression::TryCast(Box::new(
13871 crate::expressions::Cast {
13872 this: value_expr,
13873 to: dt,
13874 trailing_comments: Vec::new(),
13875 double_colon_syntax: false,
13876 format: None,
13877 default: None,
13878 inferred_type: None,
13879 },
13880 ))
13881 } else {
13882 Expression::Cast(Box::new(
13883 crate::expressions::Cast {
13884 this: value_expr,
13885 to: dt,
13886 trailing_comments: Vec::new(),
13887 double_colon_syntax: false,
13888 format: None,
13889 default: None,
13890 inferred_type: None,
13891 },
13892 ))
13893 };
13894 Ok(cast_expr)
13895 }
13896 } else {
13897 // No style - simple CAST
13898 let final_dt = if matches!(
13899 target,
13900 DialectType::MySQL | DialectType::SingleStore
13901 ) {
13902 match &dt {
13903 DataType::Int { .. }
13904 | DataType::BigInt { .. }
13905 | DataType::SmallInt { .. }
13906 | DataType::TinyInt { .. } => DataType::Custom {
13907 name: "SIGNED".to_string(),
13908 },
13909 DataType::VarChar { length, .. } => {
13910 DataType::Char { length: *length }
13911 }
13912 _ => dt,
13913 }
13914 } else {
13915 dt
13916 };
13917 let cast_expr = if is_try {
13918 Expression::TryCast(Box::new(
13919 crate::expressions::Cast {
13920 this: value_expr,
13921 to: final_dt,
13922 trailing_comments: Vec::new(),
13923 double_colon_syntax: false,
13924 format: None,
13925 default: None,
13926 inferred_type: None,
13927 },
13928 ))
13929 } else {
13930 Expression::Cast(Box::new(crate::expressions::Cast {
13931 this: value_expr,
13932 to: final_dt,
13933 trailing_comments: Vec::new(),
13934 double_colon_syntax: false,
13935 format: None,
13936 default: None,
13937 inferred_type: None,
13938 }))
13939 };
13940 Ok(cast_expr)
13941 }
13942 } else {
13943 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
13944 Ok(Expression::Function(f))
13945 }
13946 }
13947 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
13948 "STRFTIME" if f.args.len() == 2 => {
13949 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
13950 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
13951 // SQLite: args[0] = format, args[1] = value
13952 (f.args[1].clone(), &f.args[0])
13953 } else {
13954 // DuckDB and others: args[0] = value, args[1] = format
13955 (f.args[0].clone(), &f.args[1])
13956 };
13957
13958 // Helper to convert C-style format to Java-style
// Translate a C/strftime-style format string (DuckDB/SQLite convention)
// into a Java SimpleDateFormat-style pattern.
//
// The table is applied strictly in order (e.g. "%-I" is rewritten before
// "%I" would be tried on the same text); this is safe because no
// replacement's output contains '%', so earlier rewrites can never create
// text that a later pattern matches.
fn c_to_java_format(fmt: &str) -> String {
    const C_TO_JAVA: [(&str, &str); 19] = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%j", "DDD"),
        ("%a", "EEE"),
        ("%b", "MMM"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];
    C_TO_JAVA
        .iter()
        .fold(fmt.to_string(), |acc, (c, java)| acc.replace(c, java))
}
13980
13981 // Helper: recursively convert format strings within expressions (handles CONCAT)
13982 fn convert_fmt_expr(
13983 expr: &Expression,
13984 converter: &dyn Fn(&str) -> String,
13985 ) -> Expression {
13986 match expr {
13987 Expression::Literal(lit)
13988 if matches!(
13989 lit.as_ref(),
13990 crate::expressions::Literal::String(_)
13991 ) =>
13992 {
13993 let crate::expressions::Literal::String(s) =
13994 lit.as_ref()
13995 else {
13996 unreachable!()
13997 };
13998 Expression::string(&converter(s))
13999 }
14000 Expression::Function(func)
14001 if func.name.eq_ignore_ascii_case("CONCAT") =>
14002 {
14003 let new_args: Vec<Expression> = func
14004 .args
14005 .iter()
14006 .map(|a| convert_fmt_expr(a, converter))
14007 .collect();
14008 Expression::Function(Box::new(Function::new(
14009 "CONCAT".to_string(),
14010 new_args,
14011 )))
14012 }
14013 other => other.clone(),
14014 }
14015 }
14016
14017 match target {
14018 DialectType::DuckDB => {
14019 if matches!(source, DialectType::SQLite) {
14020 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
14021 let cast_val = Expression::Cast(Box::new(Cast {
14022 this: val,
14023 to: crate::expressions::DataType::Timestamp {
14024 precision: None,
14025 timezone: false,
14026 },
14027 trailing_comments: Vec::new(),
14028 double_colon_syntax: false,
14029 format: None,
14030 default: None,
14031 inferred_type: None,
14032 }));
14033 Ok(Expression::Function(Box::new(Function::new(
14034 "STRFTIME".to_string(),
14035 vec![cast_val, fmt_expr.clone()],
14036 ))))
14037 } else {
14038 Ok(Expression::Function(f))
14039 }
14040 }
14041 DialectType::Spark
14042 | DialectType::Databricks
14043 | DialectType::Hive => {
14044 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
14045 let converted_fmt =
14046 convert_fmt_expr(fmt_expr, &c_to_java_format);
14047 Ok(Expression::Function(Box::new(Function::new(
14048 "DATE_FORMAT".to_string(),
14049 vec![val, converted_fmt],
14050 ))))
14051 }
14052 DialectType::TSQL | DialectType::Fabric => {
14053 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
14054 let converted_fmt =
14055 convert_fmt_expr(fmt_expr, &c_to_java_format);
14056 Ok(Expression::Function(Box::new(Function::new(
14057 "FORMAT".to_string(),
14058 vec![val, converted_fmt],
14059 ))))
14060 }
14061 DialectType::Presto
14062 | DialectType::Trino
14063 | DialectType::Athena => {
14064 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
14065 if let Expression::Literal(lit) = fmt_expr {
14066 if let crate::expressions::Literal::String(s) =
14067 lit.as_ref()
14068 {
14069 let presto_fmt = duckdb_to_presto_format(s);
14070 Ok(Expression::Function(Box::new(Function::new(
14071 "DATE_FORMAT".to_string(),
14072 vec![val, Expression::string(&presto_fmt)],
14073 ))))
14074 } else {
14075 Ok(Expression::Function(Box::new(Function::new(
14076 "DATE_FORMAT".to_string(),
14077 vec![val, fmt_expr.clone()],
14078 ))))
14079 }
14080 } else {
14081 Ok(Expression::Function(Box::new(Function::new(
14082 "DATE_FORMAT".to_string(),
14083 vec![val, fmt_expr.clone()],
14084 ))))
14085 }
14086 }
14087 DialectType::BigQuery => {
14088 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
14089 if let Expression::Literal(lit) = fmt_expr {
14090 if let crate::expressions::Literal::String(s) =
14091 lit.as_ref()
14092 {
14093 let bq_fmt = duckdb_to_bigquery_format(s);
14094 Ok(Expression::Function(Box::new(Function::new(
14095 "FORMAT_DATE".to_string(),
14096 vec![Expression::string(&bq_fmt), val],
14097 ))))
14098 } else {
14099 Ok(Expression::Function(Box::new(Function::new(
14100 "FORMAT_DATE".to_string(),
14101 vec![fmt_expr.clone(), val],
14102 ))))
14103 }
14104 } else {
14105 Ok(Expression::Function(Box::new(Function::new(
14106 "FORMAT_DATE".to_string(),
14107 vec![fmt_expr.clone(), val],
14108 ))))
14109 }
14110 }
14111 DialectType::PostgreSQL | DialectType::Redshift => {
14112 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
14113 if let Expression::Literal(lit) = fmt_expr {
14114 if let crate::expressions::Literal::String(s) =
14115 lit.as_ref()
14116 {
14117 let pg_fmt = s
14118 .replace("%Y", "YYYY")
14119 .replace("%m", "MM")
14120 .replace("%d", "DD")
14121 .replace("%H", "HH24")
14122 .replace("%M", "MI")
14123 .replace("%S", "SS")
14124 .replace("%y", "YY")
14125 .replace("%-m", "FMMM")
14126 .replace("%-d", "FMDD")
14127 .replace("%-H", "FMHH24")
14128 .replace("%-I", "FMHH12")
14129 .replace("%p", "AM")
14130 .replace("%F", "YYYY-MM-DD")
14131 .replace("%T", "HH24:MI:SS");
14132 Ok(Expression::Function(Box::new(Function::new(
14133 "TO_CHAR".to_string(),
14134 vec![val, Expression::string(&pg_fmt)],
14135 ))))
14136 } else {
14137 Ok(Expression::Function(Box::new(Function::new(
14138 "TO_CHAR".to_string(),
14139 vec![val, fmt_expr.clone()],
14140 ))))
14141 }
14142 } else {
14143 Ok(Expression::Function(Box::new(Function::new(
14144 "TO_CHAR".to_string(),
14145 vec![val, fmt_expr.clone()],
14146 ))))
14147 }
14148 }
14149 _ => Ok(Expression::Function(f)),
14150 }
14151 }
14152 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
14153 "STRPTIME" if f.args.len() == 2 => {
14154 let val = f.args[0].clone();
14155 let fmt_expr = &f.args[1];
14156
// Translate a C/strftime-style format string into a Java
// SimpleDateFormat-style pattern for date *parsing* targets.
//
// Same scheme as the STRFTIME conversion above but without the
// display-only tokens (%j, %a, %b). The table is applied strictly in
// order; no replacement's output contains '%', so earlier rewrites can
// never create text that a later pattern matches.
fn c_to_java_format_parse(fmt: &str) -> String {
    const C_TO_JAVA: [(&str, &str); 16] = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];
    C_TO_JAVA
        .iter()
        .fold(fmt.to_string(), |acc, (c, java)| acc.replace(c, java))
}
14175
14176 match target {
14177 DialectType::DuckDB => Ok(Expression::Function(f)),
14178 DialectType::Spark | DialectType::Databricks => {
14179 // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
14180 if let Expression::Literal(lit) = fmt_expr {
14181 if let crate::expressions::Literal::String(s) =
14182 lit.as_ref()
14183 {
14184 let java_fmt = c_to_java_format_parse(s);
14185 Ok(Expression::Function(Box::new(Function::new(
14186 "TO_TIMESTAMP".to_string(),
14187 vec![val, Expression::string(&java_fmt)],
14188 ))))
14189 } else {
14190 Ok(Expression::Function(Box::new(Function::new(
14191 "TO_TIMESTAMP".to_string(),
14192 vec![val, fmt_expr.clone()],
14193 ))))
14194 }
14195 } else {
14196 Ok(Expression::Function(Box::new(Function::new(
14197 "TO_TIMESTAMP".to_string(),
14198 vec![val, fmt_expr.clone()],
14199 ))))
14200 }
14201 }
14202 DialectType::Hive => {
14203 // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
14204 if let Expression::Literal(lit) = fmt_expr {
14205 if let crate::expressions::Literal::String(s) =
14206 lit.as_ref()
14207 {
14208 let java_fmt = c_to_java_format_parse(s);
14209 let unix_ts =
14210 Expression::Function(Box::new(Function::new(
14211 "UNIX_TIMESTAMP".to_string(),
14212 vec![val, Expression::string(&java_fmt)],
14213 )));
14214 let from_unix =
14215 Expression::Function(Box::new(Function::new(
14216 "FROM_UNIXTIME".to_string(),
14217 vec![unix_ts],
14218 )));
14219 Ok(Expression::Cast(Box::new(
14220 crate::expressions::Cast {
14221 this: from_unix,
14222 to: DataType::Timestamp {
14223 timezone: false,
14224 precision: None,
14225 },
14226 trailing_comments: Vec::new(),
14227 double_colon_syntax: false,
14228 format: None,
14229 default: None,
14230 inferred_type: None,
14231 },
14232 )))
14233 } else {
14234 Ok(Expression::Function(f))
14235 }
14236 } else {
14237 Ok(Expression::Function(f))
14238 }
14239 }
14240 DialectType::Presto
14241 | DialectType::Trino
14242 | DialectType::Athena => {
14243 // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
14244 if let Expression::Literal(lit) = fmt_expr {
14245 if let crate::expressions::Literal::String(s) =
14246 lit.as_ref()
14247 {
14248 let presto_fmt = duckdb_to_presto_format(s);
14249 Ok(Expression::Function(Box::new(Function::new(
14250 "DATE_PARSE".to_string(),
14251 vec![val, Expression::string(&presto_fmt)],
14252 ))))
14253 } else {
14254 Ok(Expression::Function(Box::new(Function::new(
14255 "DATE_PARSE".to_string(),
14256 vec![val, fmt_expr.clone()],
14257 ))))
14258 }
14259 } else {
14260 Ok(Expression::Function(Box::new(Function::new(
14261 "DATE_PARSE".to_string(),
14262 vec![val, fmt_expr.clone()],
14263 ))))
14264 }
14265 }
14266 DialectType::BigQuery => {
14267 // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
14268 if let Expression::Literal(lit) = fmt_expr {
14269 if let crate::expressions::Literal::String(s) =
14270 lit.as_ref()
14271 {
14272 let bq_fmt = duckdb_to_bigquery_format(s);
14273 Ok(Expression::Function(Box::new(Function::new(
14274 "PARSE_TIMESTAMP".to_string(),
14275 vec![Expression::string(&bq_fmt), val],
14276 ))))
14277 } else {
14278 Ok(Expression::Function(Box::new(Function::new(
14279 "PARSE_TIMESTAMP".to_string(),
14280 vec![fmt_expr.clone(), val],
14281 ))))
14282 }
14283 } else {
14284 Ok(Expression::Function(Box::new(Function::new(
14285 "PARSE_TIMESTAMP".to_string(),
14286 vec![fmt_expr.clone(), val],
14287 ))))
14288 }
14289 }
14290 _ => Ok(Expression::Function(f)),
14291 }
14292 }
14293 // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
14294 "DATE_FORMAT"
14295 if f.args.len() >= 2
14296 && matches!(
14297 source,
14298 DialectType::Presto
14299 | DialectType::Trino
14300 | DialectType::Athena
14301 ) =>
14302 {
14303 let val = f.args[0].clone();
14304 let fmt_expr = &f.args[1];
14305
14306 match target {
14307 DialectType::Presto
14308 | DialectType::Trino
14309 | DialectType::Athena => {
14310 // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
14311 if let Expression::Literal(lit) = fmt_expr {
14312 if let crate::expressions::Literal::String(s) =
14313 lit.as_ref()
14314 {
14315 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
14316 Ok(Expression::Function(Box::new(Function::new(
14317 "DATE_FORMAT".to_string(),
14318 vec![val, Expression::string(&normalized)],
14319 ))))
14320 } else {
14321 Ok(Expression::Function(f))
14322 }
14323 } else {
14324 Ok(Expression::Function(f))
14325 }
14326 }
14327 DialectType::Hive
14328 | DialectType::Spark
14329 | DialectType::Databricks => {
14330 // Convert Presto C-style to Java-style format
14331 if let Expression::Literal(lit) = fmt_expr {
14332 if let crate::expressions::Literal::String(s) =
14333 lit.as_ref()
14334 {
14335 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
14336 Ok(Expression::Function(Box::new(Function::new(
14337 "DATE_FORMAT".to_string(),
14338 vec![val, Expression::string(&java_fmt)],
14339 ))))
14340 } else {
14341 Ok(Expression::Function(f))
14342 }
14343 } else {
14344 Ok(Expression::Function(f))
14345 }
14346 }
14347 DialectType::DuckDB => {
14348 // Convert to STRFTIME(val, duckdb_fmt)
14349 if let Expression::Literal(lit) = fmt_expr {
14350 if let crate::expressions::Literal::String(s) =
14351 lit.as_ref()
14352 {
14353 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
14354 Ok(Expression::Function(Box::new(Function::new(
14355 "STRFTIME".to_string(),
14356 vec![val, Expression::string(&duckdb_fmt)],
14357 ))))
14358 } else {
14359 Ok(Expression::Function(Box::new(Function::new(
14360 "STRFTIME".to_string(),
14361 vec![val, fmt_expr.clone()],
14362 ))))
14363 }
14364 } else {
14365 Ok(Expression::Function(Box::new(Function::new(
14366 "STRFTIME".to_string(),
14367 vec![val, fmt_expr.clone()],
14368 ))))
14369 }
14370 }
14371 DialectType::BigQuery => {
14372 // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
14373 if let Expression::Literal(lit) = fmt_expr {
14374 if let crate::expressions::Literal::String(s) =
14375 lit.as_ref()
14376 {
14377 let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
14378 Ok(Expression::Function(Box::new(Function::new(
14379 "FORMAT_DATE".to_string(),
14380 vec![Expression::string(&bq_fmt), val],
14381 ))))
14382 } else {
14383 Ok(Expression::Function(Box::new(Function::new(
14384 "FORMAT_DATE".to_string(),
14385 vec![fmt_expr.clone(), val],
14386 ))))
14387 }
14388 } else {
14389 Ok(Expression::Function(Box::new(Function::new(
14390 "FORMAT_DATE".to_string(),
14391 vec![fmt_expr.clone(), val],
14392 ))))
14393 }
14394 }
14395 _ => Ok(Expression::Function(f)),
14396 }
14397 }
14398 // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
14399 "DATE_PARSE"
14400 if f.args.len() >= 2
14401 && matches!(
14402 source,
14403 DialectType::Presto
14404 | DialectType::Trino
14405 | DialectType::Athena
14406 ) =>
14407 {
14408 let val = f.args[0].clone();
14409 let fmt_expr = &f.args[1];
14410
14411 match target {
14412 DialectType::Presto
14413 | DialectType::Trino
14414 | DialectType::Athena => {
14415 // Presto -> Presto: normalize format
14416 if let Expression::Literal(lit) = fmt_expr {
14417 if let crate::expressions::Literal::String(s) =
14418 lit.as_ref()
14419 {
14420 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
14421 Ok(Expression::Function(Box::new(Function::new(
14422 "DATE_PARSE".to_string(),
14423 vec![val, Expression::string(&normalized)],
14424 ))))
14425 } else {
14426 Ok(Expression::Function(f))
14427 }
14428 } else {
14429 Ok(Expression::Function(f))
14430 }
14431 }
14432 DialectType::Hive => {
14433 // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
14434 if let Expression::Literal(lit) = fmt_expr {
14435 if let crate::expressions::Literal::String(s) =
14436 lit.as_ref()
14437 {
14438 if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
14439 || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
14440 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
14441 this: val,
14442 to: DataType::Timestamp { timezone: false, precision: None },
14443 trailing_comments: Vec::new(),
14444 double_colon_syntax: false,
14445 format: None,
14446 default: None,
14447 inferred_type: None,
14448 })))
14449 } else {
14450 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
14451 Ok(Expression::Function(Box::new(Function::new(
14452 "TO_TIMESTAMP".to_string(),
14453 vec![val, Expression::string(&java_fmt)],
14454 ))))
14455 }
14456 } else {
14457 Ok(Expression::Function(f))
14458 }
14459 } else {
14460 Ok(Expression::Function(f))
14461 }
14462 }
14463 DialectType::Spark | DialectType::Databricks => {
14464 // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
14465 if let Expression::Literal(lit) = fmt_expr {
14466 if let crate::expressions::Literal::String(s) =
14467 lit.as_ref()
14468 {
14469 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
14470 Ok(Expression::Function(Box::new(Function::new(
14471 "TO_TIMESTAMP".to_string(),
14472 vec![val, Expression::string(&java_fmt)],
14473 ))))
14474 } else {
14475 Ok(Expression::Function(f))
14476 }
14477 } else {
14478 Ok(Expression::Function(f))
14479 }
14480 }
14481 DialectType::DuckDB => {
14482 // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
14483 if let Expression::Literal(lit) = fmt_expr {
14484 if let crate::expressions::Literal::String(s) =
14485 lit.as_ref()
14486 {
14487 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
14488 Ok(Expression::Function(Box::new(Function::new(
14489 "STRPTIME".to_string(),
14490 vec![val, Expression::string(&duckdb_fmt)],
14491 ))))
14492 } else {
14493 Ok(Expression::Function(Box::new(Function::new(
14494 "STRPTIME".to_string(),
14495 vec![val, fmt_expr.clone()],
14496 ))))
14497 }
14498 } else {
14499 Ok(Expression::Function(Box::new(Function::new(
14500 "STRPTIME".to_string(),
14501 vec![val, fmt_expr.clone()],
14502 ))))
14503 }
14504 }
14505 _ => Ok(Expression::Function(f)),
14506 }
14507 }
14508 // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
14509 "FROM_BASE64"
14510 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
14511 {
14512 Ok(Expression::Function(Box::new(Function::new(
14513 "UNBASE64".to_string(),
14514 f.args,
14515 ))))
14516 }
14517 "TO_BASE64"
14518 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
14519 {
14520 Ok(Expression::Function(Box::new(Function::new(
14521 "BASE64".to_string(),
14522 f.args,
14523 ))))
14524 }
14525 // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
14526 "FROM_UNIXTIME"
14527 if f.args.len() == 1
14528 && matches!(
14529 source,
14530 DialectType::Presto
14531 | DialectType::Trino
14532 | DialectType::Athena
14533 )
14534 && matches!(
14535 target,
14536 DialectType::Spark | DialectType::Databricks
14537 ) =>
14538 {
14539 // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
14540 let from_unix = Expression::Function(Box::new(Function::new(
14541 "FROM_UNIXTIME".to_string(),
14542 f.args,
14543 )));
14544 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
14545 this: from_unix,
14546 to: DataType::Timestamp {
14547 timezone: false,
14548 precision: None,
14549 },
14550 trailing_comments: Vec::new(),
14551 double_colon_syntax: false,
14552 format: None,
14553 default: None,
14554 inferred_type: None,
14555 })))
14556 }
14557 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
14558 "DATE_FORMAT"
14559 if f.args.len() >= 2
14560 && !matches!(
14561 target,
14562 DialectType::Hive
14563 | DialectType::Spark
14564 | DialectType::Databricks
14565 | DialectType::MySQL
14566 | DialectType::SingleStore
14567 ) =>
14568 {
14569 let val = f.args[0].clone();
14570 let fmt_expr = &f.args[1];
14571 let is_hive_source = matches!(
14572 source,
14573 DialectType::Hive
14574 | DialectType::Spark
14575 | DialectType::Databricks
14576 );
14577
14578 fn java_to_c_format(fmt: &str) -> String {
14579 // Replace Java patterns with C strftime patterns.
14580 // Uses multi-pass to handle patterns that conflict.
14581 // First pass: replace multi-char patterns (longer first)
14582 let result = fmt
14583 .replace("yyyy", "%Y")
14584 .replace("SSSSSS", "%f")
14585 .replace("EEEE", "%W")
14586 .replace("MM", "%m")
14587 .replace("dd", "%d")
14588 .replace("HH", "%H")
14589 .replace("mm", "%M")
14590 .replace("ss", "%S")
14591 .replace("yy", "%y");
14592 // Second pass: handle single-char timezone patterns
14593 // z -> %Z (timezone name), Z -> %z (timezone offset)
14594 // Must be careful not to replace 'z'/'Z' inside already-replaced %Y, %M etc.
14595 let mut out = String::new();
14596 let chars: Vec<char> = result.chars().collect();
14597 let mut i = 0;
14598 while i < chars.len() {
14599 if chars[i] == '%' && i + 1 < chars.len() {
14600 // Already a format specifier, skip both chars
14601 out.push(chars[i]);
14602 out.push(chars[i + 1]);
14603 i += 2;
14604 } else if chars[i] == 'z' {
14605 out.push_str("%Z");
14606 i += 1;
14607 } else if chars[i] == 'Z' {
14608 out.push_str("%z");
14609 i += 1;
14610 } else {
14611 out.push(chars[i]);
14612 i += 1;
14613 }
14614 }
14615 out
14616 }
14617
14618 fn java_to_presto_format(fmt: &str) -> String {
14619 // Presto uses %T for HH:MM:SS
14620 let c_fmt = java_to_c_format(fmt);
14621 c_fmt.replace("%H:%M:%S", "%T")
14622 }
14623
14624 fn java_to_bq_format(fmt: &str) -> String {
14625 // BigQuery uses %F for yyyy-MM-dd and %T for HH:mm:ss
14626 let c_fmt = java_to_c_format(fmt);
14627 c_fmt.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
14628 }
14629
14630 // For Hive source, CAST string literals to appropriate type
14631 let cast_val = if is_hive_source {
14632 match &val {
14633 Expression::Literal(lit)
14634 if matches!(
14635 lit.as_ref(),
14636 crate::expressions::Literal::String(_)
14637 ) =>
14638 {
14639 match target {
14640 DialectType::DuckDB
14641 | DialectType::Presto
14642 | DialectType::Trino
14643 | DialectType::Athena => {
14644 Self::ensure_cast_timestamp(val.clone())
14645 }
14646 DialectType::BigQuery => {
14647 // BigQuery: CAST(val AS DATETIME)
14648 Expression::Cast(Box::new(
14649 crate::expressions::Cast {
14650 this: val.clone(),
14651 to: DataType::Custom {
14652 name: "DATETIME".to_string(),
14653 },
14654 trailing_comments: vec![],
14655 double_colon_syntax: false,
14656 format: None,
14657 default: None,
14658 inferred_type: None,
14659 },
14660 ))
14661 }
14662 _ => val.clone(),
14663 }
14664 }
14665 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
14666 Expression::Cast(c)
14667 if matches!(c.to, DataType::Date)
14668 && matches!(
14669 target,
14670 DialectType::Presto
14671 | DialectType::Trino
14672 | DialectType::Athena
14673 ) =>
14674 {
14675 Expression::Cast(Box::new(crate::expressions::Cast {
14676 this: val.clone(),
14677 to: DataType::Timestamp {
14678 timezone: false,
14679 precision: None,
14680 },
14681 trailing_comments: vec![],
14682 double_colon_syntax: false,
14683 format: None,
14684 default: None,
14685 inferred_type: None,
14686 }))
14687 }
14688 Expression::Literal(lit)
14689 if matches!(
14690 lit.as_ref(),
14691 crate::expressions::Literal::Date(_)
14692 ) && matches!(
14693 target,
14694 DialectType::Presto
14695 | DialectType::Trino
14696 | DialectType::Athena
14697 ) =>
14698 {
14699 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
14700 let cast_date = Self::date_literal_to_cast(val.clone());
14701 Expression::Cast(Box::new(crate::expressions::Cast {
14702 this: cast_date,
14703 to: DataType::Timestamp {
14704 timezone: false,
14705 precision: None,
14706 },
14707 trailing_comments: vec![],
14708 double_colon_syntax: false,
14709 format: None,
14710 default: None,
14711 inferred_type: None,
14712 }))
14713 }
14714 _ => val.clone(),
14715 }
14716 } else {
14717 val.clone()
14718 };
14719
14720 match target {
14721 DialectType::DuckDB => {
14722 if let Expression::Literal(lit) = fmt_expr {
14723 if let crate::expressions::Literal::String(s) =
14724 lit.as_ref()
14725 {
14726 let c_fmt = if is_hive_source {
14727 java_to_c_format(s)
14728 } else {
14729 s.clone()
14730 };
14731 Ok(Expression::Function(Box::new(Function::new(
14732 "STRFTIME".to_string(),
14733 vec![cast_val, Expression::string(&c_fmt)],
14734 ))))
14735 } else {
14736 Ok(Expression::Function(Box::new(Function::new(
14737 "STRFTIME".to_string(),
14738 vec![cast_val, fmt_expr.clone()],
14739 ))))
14740 }
14741 } else {
14742 Ok(Expression::Function(Box::new(Function::new(
14743 "STRFTIME".to_string(),
14744 vec![cast_val, fmt_expr.clone()],
14745 ))))
14746 }
14747 }
14748 DialectType::Presto
14749 | DialectType::Trino
14750 | DialectType::Athena => {
14751 if is_hive_source {
14752 if let Expression::Literal(lit) = fmt_expr {
14753 if let crate::expressions::Literal::String(s) =
14754 lit.as_ref()
14755 {
14756 let p_fmt = java_to_presto_format(s);
14757 Ok(Expression::Function(Box::new(
14758 Function::new(
14759 "DATE_FORMAT".to_string(),
14760 vec![
14761 cast_val,
14762 Expression::string(&p_fmt),
14763 ],
14764 ),
14765 )))
14766 } else {
14767 Ok(Expression::Function(Box::new(
14768 Function::new(
14769 "DATE_FORMAT".to_string(),
14770 vec![cast_val, fmt_expr.clone()],
14771 ),
14772 )))
14773 }
14774 } else {
14775 Ok(Expression::Function(Box::new(Function::new(
14776 "DATE_FORMAT".to_string(),
14777 vec![cast_val, fmt_expr.clone()],
14778 ))))
14779 }
14780 } else {
14781 Ok(Expression::Function(Box::new(Function::new(
14782 "DATE_FORMAT".to_string(),
14783 f.args,
14784 ))))
14785 }
14786 }
14787 DialectType::BigQuery => {
14788 // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
14789 if let Expression::Literal(lit) = fmt_expr {
14790 if let crate::expressions::Literal::String(s) =
14791 lit.as_ref()
14792 {
14793 let bq_fmt = if is_hive_source {
14794 java_to_bq_format(s)
14795 } else {
14796 java_to_c_format(s)
14797 };
14798 Ok(Expression::Function(Box::new(Function::new(
14799 "FORMAT_DATE".to_string(),
14800 vec![Expression::string(&bq_fmt), cast_val],
14801 ))))
14802 } else {
14803 Ok(Expression::Function(Box::new(Function::new(
14804 "FORMAT_DATE".to_string(),
14805 vec![fmt_expr.clone(), cast_val],
14806 ))))
14807 }
14808 } else {
14809 Ok(Expression::Function(Box::new(Function::new(
14810 "FORMAT_DATE".to_string(),
14811 vec![fmt_expr.clone(), cast_val],
14812 ))))
14813 }
14814 }
14815 DialectType::PostgreSQL | DialectType::Redshift => {
14816 if let Expression::Literal(lit) = fmt_expr {
14817 if let crate::expressions::Literal::String(s) =
14818 lit.as_ref()
14819 {
14820 let pg_fmt = s
14821 .replace("yyyy", "YYYY")
14822 .replace("MM", "MM")
14823 .replace("dd", "DD")
14824 .replace("HH", "HH24")
14825 .replace("mm", "MI")
14826 .replace("ss", "SS")
14827 .replace("yy", "YY");
14828 Ok(Expression::Function(Box::new(Function::new(
14829 "TO_CHAR".to_string(),
14830 vec![val, Expression::string(&pg_fmt)],
14831 ))))
14832 } else {
14833 Ok(Expression::Function(Box::new(Function::new(
14834 "TO_CHAR".to_string(),
14835 vec![val, fmt_expr.clone()],
14836 ))))
14837 }
14838 } else {
14839 Ok(Expression::Function(Box::new(Function::new(
14840 "TO_CHAR".to_string(),
14841 vec![val, fmt_expr.clone()],
14842 ))))
14843 }
14844 }
14845 _ => Ok(Expression::Function(f)),
14846 }
14847 }
14848 // DATEDIFF(unit, start, end) - 3-arg form
14849 // SQLite uses DATEDIFF(date1, date2, unit_string) instead
14850 "DATEDIFF" if f.args.len() == 3 => {
14851 let mut args = f.args;
14852 // SQLite source: args = (date1, date2, unit_string)
14853 // Standard source: args = (unit, start, end)
14854 let (_arg0, arg1, arg2, unit_str) =
14855 if matches!(source, DialectType::SQLite) {
14856 let date1 = args.remove(0);
14857 let date2 = args.remove(0);
14858 let unit_expr = args.remove(0);
14859 let unit_s = Self::get_unit_str_static(&unit_expr);
14860
14861 // For SQLite target, generate JULIANDAY arithmetic directly
14862 if matches!(target, DialectType::SQLite) {
14863 let jd_first = Expression::Function(Box::new(
14864 Function::new("JULIANDAY".to_string(), vec![date1]),
14865 ));
14866 let jd_second = Expression::Function(Box::new(
14867 Function::new("JULIANDAY".to_string(), vec![date2]),
14868 ));
14869 let diff = Expression::Sub(Box::new(
14870 crate::expressions::BinaryOp::new(
14871 jd_first, jd_second,
14872 ),
14873 ));
14874 let paren_diff = Expression::Paren(Box::new(
14875 crate::expressions::Paren {
14876 this: diff,
14877 trailing_comments: Vec::new(),
14878 },
14879 ));
14880 let adjusted = match unit_s.as_str() {
14881 "HOUR" => Expression::Mul(Box::new(
14882 crate::expressions::BinaryOp::new(
14883 paren_diff,
14884 Expression::Literal(Box::new(
14885 Literal::Number("24.0".to_string()),
14886 )),
14887 ),
14888 )),
14889 "MINUTE" => Expression::Mul(Box::new(
14890 crate::expressions::BinaryOp::new(
14891 paren_diff,
14892 Expression::Literal(Box::new(
14893 Literal::Number("1440.0".to_string()),
14894 )),
14895 ),
14896 )),
14897 "SECOND" => Expression::Mul(Box::new(
14898 crate::expressions::BinaryOp::new(
14899 paren_diff,
14900 Expression::Literal(Box::new(
14901 Literal::Number("86400.0".to_string()),
14902 )),
14903 ),
14904 )),
14905 "MONTH" => Expression::Div(Box::new(
14906 crate::expressions::BinaryOp::new(
14907 paren_diff,
14908 Expression::Literal(Box::new(
14909 Literal::Number("30.0".to_string()),
14910 )),
14911 ),
14912 )),
14913 "YEAR" => Expression::Div(Box::new(
14914 crate::expressions::BinaryOp::new(
14915 paren_diff,
14916 Expression::Literal(Box::new(
14917 Literal::Number("365.0".to_string()),
14918 )),
14919 ),
14920 )),
14921 _ => paren_diff,
14922 };
14923 return Ok(Expression::Cast(Box::new(Cast {
14924 this: adjusted,
14925 to: DataType::Int {
14926 length: None,
14927 integer_spelling: true,
14928 },
14929 trailing_comments: vec![],
14930 double_colon_syntax: false,
14931 format: None,
14932 default: None,
14933 inferred_type: None,
14934 })));
14935 }
14936
14937 // For other targets, remap to standard (unit, start, end) form
14938 let unit_ident =
14939 Expression::Identifier(Identifier::new(&unit_s));
14940 (unit_ident, date1, date2, unit_s)
14941 } else {
14942 let arg0 = args.remove(0);
14943 let arg1 = args.remove(0);
14944 let arg2 = args.remove(0);
14945 let unit_s = Self::get_unit_str_static(&arg0);
14946 (arg0, arg1, arg2, unit_s)
14947 };
14948
14949 // For Hive/Spark source, string literal dates need to be cast
14950 // Note: Databricks is excluded - it handles string args like standard SQL
14951 let is_hive_spark =
14952 matches!(source, DialectType::Hive | DialectType::Spark);
14953
14954 match target {
14955 DialectType::Snowflake => {
14956 let unit =
14957 Expression::Identifier(Identifier::new(&unit_str));
14958 // Use ensure_to_date_preserved to add TO_DATE with a marker
14959 // that prevents the Snowflake TO_DATE handler from converting it to CAST
14960 let d1 = if is_hive_spark {
14961 Self::ensure_to_date_preserved(arg1)
14962 } else {
14963 arg1
14964 };
14965 let d2 = if is_hive_spark {
14966 Self::ensure_to_date_preserved(arg2)
14967 } else {
14968 arg2
14969 };
14970 Ok(Expression::Function(Box::new(Function::new(
14971 "DATEDIFF".to_string(),
14972 vec![unit, d1, d2],
14973 ))))
14974 }
14975 DialectType::Redshift => {
14976 let unit =
14977 Expression::Identifier(Identifier::new(&unit_str));
14978 let d1 = if is_hive_spark {
14979 Self::ensure_cast_date(arg1)
14980 } else {
14981 arg1
14982 };
14983 let d2 = if is_hive_spark {
14984 Self::ensure_cast_date(arg2)
14985 } else {
14986 arg2
14987 };
14988 Ok(Expression::Function(Box::new(Function::new(
14989 "DATEDIFF".to_string(),
14990 vec![unit, d1, d2],
14991 ))))
14992 }
14993 DialectType::TSQL => {
14994 let unit =
14995 Expression::Identifier(Identifier::new(&unit_str));
14996 Ok(Expression::Function(Box::new(Function::new(
14997 "DATEDIFF".to_string(),
14998 vec![unit, arg1, arg2],
14999 ))))
15000 }
15001 DialectType::DuckDB => {
15002 let is_redshift_tsql = matches!(
15003 source,
15004 DialectType::Redshift | DialectType::TSQL
15005 );
15006 if is_hive_spark {
15007 // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
15008 let d1 = Self::ensure_cast_date(arg1);
15009 let d2 = Self::ensure_cast_date(arg2);
15010 Ok(Expression::Function(Box::new(Function::new(
15011 "DATE_DIFF".to_string(),
15012 vec![Expression::string(&unit_str), d1, d2],
15013 ))))
15014 } else if matches!(source, DialectType::Snowflake) {
15015 // For Snowflake source: special handling per unit
15016 match unit_str.as_str() {
15017 "NANOSECOND" => {
15018 // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
15019 fn cast_to_timestamp_ns(
15020 expr: Expression,
15021 ) -> Expression
15022 {
15023 Expression::Cast(Box::new(Cast {
15024 this: expr,
15025 to: DataType::Custom {
15026 name: "TIMESTAMP_NS".to_string(),
15027 },
15028 trailing_comments: vec![],
15029 double_colon_syntax: false,
15030 format: None,
15031 default: None,
15032 inferred_type: None,
15033 }))
15034 }
15035 let epoch_end = Expression::Function(Box::new(
15036 Function::new(
15037 "EPOCH_NS".to_string(),
15038 vec![cast_to_timestamp_ns(arg2)],
15039 ),
15040 ));
15041 let epoch_start = Expression::Function(
15042 Box::new(Function::new(
15043 "EPOCH_NS".to_string(),
15044 vec![cast_to_timestamp_ns(arg1)],
15045 )),
15046 );
15047 Ok(Expression::Sub(Box::new(BinaryOp::new(
15048 epoch_end,
15049 epoch_start,
15050 ))))
15051 }
15052 "WEEK" => {
15053 // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
15054 let d1 = Self::force_cast_date(arg1);
15055 let d2 = Self::force_cast_date(arg2);
15056 let dt1 = Expression::Function(Box::new(
15057 Function::new(
15058 "DATE_TRUNC".to_string(),
15059 vec![Expression::string("WEEK"), d1],
15060 ),
15061 ));
15062 let dt2 = Expression::Function(Box::new(
15063 Function::new(
15064 "DATE_TRUNC".to_string(),
15065 vec![Expression::string("WEEK"), d2],
15066 ),
15067 ));
15068 Ok(Expression::Function(Box::new(
15069 Function::new(
15070 "DATE_DIFF".to_string(),
15071 vec![
15072 Expression::string(&unit_str),
15073 dt1,
15074 dt2,
15075 ],
15076 ),
15077 )))
15078 }
15079 _ => {
15080 // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
15081 let d1 = Self::force_cast_date(arg1);
15082 let d2 = Self::force_cast_date(arg2);
15083 Ok(Expression::Function(Box::new(
15084 Function::new(
15085 "DATE_DIFF".to_string(),
15086 vec![
15087 Expression::string(&unit_str),
15088 d1,
15089 d2,
15090 ],
15091 ),
15092 )))
15093 }
15094 }
15095 } else if is_redshift_tsql {
15096 // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
15097 let d1 = Self::force_cast_timestamp(arg1);
15098 let d2 = Self::force_cast_timestamp(arg2);
15099 Ok(Expression::Function(Box::new(Function::new(
15100 "DATE_DIFF".to_string(),
15101 vec![Expression::string(&unit_str), d1, d2],
15102 ))))
15103 } else {
15104 // Keep as DATEDIFF so DuckDB's transform_datediff handles
15105 // DATE_TRUNC for WEEK, CAST for string literals, etc.
15106 let unit =
15107 Expression::Identifier(Identifier::new(&unit_str));
15108 Ok(Expression::Function(Box::new(Function::new(
15109 "DATEDIFF".to_string(),
15110 vec![unit, arg1, arg2],
15111 ))))
15112 }
15113 }
15114 DialectType::BigQuery => {
15115 let is_redshift_tsql = matches!(
15116 source,
15117 DialectType::Redshift
15118 | DialectType::TSQL
15119 | DialectType::Snowflake
15120 );
15121 let cast_d1 = if is_hive_spark {
15122 Self::ensure_cast_date(arg1)
15123 } else if is_redshift_tsql {
15124 Self::force_cast_datetime(arg1)
15125 } else {
15126 Self::ensure_cast_datetime(arg1)
15127 };
15128 let cast_d2 = if is_hive_spark {
15129 Self::ensure_cast_date(arg2)
15130 } else if is_redshift_tsql {
15131 Self::force_cast_datetime(arg2)
15132 } else {
15133 Self::ensure_cast_datetime(arg2)
15134 };
15135 let unit =
15136 Expression::Identifier(Identifier::new(&unit_str));
15137 Ok(Expression::Function(Box::new(Function::new(
15138 "DATE_DIFF".to_string(),
15139 vec![cast_d2, cast_d1, unit],
15140 ))))
15141 }
15142 DialectType::Presto
15143 | DialectType::Trino
15144 | DialectType::Athena => {
15145 // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
15146 // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
15147 let is_redshift_tsql = matches!(
15148 source,
15149 DialectType::Redshift
15150 | DialectType::TSQL
15151 | DialectType::Snowflake
15152 );
15153 let d1 = if is_hive_spark {
15154 Self::double_cast_timestamp_date(arg1)
15155 } else if is_redshift_tsql {
15156 Self::force_cast_timestamp(arg1)
15157 } else {
15158 arg1
15159 };
15160 let d2 = if is_hive_spark {
15161 Self::double_cast_timestamp_date(arg2)
15162 } else if is_redshift_tsql {
15163 Self::force_cast_timestamp(arg2)
15164 } else {
15165 arg2
15166 };
15167 Ok(Expression::Function(Box::new(Function::new(
15168 "DATE_DIFF".to_string(),
15169 vec![Expression::string(&unit_str), d1, d2],
15170 ))))
15171 }
15172 DialectType::Hive => match unit_str.as_str() {
15173 "MONTH" => Ok(Expression::Cast(Box::new(Cast {
15174 this: Expression::Function(Box::new(Function::new(
15175 "MONTHS_BETWEEN".to_string(),
15176 vec![arg2, arg1],
15177 ))),
15178 to: DataType::Int {
15179 length: None,
15180 integer_spelling: false,
15181 },
15182 trailing_comments: vec![],
15183 double_colon_syntax: false,
15184 format: None,
15185 default: None,
15186 inferred_type: None,
15187 }))),
15188 "WEEK" => Ok(Expression::Cast(Box::new(Cast {
15189 this: Expression::Div(Box::new(
15190 crate::expressions::BinaryOp::new(
15191 Expression::Function(Box::new(Function::new(
15192 "DATEDIFF".to_string(),
15193 vec![arg2, arg1],
15194 ))),
15195 Expression::number(7),
15196 ),
15197 )),
15198 to: DataType::Int {
15199 length: None,
15200 integer_spelling: false,
15201 },
15202 trailing_comments: vec![],
15203 double_colon_syntax: false,
15204 format: None,
15205 default: None,
15206 inferred_type: None,
15207 }))),
15208 _ => Ok(Expression::Function(Box::new(Function::new(
15209 "DATEDIFF".to_string(),
15210 vec![arg2, arg1],
15211 )))),
15212 },
15213 DialectType::Spark | DialectType::Databricks => {
15214 let unit =
15215 Expression::Identifier(Identifier::new(&unit_str));
15216 Ok(Expression::Function(Box::new(Function::new(
15217 "DATEDIFF".to_string(),
15218 vec![unit, arg1, arg2],
15219 ))))
15220 }
15221 _ => {
15222 // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
15223 let d1 = if is_hive_spark {
15224 Self::ensure_cast_date(arg1)
15225 } else {
15226 arg1
15227 };
15228 let d2 = if is_hive_spark {
15229 Self::ensure_cast_date(arg2)
15230 } else {
15231 arg2
15232 };
15233 let unit =
15234 Expression::Identifier(Identifier::new(&unit_str));
15235 Ok(Expression::Function(Box::new(Function::new(
15236 "DATEDIFF".to_string(),
15237 vec![unit, d1, d2],
15238 ))))
15239 }
15240 }
15241 }
15242 // DATEDIFF(end, start) - 2-arg form from Hive/MySQL
15243 "DATEDIFF" if f.args.len() == 2 => {
15244 let mut args = f.args;
15245 let arg0 = args.remove(0);
15246 let arg1 = args.remove(0);
15247
15248 // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
15249 // Also recognizes TryCast/Cast to DATE that may have been produced by
15250 // cross-dialect TO_DATE -> TRY_CAST conversion
15251 let unwrap_to_date = |e: Expression| -> (Expression, bool) {
15252 if let Expression::Function(ref f) = e {
15253 if f.name.eq_ignore_ascii_case("TO_DATE")
15254 && f.args.len() == 1
15255 {
15256 return (f.args[0].clone(), true);
15257 }
15258 }
15259 // Also recognize TryCast(x, Date) as an already-converted TO_DATE
15260 if let Expression::TryCast(ref c) = e {
15261 if matches!(c.to, DataType::Date) {
15262 return (e, true); // Already properly cast, return as-is
15263 }
15264 }
15265 (e, false)
15266 };
15267
15268 match target {
15269 DialectType::DuckDB => {
15270 // For Hive source, always CAST to DATE
15271 // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
15272 let cast_d0 = if matches!(
15273 source,
15274 DialectType::Hive
15275 | DialectType::Spark
15276 | DialectType::Databricks
15277 ) {
15278 let (inner, was_to_date) = unwrap_to_date(arg1);
15279 if was_to_date {
15280 // Already a date expression, use directly
15281 if matches!(&inner, Expression::TryCast(_)) {
15282 inner // Already TRY_CAST(x AS DATE)
15283 } else {
15284 Self::try_cast_date(inner)
15285 }
15286 } else {
15287 Self::force_cast_date(inner)
15288 }
15289 } else {
15290 Self::ensure_cast_date(arg1)
15291 };
15292 let cast_d1 = if matches!(
15293 source,
15294 DialectType::Hive
15295 | DialectType::Spark
15296 | DialectType::Databricks
15297 ) {
15298 let (inner, was_to_date) = unwrap_to_date(arg0);
15299 if was_to_date {
15300 if matches!(&inner, Expression::TryCast(_)) {
15301 inner
15302 } else {
15303 Self::try_cast_date(inner)
15304 }
15305 } else {
15306 Self::force_cast_date(inner)
15307 }
15308 } else {
15309 Self::ensure_cast_date(arg0)
15310 };
15311 Ok(Expression::Function(Box::new(Function::new(
15312 "DATE_DIFF".to_string(),
15313 vec![Expression::string("DAY"), cast_d0, cast_d1],
15314 ))))
15315 }
15316 DialectType::Presto
15317 | DialectType::Trino
15318 | DialectType::Athena => {
15319 // For Hive/Spark source, apply double_cast_timestamp_date
15320 // For other sources (MySQL etc.), just swap args without casting
15321 if matches!(
15322 source,
15323 DialectType::Hive
15324 | DialectType::Spark
15325 | DialectType::Databricks
15326 ) {
15327 let cast_fn = |e: Expression| -> Expression {
15328 let (inner, was_to_date) = unwrap_to_date(e);
15329 if was_to_date {
15330 let first_cast =
15331 Self::double_cast_timestamp_date(inner);
15332 Self::double_cast_timestamp_date(first_cast)
15333 } else {
15334 Self::double_cast_timestamp_date(inner)
15335 }
15336 };
15337 Ok(Expression::Function(Box::new(Function::new(
15338 "DATE_DIFF".to_string(),
15339 vec![
15340 Expression::string("DAY"),
15341 cast_fn(arg1),
15342 cast_fn(arg0),
15343 ],
15344 ))))
15345 } else {
15346 Ok(Expression::Function(Box::new(Function::new(
15347 "DATE_DIFF".to_string(),
15348 vec![Expression::string("DAY"), arg1, arg0],
15349 ))))
15350 }
15351 }
15352 DialectType::Redshift => {
15353 let unit = Expression::Identifier(Identifier::new("DAY"));
15354 Ok(Expression::Function(Box::new(Function::new(
15355 "DATEDIFF".to_string(),
15356 vec![unit, arg1, arg0],
15357 ))))
15358 }
15359 _ => Ok(Expression::Function(Box::new(Function::new(
15360 "DATEDIFF".to_string(),
15361 vec![arg0, arg1],
15362 )))),
15363 }
15364 }
15365 // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
15366 "DATE_DIFF" if f.args.len() == 3 => {
15367 let mut args = f.args;
15368 let arg0 = args.remove(0);
15369 let arg1 = args.remove(0);
15370 let arg2 = args.remove(0);
15371 let unit_str = Self::get_unit_str_static(&arg0);
15372
15373 match target {
15374 DialectType::DuckDB => {
15375 // DuckDB: DATE_DIFF('UNIT', start, end)
15376 Ok(Expression::Function(Box::new(Function::new(
15377 "DATE_DIFF".to_string(),
15378 vec![Expression::string(&unit_str), arg1, arg2],
15379 ))))
15380 }
15381 DialectType::Presto
15382 | DialectType::Trino
15383 | DialectType::Athena => {
15384 Ok(Expression::Function(Box::new(Function::new(
15385 "DATE_DIFF".to_string(),
15386 vec![Expression::string(&unit_str), arg1, arg2],
15387 ))))
15388 }
15389 DialectType::ClickHouse => {
15390 // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
15391 let unit =
15392 Expression::Identifier(Identifier::new(&unit_str));
15393 Ok(Expression::Function(Box::new(Function::new(
15394 "DATE_DIFF".to_string(),
15395 vec![unit, arg1, arg2],
15396 ))))
15397 }
15398 DialectType::Snowflake | DialectType::Redshift => {
15399 let unit =
15400 Expression::Identifier(Identifier::new(&unit_str));
15401 Ok(Expression::Function(Box::new(Function::new(
15402 "DATEDIFF".to_string(),
15403 vec![unit, arg1, arg2],
15404 ))))
15405 }
15406 _ => {
15407 let unit =
15408 Expression::Identifier(Identifier::new(&unit_str));
15409 Ok(Expression::Function(Box::new(Function::new(
15410 "DATEDIFF".to_string(),
15411 vec![unit, arg1, arg2],
15412 ))))
15413 }
15414 }
15415 }
15416 // DATEADD(unit, val, date) - 3-arg form
15417 "DATEADD" if f.args.len() == 3 => {
15418 let mut args = f.args;
15419 let arg0 = args.remove(0);
15420 let arg1 = args.remove(0);
15421 let arg2 = args.remove(0);
15422 let unit_str = Self::get_unit_str_static(&arg0);
15423
15424 // Normalize TSQL unit abbreviations to standard names
15425 let unit_str = match unit_str.as_str() {
15426 "YY" | "YYYY" => "YEAR".to_string(),
15427 "QQ" | "Q" => "QUARTER".to_string(),
15428 "MM" | "M" => "MONTH".to_string(),
15429 "WK" | "WW" => "WEEK".to_string(),
15430 "DD" | "D" | "DY" => "DAY".to_string(),
15431 "HH" => "HOUR".to_string(),
15432 "MI" | "N" => "MINUTE".to_string(),
15433 "SS" | "S" => "SECOND".to_string(),
15434 "MS" => "MILLISECOND".to_string(),
15435 "MCS" | "US" => "MICROSECOND".to_string(),
15436 _ => unit_str,
15437 };
15438 match target {
15439 DialectType::Snowflake => {
15440 let unit =
15441 Expression::Identifier(Identifier::new(&unit_str));
15442 // Cast string literal to TIMESTAMP, but not for Snowflake source
15443 // (Snowflake natively accepts string literals in DATEADD)
15444 let arg2 = if matches!(
15445 &arg2,
15446 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
15447 ) && !matches!(source, DialectType::Snowflake)
15448 {
15449 Expression::Cast(Box::new(Cast {
15450 this: arg2,
15451 to: DataType::Timestamp {
15452 precision: None,
15453 timezone: false,
15454 },
15455 trailing_comments: Vec::new(),
15456 double_colon_syntax: false,
15457 format: None,
15458 default: None,
15459 inferred_type: None,
15460 }))
15461 } else {
15462 arg2
15463 };
15464 Ok(Expression::Function(Box::new(Function::new(
15465 "DATEADD".to_string(),
15466 vec![unit, arg1, arg2],
15467 ))))
15468 }
15469 DialectType::TSQL => {
15470 let unit =
15471 Expression::Identifier(Identifier::new(&unit_str));
15472 // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
15473 let arg2 = if matches!(
15474 &arg2,
15475 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
15476 ) && !matches!(
15477 source,
15478 DialectType::Spark
15479 | DialectType::Databricks
15480 | DialectType::Hive
15481 ) {
15482 Expression::Cast(Box::new(Cast {
15483 this: arg2,
15484 to: DataType::Custom {
15485 name: "DATETIME2".to_string(),
15486 },
15487 trailing_comments: Vec::new(),
15488 double_colon_syntax: false,
15489 format: None,
15490 default: None,
15491 inferred_type: None,
15492 }))
15493 } else {
15494 arg2
15495 };
15496 Ok(Expression::Function(Box::new(Function::new(
15497 "DATEADD".to_string(),
15498 vec![unit, arg1, arg2],
15499 ))))
15500 }
15501 DialectType::Redshift => {
15502 let unit =
15503 Expression::Identifier(Identifier::new(&unit_str));
15504 Ok(Expression::Function(Box::new(Function::new(
15505 "DATEADD".to_string(),
15506 vec![unit, arg1, arg2],
15507 ))))
15508 }
15509 DialectType::Databricks => {
15510 let unit =
15511 Expression::Identifier(Identifier::new(&unit_str));
15512 // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
15513 // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
15514 let func_name = if matches!(
15515 source,
15516 DialectType::TSQL
15517 | DialectType::Fabric
15518 | DialectType::Databricks
15519 | DialectType::Snowflake
15520 ) {
15521 "DATEADD"
15522 } else {
15523 "DATE_ADD"
15524 };
15525 Ok(Expression::Function(Box::new(Function::new(
15526 func_name.to_string(),
15527 vec![unit, arg1, arg2],
15528 ))))
15529 }
15530 DialectType::DuckDB => {
15531 // Special handling for NANOSECOND from Snowflake
15532 if unit_str == "NANOSECOND"
15533 && matches!(source, DialectType::Snowflake)
15534 {
15535 // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
15536 let cast_ts = Expression::Cast(Box::new(Cast {
15537 this: arg2,
15538 to: DataType::Custom {
15539 name: "TIMESTAMP_NS".to_string(),
15540 },
15541 trailing_comments: vec![],
15542 double_colon_syntax: false,
15543 format: None,
15544 default: None,
15545 inferred_type: None,
15546 }));
15547 let epoch_ns =
15548 Expression::Function(Box::new(Function::new(
15549 "EPOCH_NS".to_string(),
15550 vec![cast_ts],
15551 )));
15552 let sum = Expression::Add(Box::new(BinaryOp::new(
15553 epoch_ns, arg1,
15554 )));
15555 Ok(Expression::Function(Box::new(Function::new(
15556 "MAKE_TIMESTAMP_NS".to_string(),
15557 vec![sum],
15558 ))))
15559 } else {
15560 // DuckDB: convert to date + INTERVAL syntax with CAST
15561 let iu = Self::parse_interval_unit_static(&unit_str);
15562 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
15563 this: Some(arg1),
15564 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
15565 }));
15566 // Cast string literal to TIMESTAMP
15567 let arg2 = if matches!(
15568 &arg2,
15569 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
15570 ) {
15571 Expression::Cast(Box::new(Cast {
15572 this: arg2,
15573 to: DataType::Timestamp {
15574 precision: None,
15575 timezone: false,
15576 },
15577 trailing_comments: Vec::new(),
15578 double_colon_syntax: false,
15579 format: None,
15580 default: None,
15581 inferred_type: None,
15582 }))
15583 } else {
15584 arg2
15585 };
15586 Ok(Expression::Add(Box::new(
15587 crate::expressions::BinaryOp::new(arg2, interval),
15588 )))
15589 }
15590 }
15591 DialectType::Spark => {
15592 // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
15593 // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
15594 if matches!(source, DialectType::TSQL | DialectType::Fabric)
15595 {
15596 fn multiply_expr_spark(
15597 expr: Expression,
15598 factor: i64,
15599 ) -> Expression
15600 {
15601 if let Expression::Literal(lit) = &expr {
15602 if let crate::expressions::Literal::Number(n) =
15603 lit.as_ref()
15604 {
15605 if let Ok(val) = n.parse::<i64>() {
15606 return Expression::Literal(Box::new(
15607 crate::expressions::Literal::Number(
15608 (val * factor).to_string(),
15609 ),
15610 ));
15611 }
15612 }
15613 }
15614 Expression::Mul(Box::new(
15615 crate::expressions::BinaryOp::new(
15616 expr,
15617 Expression::Literal(Box::new(
15618 crate::expressions::Literal::Number(
15619 factor.to_string(),
15620 ),
15621 )),
15622 ),
15623 ))
15624 }
15625 let normalized_unit = match unit_str.as_str() {
15626 "YEAR" | "YY" | "YYYY" => "YEAR",
15627 "QUARTER" | "QQ" | "Q" => "QUARTER",
15628 "MONTH" | "MM" | "M" => "MONTH",
15629 "WEEK" | "WK" | "WW" => "WEEK",
15630 "DAY" | "DD" | "D" | "DY" => "DAY",
15631 _ => &unit_str,
15632 };
15633 match normalized_unit {
15634 "YEAR" => {
15635 let months = multiply_expr_spark(arg1, 12);
15636 Ok(Expression::Function(Box::new(
15637 Function::new(
15638 "ADD_MONTHS".to_string(),
15639 vec![arg2, months],
15640 ),
15641 )))
15642 }
15643 "QUARTER" => {
15644 let months = multiply_expr_spark(arg1, 3);
15645 Ok(Expression::Function(Box::new(
15646 Function::new(
15647 "ADD_MONTHS".to_string(),
15648 vec![arg2, months],
15649 ),
15650 )))
15651 }
15652 "MONTH" => Ok(Expression::Function(Box::new(
15653 Function::new(
15654 "ADD_MONTHS".to_string(),
15655 vec![arg2, arg1],
15656 ),
15657 ))),
15658 "WEEK" => {
15659 let days = multiply_expr_spark(arg1, 7);
15660 Ok(Expression::Function(Box::new(
15661 Function::new(
15662 "DATE_ADD".to_string(),
15663 vec![arg2, days],
15664 ),
15665 )))
15666 }
15667 "DAY" => Ok(Expression::Function(Box::new(
15668 Function::new(
15669 "DATE_ADD".to_string(),
15670 vec![arg2, arg1],
15671 ),
15672 ))),
15673 _ => {
15674 let unit = Expression::Identifier(
15675 Identifier::new(&unit_str),
15676 );
15677 Ok(Expression::Function(Box::new(
15678 Function::new(
15679 "DATE_ADD".to_string(),
15680 vec![unit, arg1, arg2],
15681 ),
15682 )))
15683 }
15684 }
15685 } else {
15686 // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
15687 let unit =
15688 Expression::Identifier(Identifier::new(&unit_str));
15689 Ok(Expression::Function(Box::new(Function::new(
15690 "DATE_ADD".to_string(),
15691 vec![unit, arg1, arg2],
15692 ))))
15693 }
15694 }
15695 DialectType::Hive => match unit_str.as_str() {
15696 "MONTH" => {
15697 Ok(Expression::Function(Box::new(Function::new(
15698 "ADD_MONTHS".to_string(),
15699 vec![arg2, arg1],
15700 ))))
15701 }
15702 _ => Ok(Expression::Function(Box::new(Function::new(
15703 "DATE_ADD".to_string(),
15704 vec![arg2, arg1],
15705 )))),
15706 },
15707 DialectType::Presto
15708 | DialectType::Trino
15709 | DialectType::Athena => {
15710 // Cast string literal date to TIMESTAMP
15711 let arg2 = if matches!(
15712 &arg2,
15713 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
15714 ) {
15715 Expression::Cast(Box::new(Cast {
15716 this: arg2,
15717 to: DataType::Timestamp {
15718 precision: None,
15719 timezone: false,
15720 },
15721 trailing_comments: Vec::new(),
15722 double_colon_syntax: false,
15723 format: None,
15724 default: None,
15725 inferred_type: None,
15726 }))
15727 } else {
15728 arg2
15729 };
15730 Ok(Expression::Function(Box::new(Function::new(
15731 "DATE_ADD".to_string(),
15732 vec![Expression::string(&unit_str), arg1, arg2],
15733 ))))
15734 }
15735 DialectType::MySQL => {
15736 let iu = Self::parse_interval_unit_static(&unit_str);
15737 Ok(Expression::DateAdd(Box::new(
15738 crate::expressions::DateAddFunc {
15739 this: arg2,
15740 interval: arg1,
15741 unit: iu,
15742 },
15743 )))
15744 }
15745 DialectType::PostgreSQL => {
15746 // Cast string literal date to TIMESTAMP
15747 let arg2 = if matches!(
15748 &arg2,
15749 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
15750 ) {
15751 Expression::Cast(Box::new(Cast {
15752 this: arg2,
15753 to: DataType::Timestamp {
15754 precision: None,
15755 timezone: false,
15756 },
15757 trailing_comments: Vec::new(),
15758 double_colon_syntax: false,
15759 format: None,
15760 default: None,
15761 inferred_type: None,
15762 }))
15763 } else {
15764 arg2
15765 };
15766 let interval = Expression::Interval(Box::new(
15767 crate::expressions::Interval {
15768 this: Some(Expression::string(&format!(
15769 "{} {}",
15770 Self::expr_to_string_static(&arg1),
15771 unit_str
15772 ))),
15773 unit: None,
15774 },
15775 ));
15776 Ok(Expression::Add(Box::new(
15777 crate::expressions::BinaryOp::new(arg2, interval),
15778 )))
15779 }
15780 DialectType::BigQuery => {
15781 let iu = Self::parse_interval_unit_static(&unit_str);
15782 let interval = Expression::Interval(Box::new(
15783 crate::expressions::Interval {
15784 this: Some(arg1),
15785 unit: Some(
15786 crate::expressions::IntervalUnitSpec::Simple {
15787 unit: iu,
15788 use_plural: false,
15789 },
15790 ),
15791 },
15792 ));
15793 // Non-TSQL sources: CAST string literal to DATETIME
15794 let arg2 = if !matches!(
15795 source,
15796 DialectType::TSQL | DialectType::Fabric
15797 ) && matches!(
15798 &arg2,
15799 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
15800 ) {
15801 Expression::Cast(Box::new(Cast {
15802 this: arg2,
15803 to: DataType::Custom {
15804 name: "DATETIME".to_string(),
15805 },
15806 trailing_comments: Vec::new(),
15807 double_colon_syntax: false,
15808 format: None,
15809 default: None,
15810 inferred_type: None,
15811 }))
15812 } else {
15813 arg2
15814 };
15815 Ok(Expression::Function(Box::new(Function::new(
15816 "DATE_ADD".to_string(),
15817 vec![arg2, interval],
15818 ))))
15819 }
15820 _ => {
15821 let unit =
15822 Expression::Identifier(Identifier::new(&unit_str));
15823 Ok(Expression::Function(Box::new(Function::new(
15824 "DATEADD".to_string(),
15825 vec![unit, arg1, arg2],
15826 ))))
15827 }
15828 }
15829 }
15830 // DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
15831 // or (date, val, 'UNIT') from Generic canonical form
15832 "DATE_ADD" if f.args.len() == 3 => {
15833 let mut args = f.args;
15834 let arg0 = args.remove(0);
15835 let arg1 = args.remove(0);
15836 let arg2 = args.remove(0);
15837 // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
15838 // where arg2 is a string literal matching a unit name
15839 let arg2_unit = match &arg2 {
15840 Expression::Literal(lit)
15841 if matches!(lit.as_ref(), Literal::String(_)) =>
15842 {
15843 let Literal::String(s) = lit.as_ref() else {
15844 unreachable!()
15845 };
15846 let u = s.to_ascii_uppercase();
15847 if matches!(
15848 u.as_str(),
15849 "DAY"
15850 | "MONTH"
15851 | "YEAR"
15852 | "HOUR"
15853 | "MINUTE"
15854 | "SECOND"
15855 | "WEEK"
15856 | "QUARTER"
15857 | "MILLISECOND"
15858 | "MICROSECOND"
15859 ) {
15860 Some(u)
15861 } else {
15862 None
15863 }
15864 }
15865 _ => None,
15866 };
15867 // Reorder: if arg2 is the unit, swap to (unit, val, date) form
15868 let (unit_str, val, date) = if let Some(u) = arg2_unit {
15869 (u, arg1, arg0)
15870 } else {
15871 (Self::get_unit_str_static(&arg0), arg1, arg2)
15872 };
15873 // Alias for backward compat with the rest of the match
15874 let arg1 = val;
15875 let arg2 = date;
15876
15877 match target {
15878 DialectType::Presto
15879 | DialectType::Trino
15880 | DialectType::Athena => {
15881 Ok(Expression::Function(Box::new(Function::new(
15882 "DATE_ADD".to_string(),
15883 vec![Expression::string(&unit_str), arg1, arg2],
15884 ))))
15885 }
15886 DialectType::DuckDB => {
15887 let iu = Self::parse_interval_unit_static(&unit_str);
15888 let interval = Expression::Interval(Box::new(
15889 crate::expressions::Interval {
15890 this: Some(arg1),
15891 unit: Some(
15892 crate::expressions::IntervalUnitSpec::Simple {
15893 unit: iu,
15894 use_plural: false,
15895 },
15896 ),
15897 },
15898 ));
15899 Ok(Expression::Add(Box::new(
15900 crate::expressions::BinaryOp::new(arg2, interval),
15901 )))
15902 }
15903 DialectType::PostgreSQL
15904 | DialectType::Materialize
15905 | DialectType::RisingWave => {
15906 // PostgreSQL: x + INTERVAL '1 DAY'
15907 let amount_str = Self::expr_to_string_static(&arg1);
15908 let interval = Expression::Interval(Box::new(
15909 crate::expressions::Interval {
15910 this: Some(Expression::string(&format!(
15911 "{} {}",
15912 amount_str, unit_str
15913 ))),
15914 unit: None,
15915 },
15916 ));
15917 Ok(Expression::Add(Box::new(
15918 crate::expressions::BinaryOp::new(arg2, interval),
15919 )))
15920 }
15921 DialectType::Snowflake
15922 | DialectType::TSQL
15923 | DialectType::Redshift => {
15924 let unit =
15925 Expression::Identifier(Identifier::new(&unit_str));
15926 Ok(Expression::Function(Box::new(Function::new(
15927 "DATEADD".to_string(),
15928 vec![unit, arg1, arg2],
15929 ))))
15930 }
15931 DialectType::BigQuery
15932 | DialectType::MySQL
15933 | DialectType::Doris
15934 | DialectType::StarRocks
15935 | DialectType::Drill => {
15936 // DATE_ADD(date, INTERVAL amount UNIT)
15937 let iu = Self::parse_interval_unit_static(&unit_str);
15938 let interval = Expression::Interval(Box::new(
15939 crate::expressions::Interval {
15940 this: Some(arg1),
15941 unit: Some(
15942 crate::expressions::IntervalUnitSpec::Simple {
15943 unit: iu,
15944 use_plural: false,
15945 },
15946 ),
15947 },
15948 ));
15949 Ok(Expression::Function(Box::new(Function::new(
15950 "DATE_ADD".to_string(),
15951 vec![arg2, interval],
15952 ))))
15953 }
15954 DialectType::SQLite => {
15955 // SQLite: DATE(x, '1 DAY')
15956 // Build the string '1 DAY' from amount and unit
15957 let amount_str = match &arg1 {
15958 Expression::Literal(lit)
15959 if matches!(lit.as_ref(), Literal::Number(_)) =>
15960 {
15961 let Literal::Number(n) = lit.as_ref() else {
15962 unreachable!()
15963 };
15964 n.clone()
15965 }
15966 _ => "1".to_string(),
15967 };
15968 Ok(Expression::Function(Box::new(Function::new(
15969 "DATE".to_string(),
15970 vec![
15971 arg2,
15972 Expression::string(format!(
15973 "{} {}",
15974 amount_str, unit_str
15975 )),
15976 ],
15977 ))))
15978 }
15979 DialectType::Dremio => {
15980 // Dremio: DATE_ADD(date, amount) - drops unit
15981 Ok(Expression::Function(Box::new(Function::new(
15982 "DATE_ADD".to_string(),
15983 vec![arg2, arg1],
15984 ))))
15985 }
15986 DialectType::Spark => {
15987 // Spark: DATE_ADD(date, val) for DAY, or DATEADD(UNIT, val, date)
15988 if unit_str == "DAY" {
15989 Ok(Expression::Function(Box::new(Function::new(
15990 "DATE_ADD".to_string(),
15991 vec![arg2, arg1],
15992 ))))
15993 } else {
15994 let unit =
15995 Expression::Identifier(Identifier::new(&unit_str));
15996 Ok(Expression::Function(Box::new(Function::new(
15997 "DATE_ADD".to_string(),
15998 vec![unit, arg1, arg2],
15999 ))))
16000 }
16001 }
16002 DialectType::Databricks => {
16003 let unit =
16004 Expression::Identifier(Identifier::new(&unit_str));
16005 Ok(Expression::Function(Box::new(Function::new(
16006 "DATE_ADD".to_string(),
16007 vec![unit, arg1, arg2],
16008 ))))
16009 }
16010 DialectType::Hive => {
16011 // Hive: DATE_ADD(date, val) for DAY
16012 Ok(Expression::Function(Box::new(Function::new(
16013 "DATE_ADD".to_string(),
16014 vec![arg2, arg1],
16015 ))))
16016 }
16017 _ => {
16018 let unit =
16019 Expression::Identifier(Identifier::new(&unit_str));
16020 Ok(Expression::Function(Box::new(Function::new(
16021 "DATE_ADD".to_string(),
16022 vec![unit, arg1, arg2],
16023 ))))
16024 }
16025 }
16026 }
16027 // DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days)
16028 "DATE_ADD"
16029 if f.args.len() == 2
16030 && matches!(
16031 source,
16032 DialectType::Hive
16033 | DialectType::Spark
16034 | DialectType::Databricks
16035 | DialectType::Generic
16036 ) =>
16037 {
16038 let mut args = f.args;
16039 let date = args.remove(0);
16040 let days = args.remove(0);
16041 match target {
16042 DialectType::Hive | DialectType::Spark => {
16043 // Keep as DATE_ADD(date, days) for Hive/Spark
16044 Ok(Expression::Function(Box::new(Function::new(
16045 "DATE_ADD".to_string(),
16046 vec![date, days],
16047 ))))
16048 }
16049 DialectType::Databricks => {
16050 // Databricks: DATEADD(DAY, days, date)
16051 Ok(Expression::Function(Box::new(Function::new(
16052 "DATEADD".to_string(),
16053 vec![
16054 Expression::Identifier(Identifier::new("DAY")),
16055 days,
16056 date,
16057 ],
16058 ))))
16059 }
16060 DialectType::DuckDB => {
16061 // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
16062 let cast_date = Self::ensure_cast_date(date);
16063 // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
16064 let interval_val = if matches!(
16065 days,
16066 Expression::Mul(_)
16067 | Expression::Sub(_)
16068 | Expression::Add(_)
16069 ) {
16070 Expression::Paren(Box::new(crate::expressions::Paren {
16071 this: days,
16072 trailing_comments: vec![],
16073 }))
16074 } else {
16075 days
16076 };
16077 let interval = Expression::Interval(Box::new(
16078 crate::expressions::Interval {
16079 this: Some(interval_val),
16080 unit: Some(
16081 crate::expressions::IntervalUnitSpec::Simple {
16082 unit: crate::expressions::IntervalUnit::Day,
16083 use_plural: false,
16084 },
16085 ),
16086 },
16087 ));
16088 Ok(Expression::Add(Box::new(
16089 crate::expressions::BinaryOp::new(cast_date, interval),
16090 )))
16091 }
16092 DialectType::Snowflake => {
16093 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
16094 let cast_date = if matches!(
16095 source,
16096 DialectType::Hive
16097 | DialectType::Spark
16098 | DialectType::Databricks
16099 ) {
16100 if matches!(
16101 date,
16102 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
16103 ) {
16104 Self::double_cast_timestamp_date(date)
16105 } else {
16106 date
16107 }
16108 } else {
16109 date
16110 };
16111 Ok(Expression::Function(Box::new(Function::new(
16112 "DATEADD".to_string(),
16113 vec![
16114 Expression::Identifier(Identifier::new("DAY")),
16115 days,
16116 cast_date,
16117 ],
16118 ))))
16119 }
16120 DialectType::Redshift => {
16121 Ok(Expression::Function(Box::new(Function::new(
16122 "DATEADD".to_string(),
16123 vec![
16124 Expression::Identifier(Identifier::new("DAY")),
16125 days,
16126 date,
16127 ],
16128 ))))
16129 }
16130 DialectType::TSQL | DialectType::Fabric => {
16131 // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
16132 // But Databricks DATE_ADD doesn't need this wrapping for TSQL
16133 let cast_date = if matches!(
16134 source,
16135 DialectType::Hive | DialectType::Spark
16136 ) {
16137 if matches!(
16138 date,
16139 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
16140 ) {
16141 Self::double_cast_datetime2_date(date)
16142 } else {
16143 date
16144 }
16145 } else {
16146 date
16147 };
16148 Ok(Expression::Function(Box::new(Function::new(
16149 "DATEADD".to_string(),
16150 vec![
16151 Expression::Identifier(Identifier::new("DAY")),
16152 days,
16153 cast_date,
16154 ],
16155 ))))
16156 }
16157 DialectType::Presto
16158 | DialectType::Trino
16159 | DialectType::Athena => {
16160 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
16161 let cast_date = if matches!(
16162 source,
16163 DialectType::Hive
16164 | DialectType::Spark
16165 | DialectType::Databricks
16166 ) {
16167 if matches!(
16168 date,
16169 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
16170 ) {
16171 Self::double_cast_timestamp_date(date)
16172 } else {
16173 date
16174 }
16175 } else {
16176 date
16177 };
16178 Ok(Expression::Function(Box::new(Function::new(
16179 "DATE_ADD".to_string(),
16180 vec![Expression::string("DAY"), days, cast_date],
16181 ))))
16182 }
16183 DialectType::BigQuery => {
16184 // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
16185 let cast_date = if matches!(
16186 source,
16187 DialectType::Hive
16188 | DialectType::Spark
16189 | DialectType::Databricks
16190 ) {
16191 Self::double_cast_datetime_date(date)
16192 } else {
16193 date
16194 };
16195 // Wrap complex expressions in Paren for interval
16196 let interval_val = if matches!(
16197 days,
16198 Expression::Mul(_)
16199 | Expression::Sub(_)
16200 | Expression::Add(_)
16201 ) {
16202 Expression::Paren(Box::new(crate::expressions::Paren {
16203 this: days,
16204 trailing_comments: vec![],
16205 }))
16206 } else {
16207 days
16208 };
16209 let interval = Expression::Interval(Box::new(
16210 crate::expressions::Interval {
16211 this: Some(interval_val),
16212 unit: Some(
16213 crate::expressions::IntervalUnitSpec::Simple {
16214 unit: crate::expressions::IntervalUnit::Day,
16215 use_plural: false,
16216 },
16217 ),
16218 },
16219 ));
16220 Ok(Expression::Function(Box::new(Function::new(
16221 "DATE_ADD".to_string(),
16222 vec![cast_date, interval],
16223 ))))
16224 }
16225 DialectType::MySQL => {
16226 let iu = crate::expressions::IntervalUnit::Day;
16227 Ok(Expression::DateAdd(Box::new(
16228 crate::expressions::DateAddFunc {
16229 this: date,
16230 interval: days,
16231 unit: iu,
16232 },
16233 )))
16234 }
16235 DialectType::PostgreSQL => {
16236 let interval = Expression::Interval(Box::new(
16237 crate::expressions::Interval {
16238 this: Some(Expression::string(&format!(
16239 "{} DAY",
16240 Self::expr_to_string_static(&days)
16241 ))),
16242 unit: None,
16243 },
16244 ));
16245 Ok(Expression::Add(Box::new(
16246 crate::expressions::BinaryOp::new(date, interval),
16247 )))
16248 }
16249 DialectType::Doris
16250 | DialectType::StarRocks
16251 | DialectType::Drill => {
16252 // DATE_ADD(date, INTERVAL days DAY)
16253 let interval = Expression::Interval(Box::new(
16254 crate::expressions::Interval {
16255 this: Some(days),
16256 unit: Some(
16257 crate::expressions::IntervalUnitSpec::Simple {
16258 unit: crate::expressions::IntervalUnit::Day,
16259 use_plural: false,
16260 },
16261 ),
16262 },
16263 ));
16264 Ok(Expression::Function(Box::new(Function::new(
16265 "DATE_ADD".to_string(),
16266 vec![date, interval],
16267 ))))
16268 }
16269 _ => Ok(Expression::Function(Box::new(Function::new(
16270 "DATE_ADD".to_string(),
16271 vec![date, days],
16272 )))),
16273 }
16274 }
16275 // DATE_ADD(date, INTERVAL val UNIT) - MySQL 2-arg form with INTERVAL as 2nd arg
16276 "DATE_ADD"
16277 if f.args.len() == 2
16278 && matches!(
16279 source,
16280 DialectType::MySQL | DialectType::SingleStore
16281 )
16282 && matches!(&f.args[1], Expression::Interval(_)) =>
16283 {
16284 let mut args = f.args;
16285 let date = args.remove(0);
16286 let interval_expr = args.remove(0);
16287 let (val, unit) = Self::extract_interval_parts(&interval_expr);
16288 let unit_str = Self::interval_unit_to_string(&unit);
16289 let is_literal = matches!(&val,
16290 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_) | Literal::String(_))
16291 );
16292
16293 match target {
16294 DialectType::MySQL | DialectType::SingleStore => {
16295 // Keep as DATE_ADD(date, INTERVAL val UNIT)
16296 Ok(Expression::Function(Box::new(Function::new(
16297 "DATE_ADD".to_string(),
16298 vec![date, interval_expr],
16299 ))))
16300 }
16301 DialectType::PostgreSQL => {
16302 if is_literal {
16303 // Literal: date + INTERVAL 'val UNIT'
16304 let interval = Expression::Interval(Box::new(
16305 crate::expressions::Interval {
16306 this: Some(Expression::Literal(Box::new(
16307 Literal::String(format!(
16308 "{} {}",
16309 Self::expr_to_string(&val),
16310 unit_str
16311 )),
16312 ))),
16313 unit: None,
16314 },
16315 ));
16316 Ok(Expression::Add(Box::new(
16317 crate::expressions::BinaryOp::new(date, interval),
16318 )))
16319 } else {
16320 // Non-literal (column ref): date + INTERVAL '1 UNIT' * val
16321 let interval_one = Expression::Interval(Box::new(
16322 crate::expressions::Interval {
16323 this: Some(Expression::Literal(Box::new(
16324 Literal::String(format!("1 {}", unit_str)),
16325 ))),
16326 unit: None,
16327 },
16328 ));
16329 let mul = Expression::Mul(Box::new(
16330 crate::expressions::BinaryOp::new(
16331 interval_one,
16332 val,
16333 ),
16334 ));
16335 Ok(Expression::Add(Box::new(
16336 crate::expressions::BinaryOp::new(date, mul),
16337 )))
16338 }
16339 }
16340 _ => {
16341 // Default: keep as DATE_ADD(date, interval)
16342 Ok(Expression::Function(Box::new(Function::new(
16343 "DATE_ADD".to_string(),
16344 vec![date, interval_expr],
16345 ))))
16346 }
16347 }
16348 }
16349 // DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days)
16350 "DATE_SUB"
16351 if f.args.len() == 2
16352 && matches!(
16353 source,
16354 DialectType::Hive
16355 | DialectType::Spark
16356 | DialectType::Databricks
16357 ) =>
16358 {
16359 let mut args = f.args;
16360 let date = args.remove(0);
16361 let days = args.remove(0);
16362 // Helper to create days * -1
16363 let make_neg_days = |d: Expression| -> Expression {
16364 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
16365 d,
16366 Expression::Literal(Box::new(Literal::Number(
16367 "-1".to_string(),
16368 ))),
16369 )))
16370 };
16371 let is_string_literal = matches!(date, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)));
16372 match target {
16373 DialectType::Hive
16374 | DialectType::Spark
16375 | DialectType::Databricks => {
16376 // Keep as DATE_SUB(date, days) for Hive/Spark
16377 Ok(Expression::Function(Box::new(Function::new(
16378 "DATE_SUB".to_string(),
16379 vec![date, days],
16380 ))))
16381 }
16382 DialectType::DuckDB => {
16383 let cast_date = Self::ensure_cast_date(date);
16384 let neg = make_neg_days(days);
16385 let interval = Expression::Interval(Box::new(
16386 crate::expressions::Interval {
16387 this: Some(Expression::Paren(Box::new(
16388 crate::expressions::Paren {
16389 this: neg,
16390 trailing_comments: vec![],
16391 },
16392 ))),
16393 unit: Some(
16394 crate::expressions::IntervalUnitSpec::Simple {
16395 unit: crate::expressions::IntervalUnit::Day,
16396 use_plural: false,
16397 },
16398 ),
16399 },
16400 ));
16401 Ok(Expression::Add(Box::new(
16402 crate::expressions::BinaryOp::new(cast_date, interval),
16403 )))
16404 }
16405 DialectType::Snowflake => {
16406 let cast_date = if is_string_literal {
16407 Self::double_cast_timestamp_date(date)
16408 } else {
16409 date
16410 };
16411 let neg = make_neg_days(days);
16412 Ok(Expression::Function(Box::new(Function::new(
16413 "DATEADD".to_string(),
16414 vec![
16415 Expression::Identifier(Identifier::new("DAY")),
16416 neg,
16417 cast_date,
16418 ],
16419 ))))
16420 }
16421 DialectType::Redshift => {
16422 let neg = make_neg_days(days);
16423 Ok(Expression::Function(Box::new(Function::new(
16424 "DATEADD".to_string(),
16425 vec![
16426 Expression::Identifier(Identifier::new("DAY")),
16427 neg,
16428 date,
16429 ],
16430 ))))
16431 }
16432 DialectType::TSQL | DialectType::Fabric => {
16433 let cast_date = if is_string_literal {
16434 Self::double_cast_datetime2_date(date)
16435 } else {
16436 date
16437 };
16438 let neg = make_neg_days(days);
16439 Ok(Expression::Function(Box::new(Function::new(
16440 "DATEADD".to_string(),
16441 vec![
16442 Expression::Identifier(Identifier::new("DAY")),
16443 neg,
16444 cast_date,
16445 ],
16446 ))))
16447 }
16448 DialectType::Presto
16449 | DialectType::Trino
16450 | DialectType::Athena => {
16451 let cast_date = if is_string_literal {
16452 Self::double_cast_timestamp_date(date)
16453 } else {
16454 date
16455 };
16456 let neg = make_neg_days(days);
16457 Ok(Expression::Function(Box::new(Function::new(
16458 "DATE_ADD".to_string(),
16459 vec![Expression::string("DAY"), neg, cast_date],
16460 ))))
16461 }
16462 DialectType::BigQuery => {
16463 let cast_date = if is_string_literal {
16464 Self::double_cast_datetime_date(date)
16465 } else {
16466 date
16467 };
16468 let neg = make_neg_days(days);
16469 let interval = Expression::Interval(Box::new(
16470 crate::expressions::Interval {
16471 this: Some(Expression::Paren(Box::new(
16472 crate::expressions::Paren {
16473 this: neg,
16474 trailing_comments: vec![],
16475 },
16476 ))),
16477 unit: Some(
16478 crate::expressions::IntervalUnitSpec::Simple {
16479 unit: crate::expressions::IntervalUnit::Day,
16480 use_plural: false,
16481 },
16482 ),
16483 },
16484 ));
16485 Ok(Expression::Function(Box::new(Function::new(
16486 "DATE_ADD".to_string(),
16487 vec![cast_date, interval],
16488 ))))
16489 }
16490 _ => Ok(Expression::Function(Box::new(Function::new(
16491 "DATE_SUB".to_string(),
16492 vec![date, days],
16493 )))),
16494 }
16495 }
16496 // ADD_MONTHS(date, val) -> target-specific
16497 "ADD_MONTHS" if f.args.len() == 2 => {
16498 let mut args = f.args;
16499 let date = args.remove(0);
16500 let val = args.remove(0);
16501 match target {
16502 DialectType::TSQL => {
16503 let cast_date = Self::ensure_cast_datetime2(date);
16504 Ok(Expression::Function(Box::new(Function::new(
16505 "DATEADD".to_string(),
16506 vec![
16507 Expression::Identifier(Identifier::new("MONTH")),
16508 val,
16509 cast_date,
16510 ],
16511 ))))
16512 }
16513 DialectType::DuckDB => {
16514 let interval = Expression::Interval(Box::new(
16515 crate::expressions::Interval {
16516 this: Some(val),
16517 unit: Some(
16518 crate::expressions::IntervalUnitSpec::Simple {
16519 unit:
16520 crate::expressions::IntervalUnit::Month,
16521 use_plural: false,
16522 },
16523 ),
16524 },
16525 ));
16526 Ok(Expression::Add(Box::new(
16527 crate::expressions::BinaryOp::new(date, interval),
16528 )))
16529 }
16530 DialectType::Snowflake => {
16531 // Keep ADD_MONTHS when source is Snowflake
16532 if matches!(source, DialectType::Snowflake) {
16533 Ok(Expression::Function(Box::new(Function::new(
16534 "ADD_MONTHS".to_string(),
16535 vec![date, val],
16536 ))))
16537 } else {
16538 Ok(Expression::Function(Box::new(Function::new(
16539 "DATEADD".to_string(),
16540 vec![
16541 Expression::Identifier(Identifier::new(
16542 "MONTH",
16543 )),
16544 val,
16545 date,
16546 ],
16547 ))))
16548 }
16549 }
16550 DialectType::Redshift => {
16551 Ok(Expression::Function(Box::new(Function::new(
16552 "DATEADD".to_string(),
16553 vec![
16554 Expression::Identifier(Identifier::new("MONTH")),
16555 val,
16556 date,
16557 ],
16558 ))))
16559 }
16560 DialectType::Presto
16561 | DialectType::Trino
16562 | DialectType::Athena => {
16563 Ok(Expression::Function(Box::new(Function::new(
16564 "DATE_ADD".to_string(),
16565 vec![Expression::string("MONTH"), val, date],
16566 ))))
16567 }
16568 DialectType::BigQuery => {
16569 let interval = Expression::Interval(Box::new(
16570 crate::expressions::Interval {
16571 this: Some(val),
16572 unit: Some(
16573 crate::expressions::IntervalUnitSpec::Simple {
16574 unit:
16575 crate::expressions::IntervalUnit::Month,
16576 use_plural: false,
16577 },
16578 ),
16579 },
16580 ));
16581 Ok(Expression::Function(Box::new(Function::new(
16582 "DATE_ADD".to_string(),
16583 vec![date, interval],
16584 ))))
16585 }
16586 _ => Ok(Expression::Function(Box::new(Function::new(
16587 "ADD_MONTHS".to_string(),
16588 vec![date, val],
16589 )))),
16590 }
16591 }
16592 // DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
16593 "DATETRUNC" if f.args.len() == 2 => {
16594 let mut args = f.args;
16595 let arg0 = args.remove(0);
16596 let arg1 = args.remove(0);
16597 let unit_str = Self::get_unit_str_static(&arg0);
16598 match target {
16599 DialectType::TSQL | DialectType::Fabric => {
16600 // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
16601 Ok(Expression::Function(Box::new(Function::new(
16602 "DATETRUNC".to_string(),
16603 vec![
16604 Expression::Identifier(Identifier::new(&unit_str)),
16605 arg1,
16606 ],
16607 ))))
16608 }
16609 DialectType::DuckDB => {
16610 // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
16611 let date = Self::ensure_cast_timestamp(arg1);
16612 Ok(Expression::Function(Box::new(Function::new(
16613 "DATE_TRUNC".to_string(),
16614 vec![Expression::string(&unit_str), date],
16615 ))))
16616 }
16617 DialectType::ClickHouse => {
16618 // ClickHouse: dateTrunc('UNIT', expr)
16619 Ok(Expression::Function(Box::new(Function::new(
16620 "dateTrunc".to_string(),
16621 vec![Expression::string(&unit_str), arg1],
16622 ))))
16623 }
16624 _ => {
16625 // Standard: DATE_TRUNC('UNIT', expr)
16626 let unit = Expression::string(&unit_str);
16627 Ok(Expression::Function(Box::new(Function::new(
16628 "DATE_TRUNC".to_string(),
16629 vec![unit, arg1],
16630 ))))
16631 }
16632 }
16633 }
16634 // GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
16635 "GETDATE" if f.args.is_empty() => match target {
16636 DialectType::TSQL => Ok(Expression::Function(f)),
16637 DialectType::Redshift => Ok(Expression::Function(Box::new(
16638 Function::new("GETDATE".to_string(), vec![]),
16639 ))),
16640 _ => Ok(Expression::CurrentTimestamp(
16641 crate::expressions::CurrentTimestamp {
16642 precision: None,
16643 sysdate: false,
16644 },
16645 )),
16646 },
// TO_HEX(x) / HEX(x) -> target-specific hex function
"TO_HEX" | "HEX" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Presto | DialectType::Trino => "TO_HEX",
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "HEX",
        DialectType::DuckDB
        | DialectType::PostgreSQL
        | DialectType::Redshift => "TO_HEX",
        // Unknown targets keep whichever spelling the source used.
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
"FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
    match target {
        DialectType::BigQuery => {
            // BigQuery: UNHEX(x) -> FROM_HEX(x)
            // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
            // because BigQuery MD5 returns BYTES, not hex string
            let arg = &f.args[0];
            let wrapped_arg = match arg {
                // The same BYTES-vs-hex-string mismatch applies to the
                // SHA family, so those hash functions get wrapped too.
                Expression::Function(inner_f)
                    if inner_f.name.eq_ignore_ascii_case("MD5")
                        || inner_f
                            .name
                            .eq_ignore_ascii_case("SHA1")
                        || inner_f
                            .name
                            .eq_ignore_ascii_case("SHA256")
                        || inner_f
                            .name
                            .eq_ignore_ascii_case("SHA512") =>
                {
                    // Wrap hash function in TO_HEX for BigQuery
                    Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg.clone()],
                    )))
                }
                // Guard checked args.len() == 1, so next() cannot fail.
                _ => f.args.into_iter().next().unwrap(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_HEX".to_string(),
                vec![wrapped_arg],
            ))))
        }
        _ => {
            let name = match target {
                DialectType::Presto | DialectType::Trino => "FROM_HEX",
                DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive => "UNHEX",
                _ => &f.name,
            };
            Ok(Expression::Function(Box::new(Function::new(
                name.to_string(),
                f.args,
            ))))
        }
    }
}
// TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
"TO_UTF8" if f.args.len() == 1 => match target {
    DialectType::Spark | DialectType::Databricks => {
        // Append the charset literal as the second argument.
        let mut args = f.args;
        args.push(Expression::string("utf-8"));
        Ok(Expression::Function(Box::new(Function::new(
            "ENCODE".to_string(),
            args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
"FROM_UTF8" if f.args.len() == 1 => match target {
    DialectType::Spark | DialectType::Databricks => {
        let mut args = f.args;
        args.push(Expression::string("utf-8"));
        Ok(Expression::Function(Box::new(Function::new(
            "DECODE".to_string(),
            args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
"STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "STARTSWITH",
        DialectType::Presto | DialectType::Trino => "STARTS_WITH",
        DialectType::PostgreSQL | DialectType::Redshift => {
            "STARTS_WITH"
        }
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
16752 // APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
16753 "APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
16754 let name = match target {
16755 DialectType::Presto
16756 | DialectType::Trino
16757 | DialectType::Athena => "APPROX_DISTINCT",
16758 _ => "APPROX_COUNT_DISTINCT",
16759 };
16760 Ok(Expression::Function(Box::new(Function::new(
16761 name.to_string(),
16762 f.args,
16763 ))))
16764 }
// JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
// (skipped for BigQuery sources — NOTE(review): presumably because
// BigQuery's JSONPath semantics differ; confirm against the BigQuery
// dialect's own handling.)
"JSON_EXTRACT"
    if f.args.len() == 2
        && !matches!(source, DialectType::BigQuery)
        && matches!(
            target,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    Ok(Expression::Function(Box::new(Function::new(
        "GET_JSON_OBJECT".to_string(),
        f.args,
    ))))
}
// JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
"JSON_EXTRACT"
    if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
{
    // Remove path first (index 1) so the indices stay valid.
    let mut args = f.args;
    let path = args.remove(1);
    let this = args.remove(0);
    Ok(Expression::JsonExtract(Box::new(
        crate::expressions::JsonExtractFunc {
            this,
            path,
            returning: None,
            arrow_syntax: true,
            hash_arrow_syntax: false,
            wrapper_option: None,
            quotes_option: None,
            on_scalar_string: false,
            on_error: None,
        },
    )))
}
// JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
"JSON_FORMAT" if f.args.len() == 1 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
            // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
            if matches!(
                source,
                DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena
            ) {
                if let Some(Expression::ParseJson(pj)) = f.args.first()
                {
                    if let Expression::Literal(lit) = &pj.this {
                        if let Literal::String(s) = lit.as_ref() {
                            // Wrap the payload in a one-element JSON array,
                            // feed that through SCHEMA_OF_JSON/FROM_JSON/TO_JSON,
                            // then strip the surrounding brackets with the
                            // '^.(.*).$' capture group below.
                            let wrapped =
                                Expression::Literal(Box::new(
                                    Literal::String(format!("[{}]", s)),
                                ));
                            let schema_of_json = Expression::Function(
                                Box::new(Function::new(
                                    "SCHEMA_OF_JSON".to_string(),
                                    vec![wrapped.clone()],
                                )),
                            );
                            let from_json = Expression::Function(
                                Box::new(Function::new(
                                    "FROM_JSON".to_string(),
                                    vec![wrapped, schema_of_json],
                                )),
                            );
                            let to_json = Expression::Function(
                                Box::new(Function::new(
                                    "TO_JSON".to_string(),
                                    vec![from_json],
                                )),
                            );
                            return Ok(Expression::Function(Box::new(
                                Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![
                                        to_json,
                                        Expression::Literal(Box::new(
                                            Literal::String(
                                                "^.(.*).$".to_string(),
                                            ),
                                        )),
                                        Expression::Literal(Box::new(
                                            Literal::Number(
                                                "1".to_string(),
                                            ),
                                        )),
                                    ],
                                ),
                            )));
                        }
                    }
                }
            }

            // Strip inner CAST(... AS JSON) or TO_JSON() if present
            // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
            let mut args = f.args;
            if let Some(Expression::Cast(ref c)) = args.first() {
                if matches!(&c.to, DataType::Json | DataType::JsonB) {
                    args = vec![c.this.clone()];
                }
            } else if let Some(Expression::Function(ref inner_f)) =
                args.first()
            {
                if inner_f.name.eq_ignore_ascii_case("TO_JSON")
                    && inner_f.args.len() == 1
                {
                    // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
                    args = inner_f.args.clone();
                }
            }
            Ok(Expression::Function(Box::new(Function::new(
                "TO_JSON".to_string(),
                args,
            ))))
        }
        DialectType::BigQuery => Ok(Expression::Function(Box::new(
            Function::new("TO_JSON_STRING".to_string(), f.args),
        ))),
        DialectType::DuckDB => {
            // CAST(TO_JSON(x) AS TEXT)
            let to_json = Expression::Function(Box::new(
                Function::new("TO_JSON".to_string(), f.args),
            ));
            Ok(Expression::Cast(Box::new(Cast {
                this: to_json,
                to: DataType::Text,
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
"SYSDATE" if f.args.is_empty() => {
    match target {
        // Oracle and Redshift support bare SYSDATE natively.
        DialectType::Oracle | DialectType::Redshift => {
            Ok(Expression::Function(f))
        }
        DialectType::Snowflake => {
            // Snowflake uses SYSDATE() with parens
            let mut f = *f;
            f.no_parens = false;
            Ok(Expression::Function(Box::new(f)))
        }
        DialectType::DuckDB => {
            // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
            Ok(Expression::AtTimeZone(Box::new(
                crate::expressions::AtTimeZone {
                    this: Expression::CurrentTimestamp(
                        crate::expressions::CurrentTimestamp {
                            precision: None,
                            sysdate: false,
                        },
                    ),
                    zone: Expression::Literal(Box::new(
                        Literal::String("UTC".to_string()),
                    )),
                },
            )))
        }
        // All other targets: CURRENT_TIMESTAMP. The sysdate flag records
        // the original spelling (NOTE(review): presumably so generators
        // can round-trip it — confirm against CurrentTimestamp rendering).
        _ => Ok(Expression::CurrentTimestamp(
            crate::expressions::CurrentTimestamp {
                precision: None,
                sysdate: true,
            },
        )),
    }
}
16942 // LOGICAL_OR(x) -> BOOL_OR(x)
16943 "LOGICAL_OR" if f.args.len() == 1 => {
16944 let name = match target {
16945 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
16946 _ => &f.name,
16947 };
16948 Ok(Expression::Function(Box::new(Function::new(
16949 name.to_string(),
16950 f.args,
16951 ))))
16952 }
16953 // LOGICAL_AND(x) -> BOOL_AND(x)
16954 "LOGICAL_AND" if f.args.len() == 1 => {
16955 let name = match target {
16956 DialectType::Spark | DialectType::Databricks => "BOOL_AND",
16957 _ => &f.name,
16958 };
16959 Ok(Expression::Function(Box::new(Function::new(
16960 name.to_string(),
16961 f.args,
16962 ))))
16963 }
// MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
"MONTHS_ADD" if f.args.len() == 2 => match target {
    DialectType::Oracle => Ok(Expression::Function(Box::new(
        Function::new("ADD_MONTHS".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
"ARRAY_JOIN" if f.args.len() >= 2 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
            Ok(Expression::Function(f))
        }
        DialectType::Hive => {
            // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
            // Note the argument order flips: CONCAT_WS takes the separator first.
            let mut args = f.args;
            let arr = args.remove(0);
            let sep = args.remove(0);
            // Drop any remaining args (null_replacement)
            Ok(Expression::Function(Box::new(Function::new(
                "CONCAT_WS".to_string(),
                vec![sep, arr],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            Ok(Expression::Function(f))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// LOCATE(substr, str, pos) 3-arg -> target-specific
// For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
"LOCATE"
    if f.args.len() == 3
        && matches!(
            target,
            DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
                | DialectType::DuckDB
        ) =>
{
    let mut args = f.args;
    let substr = args.remove(0);
    let string = args.remove(0);
    let pos = args.remove(0);
    // STRPOS(SUBSTRING(string, pos), substr)
    // Search only the suffix starting at pos; the match offset is then
    // translated back into a full-string position below.
    let substring_call = Expression::Function(Box::new(Function::new(
        "SUBSTRING".to_string(),
        vec![string.clone(), pos.clone()],
    )));
    let strpos_call = Expression::Function(Box::new(Function::new(
        "STRPOS".to_string(),
        vec![substring_call, substr.clone()],
    )));
    // STRPOS(...) + pos - 1
    let pos_adjusted =
        Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
            Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(
                    strpos_call.clone(),
                    pos.clone(),
                ),
            )),
            Expression::number(1),
        )));
    // STRPOS(...) = 0 — "not found"; the result must stay 0 instead of
    // the adjusted offset.
    let is_zero =
        Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
            strpos_call.clone(),
            Expression::number(0),
        )));

    match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
            Ok(Expression::Function(Box::new(Function::new(
                "IF".to_string(),
                vec![is_zero, Expression::number(0), pos_adjusted],
            ))))
        }
        DialectType::DuckDB => {
            // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
            Ok(Expression::Case(Box::new(crate::expressions::Case {
                operand: None,
                whens: vec![(is_zero, Expression::number(0))],
                else_: Some(pos_adjusted),
                comments: Vec::new(),
                inferred_type: None,
            })))
        }
        // Unreachable given the outer target guard; kept so the match
        // stays exhaustive.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "LOCATE".to_string(),
            vec![substr, string, pos],
        )))),
    }
}
// STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
"STRPOS"
    if f.args.len() == 3
        && matches!(
            target,
            DialectType::BigQuery
                | DialectType::Oracle
                | DialectType::Teradata
        ) =>
{
    let mut args = f.args;
    let haystack = args.remove(0);
    let needle = args.remove(0);
    let occurrence = args.remove(0);
    // INSTR's third argument is the search start position — fixed at 1,
    // with the occurrence count appended as the fourth argument.
    Ok(Expression::Function(Box::new(Function::new(
        "INSTR".to_string(),
        vec![haystack, needle, Expression::number(1), occurrence],
    ))))
}
// SCHEMA_NAME(id) -> target-specific
"SCHEMA_NAME" if f.args.len() <= 1 => match target {
    DialectType::MySQL | DialectType::SingleStore => {
        // NOTE(review): any id argument is dropped here; MySQL SCHEMA()
        // only returns the current schema — confirm that is intended.
        Ok(Expression::Function(Box::new(Function::new(
            "SCHEMA".to_string(),
            vec![],
        ))))
    }
    DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
        crate::expressions::CurrentSchema { this: None },
    ))),
    // SQLite's primary database is always named 'main'.
    DialectType::SQLite => Ok(Expression::string("main")),
    _ => Ok(Expression::Function(f)),
},
// STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
"STRTOL" if f.args.len() == 2 => match target {
    DialectType::Presto | DialectType::Trino => {
        Ok(Expression::Function(Box::new(Function::new(
            "FROM_BASE".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
"EDITDIST3" if f.args.len() == 2 => match target {
    DialectType::Spark | DialectType::Databricks => {
        Ok(Expression::Function(Box::new(Function::new(
            "LEVENSHTEIN".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
"FORMAT"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::MySQL | DialectType::SingleStore
        )
        && matches!(target, DialectType::DuckDB) =>
{
    let mut args = f.args;
    let num_expr = args.remove(0);
    let decimals_expr = args.remove(0);
    // Extract decimal count
    // Non-literal decimal counts fall back to 0 decimal places.
    let dec_count = match &decimals_expr {
        Expression::Literal(lit)
            if matches!(lit.as_ref(), Literal::Number(_)) =>
        {
            let Literal::Number(n) = lit.as_ref() else {
                unreachable!()
            };
            n.clone()
        }
        _ => "0".to_string(),
    };
    // DuckDB fmt-style spec: thousands separator plus fixed decimals.
    let fmt_str = format!("{{:,.{}f}}", dec_count);
    Ok(Expression::Function(Box::new(Function::new(
        "FORMAT".to_string(),
        vec![Expression::string(&fmt_str), num_expr],
    ))))
}
// FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
"FORMAT"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::TSQL | DialectType::Fabric
        ) =>
{
    let val_expr = f.args[0].clone();
    let fmt_expr = f.args[1].clone();
    // Expand unambiguous .NET single-char date format shortcodes to full patterns.
    // Only expand shortcodes that are NOT also valid numeric format specifiers.
    // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
    // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
    let (expanded_fmt, is_shortcode) = match &fmt_expr {
        Expression::Literal(lit)
            if matches!(
                lit.as_ref(),
                crate::expressions::Literal::String(_)
            ) =>
        {
            let crate::expressions::Literal::String(s) = lit.as_ref()
            else {
                unreachable!()
            };
            match s.as_str() {
                "m" | "M" => (Expression::string("MMMM d"), true),
                "t" => (Expression::string("h:mm tt"), true),
                "T" => (Expression::string("h:mm:ss tt"), true),
                "y" | "Y" => (Expression::string("MMMM yyyy"), true),
                _ => (fmt_expr.clone(), false),
            }
        }
        _ => (fmt_expr.clone(), false),
    };
    // Check if the format looks like a date format
    // (heuristic substring scan on the literal pattern).
    let is_date_format = is_shortcode
        || match &expanded_fmt {
            Expression::Literal(lit)
                if matches!(
                    lit.as_ref(),
                    crate::expressions::Literal::String(_)
                ) =>
            {
                let crate::expressions::Literal::String(s) =
                    lit.as_ref()
                else {
                    unreachable!()
                };
                // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
                s.contains("yyyy")
                    || s.contains("YYYY")
                    || s.contains("MM")
                    || s.contains("dd")
                    || s.contains("MMMM")
                    || s.contains("HH")
                    || s.contains("hh")
                    || s.contains("ss")
            }
            _ => false,
        };
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Date-like formats map to DATE_FORMAT; numeric ones to FORMAT_NUMBER.
            let func_name = if is_date_format {
                "DATE_FORMAT"
            } else {
                "FORMAT_NUMBER"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![val_expr, expanded_fmt],
            ))))
        }
        _ => {
            // For TSQL and other targets, expand shortcodes but keep FORMAT
            if is_shortcode {
                Ok(Expression::Function(Box::new(Function::new(
                    "FORMAT".to_string(),
                    vec![val_expr, expanded_fmt],
                ))))
            } else {
                Ok(Expression::Function(f))
            }
        }
    }
}
// FORMAT('%s', x) from Trino/Presto -> target-specific
"FORMAT"
    if f.args.len() >= 2
        && matches!(
            source,
            DialectType::Trino
                | DialectType::Presto
                | DialectType::Athena
        ) =>
{
    let fmt_expr = f.args[0].clone();
    let value_args: Vec<Expression> = f.args[1..].to_vec();
    match target {
        // DuckDB: replace %s with {} in format string
        DialectType::DuckDB => {
            let new_fmt = match &fmt_expr {
                Expression::Literal(lit)
                    if matches!(lit.as_ref(), Literal::String(_)) =>
                {
                    let Literal::String(s) = lit.as_ref() else {
                        unreachable!()
                    };
                    Expression::Literal(Box::new(Literal::String(
                        s.replace("%s", "{}"),
                    )))
                }
                // Non-literal format strings pass through unmodified.
                _ => fmt_expr,
            };
            let mut args = vec![new_fmt];
            args.extend(value_args);
            Ok(Expression::Function(Box::new(Function::new(
                "FORMAT".to_string(),
                args,
            ))))
        }
        // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
        DialectType::Snowflake => match &fmt_expr {
            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "%s" && value_args.len() == 1) =>
            {
                let Literal::String(_) = lit.as_ref() else {
                    unreachable!()
                };
                Ok(Expression::Function(Box::new(Function::new(
                    "TO_CHAR".to_string(),
                    value_args,
                ))))
            }
            _ => Ok(Expression::Function(f)),
        },
        // Default: keep FORMAT as-is
        _ => Ok(Expression::Function(f)),
    }
}
// LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
"LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
    if f.args.len() == 2 =>
{
    // When coming from Snowflake source: ARRAY_CONTAINS(value, array)
    // args[0]=value, args[1]=array. For DuckDB target, swap and add NULL-aware CASE.
    if matches!(target, DialectType::DuckDB)
        && matches!(source, DialectType::Snowflake)
        && f.name.eq_ignore_ascii_case("ARRAY_CONTAINS")
    {
        let value = f.args[0].clone();
        let array = f.args[1].clone();

        // value IS NULL
        let value_is_null =
            Expression::IsNull(Box::new(crate::expressions::IsNull {
                this: value.clone(),
                not: false,
                postfix_form: false,
            }));

        // ARRAY_LENGTH(array)
        let array_length =
            Expression::Function(Box::new(Function::new(
                "ARRAY_LENGTH".to_string(),
                vec![array.clone()],
            )));
        // LIST_COUNT(array)
        let list_count = Expression::Function(Box::new(Function::new(
            "LIST_COUNT".to_string(),
            vec![array.clone()],
        )));
        // ARRAY_LENGTH(array) <> LIST_COUNT(array)
        // NOTE(review): presumably true exactly when the array contains
        // NULL elements (LIST_COUNT skipping them) — confirm against
        // DuckDB's LIST_COUNT semantics.
        let neq =
            Expression::Neq(Box::new(crate::expressions::BinaryOp {
                left: array_length,
                right: list_count,
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }));
        // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
        let nullif =
            Expression::Nullif(Box::new(crate::expressions::Nullif {
                this: Box::new(neq),
                expression: Box::new(Expression::Boolean(
                    crate::expressions::BooleanLiteral { value: false },
                )),
            }));

        // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
        let array_contains =
            Expression::Function(Box::new(Function::new(
                "ARRAY_CONTAINS".to_string(),
                vec![array, value],
            )));

        // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
        return Ok(Expression::Case(Box::new(Case {
            operand: None,
            whens: vec![(value_is_null, nullif)],
            else_: Some(array_contains),
            comments: Vec::new(),
            inferred_type: None,
        })));
    }
    match target {
        DialectType::PostgreSQL | DialectType::Redshift => {
            // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
            // NOTE(review): this branch reads args[0]=array, args[1]=needle
            // (DuckDB LIST_CONTAINS order), while Snowflake-style
            // ARRAY_CONTAINS arrives value-first — confirm earlier passes
            // normalize the order before reaching here.
            let arr = f.args[0].clone();
            let needle = f.args[1].clone();
            // Convert [] to ARRAY[] for PostgreSQL
            let pg_arr = match arr {
                Expression::Array(a) => Expression::ArrayFunc(
                    Box::new(crate::expressions::ArrayConstructor {
                        expressions: a.expressions,
                        bracket_notation: false,
                        use_list_keyword: false,
                    }),
                ),
                _ => arr,
            };
            // needle = ANY(arr) using the Any quantified expression
            let any_expr = Expression::Any(Box::new(
                crate::expressions::QuantifiedExpr {
                    this: needle.clone(),
                    subquery: pg_arr,
                    op: Some(crate::expressions::QuantifiedOp::Eq),
                },
            ));
            // COALESCE(..., FALSE) so a non-matching NULL-bearing array
            // yields FALSE rather than NULL.
            let coalesce = Expression::Coalesce(Box::new(
                crate::expressions::VarArgFunc {
                    expressions: vec![
                        any_expr,
                        Expression::Boolean(
                            crate::expressions::BooleanLiteral {
                                value: false,
                            },
                        ),
                    ],
                    original_name: None,
                    inferred_type: None,
                },
            ));
            let is_null_check = Expression::IsNull(Box::new(
                crate::expressions::IsNull {
                    this: needle,
                    not: false,
                    postfix_form: false,
                },
            ));
            Ok(Expression::Case(Box::new(Case {
                operand: None,
                whens: vec![(
                    is_null_check,
                    Expression::Null(crate::expressions::Null),
                )],
                else_: Some(coalesce),
                comments: Vec::new(),
                inferred_type: None,
            })))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ARRAY_CONTAINS".to_string(),
            f.args,
        )))),
    }
}
// LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
"LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
    match target {
        DialectType::PostgreSQL | DialectType::Redshift => {
            // arr1 && arr2 with ARRAY[] syntax
            let mut args = f.args;
            let arr1 = args.remove(0);
            let arr2 = args.remove(0);
            // Bracket-notation array literals must render as ARRAY[...] in PG.
            let pg_arr1 = match arr1 {
                Expression::Array(a) => Expression::ArrayFunc(
                    Box::new(crate::expressions::ArrayConstructor {
                        expressions: a.expressions,
                        bracket_notation: false,
                        use_list_keyword: false,
                    }),
                ),
                _ => arr1,
            };
            let pg_arr2 = match arr2 {
                Expression::Array(a) => Expression::ArrayFunc(
                    Box::new(crate::expressions::ArrayConstructor {
                        expressions: a.expressions,
                        bracket_notation: false,
                        use_list_keyword: false,
                    }),
                ),
                _ => arr2,
            };
            Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
                pg_arr1, pg_arr2,
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: arr1 && arr2 (native support)
            let mut args = f.args;
            let arr1 = args.remove(0);
            let arr2 = args.remove(0);
            Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
                arr1, arr2,
            ))))
        }
        // Other targets: normalize both spellings to LIST_HAS_ANY.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "LIST_HAS_ANY".to_string(),
            f.args,
        )))),
    }
}
17462 // APPROX_QUANTILE(x, q) -> target-specific
17463 "APPROX_QUANTILE" if f.args.len() == 2 => match target {
17464 DialectType::Snowflake => Ok(Expression::Function(Box::new(
17465 Function::new("APPROX_PERCENTILE".to_string(), f.args),
17466 ))),
17467 DialectType::DuckDB => Ok(Expression::Function(f)),
17468 _ => Ok(Expression::Function(f)),
17469 },
17470 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
17471 "MAKE_DATE" if f.args.len() == 3 => match target {
17472 DialectType::BigQuery => Ok(Expression::Function(Box::new(
17473 Function::new("DATE".to_string(), f.args),
17474 ))),
17475 _ => Ok(Expression::Function(f)),
17476 },
17477 // RANGE(start, end[, step]) -> target-specific
17478 "RANGE"
17479 if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
17480 {
17481 let start = f.args[0].clone();
17482 let end = f.args[1].clone();
17483 let step = f.args.get(2).cloned();
17484 match target {
17485 // Snowflake ARRAY_GENERATE_RANGE uses exclusive end (same as DuckDB RANGE),
17486 // so just rename without adjusting the end argument.
17487 DialectType::Snowflake => {
17488 let mut args = vec![start, end];
17489 if let Some(s) = step {
17490 args.push(s);
17491 }
17492 Ok(Expression::Function(Box::new(Function::new(
17493 "ARRAY_GENERATE_RANGE".to_string(),
17494 args,
17495 ))))
17496 }
17497 DialectType::Spark | DialectType::Databricks => {
17498 // RANGE(start, end) -> SEQUENCE(start, end-1)
17499 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
17500 // RANGE(start, start) -> ARRAY() (empty)
17501 // RANGE(start, end, 0) -> ARRAY() (empty)
17502 // When end is variable: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
17503
17504 // Check for constant args
17505 fn extract_i64(e: &Expression) -> Option<i64> {
17506 match e {
17507 Expression::Literal(lit)
17508 if matches!(
17509 lit.as_ref(),
17510 Literal::Number(_)
17511 ) =>
17512 {
17513 let Literal::Number(n) = lit.as_ref() else {
17514 unreachable!()
17515 };
17516 n.parse::<i64>().ok()
17517 }
17518 Expression::Neg(u) => {
17519 if let Expression::Literal(lit) = &u.this {
17520 if let Literal::Number(n) = lit.as_ref() {
17521 n.parse::<i64>().ok().map(|v| -v)
17522 } else {
17523 None
17524 }
17525 } else {
17526 None
17527 }
17528 }
17529 _ => None,
17530 }
17531 }
17532 let start_val = extract_i64(&start);
17533 let end_val = extract_i64(&end);
17534 let step_val = step.as_ref().and_then(|s| extract_i64(s));
17535
17536 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
17537 if step_val == Some(0) {
17538 return Ok(Expression::Function(Box::new(
17539 Function::new("ARRAY".to_string(), vec![]),
17540 )));
17541 }
17542 if let (Some(s), Some(e_val)) = (start_val, end_val) {
17543 if s == e_val {
17544 return Ok(Expression::Function(Box::new(
17545 Function::new("ARRAY".to_string(), vec![]),
17546 )));
17547 }
17548 }
17549
17550 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
17551 // All constants - compute new end = end - step (if step provided) or end - 1
17552 match step_val {
17553 Some(st) if st < 0 => {
17554 // Negative step: SEQUENCE(start, end - step, step)
17555 let new_end = e_val - st; // end - step (= end + |step|)
17556 let mut args =
17557 vec![start, Expression::number(new_end)];
17558 if let Some(s) = step {
17559 args.push(s);
17560 }
17561 Ok(Expression::Function(Box::new(
17562 Function::new("SEQUENCE".to_string(), args),
17563 )))
17564 }
17565 Some(st) => {
17566 let new_end = e_val - st;
17567 let mut args =
17568 vec![start, Expression::number(new_end)];
17569 if let Some(s) = step {
17570 args.push(s);
17571 }
17572 Ok(Expression::Function(Box::new(
17573 Function::new("SEQUENCE".to_string(), args),
17574 )))
17575 }
17576 None => {
17577 // No step: SEQUENCE(start, end - 1)
17578 let new_end = e_val - 1;
17579 Ok(Expression::Function(Box::new(
17580 Function::new(
17581 "SEQUENCE".to_string(),
17582 vec![
17583 start,
17584 Expression::number(new_end),
17585 ],
17586 ),
17587 )))
17588 }
17589 }
17590 } else {
17591 // Variable end: IF((end - 1) < start, ARRAY(), SEQUENCE(start, (end - 1)))
17592 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
17593 end.clone(),
17594 Expression::number(1),
17595 )));
17596 let cond = Expression::Lt(Box::new(BinaryOp::new(
17597 Expression::Paren(Box::new(Paren {
17598 this: end_m1.clone(),
17599 trailing_comments: Vec::new(),
17600 })),
17601 start.clone(),
17602 )));
17603 let empty = Expression::Function(Box::new(
17604 Function::new("ARRAY".to_string(), vec![]),
17605 ));
17606 let mut seq_args = vec![
17607 start,
17608 Expression::Paren(Box::new(Paren {
17609 this: end_m1,
17610 trailing_comments: Vec::new(),
17611 })),
17612 ];
17613 if let Some(s) = step {
17614 seq_args.push(s);
17615 }
17616 let seq = Expression::Function(Box::new(
17617 Function::new("SEQUENCE".to_string(), seq_args),
17618 ));
17619 Ok(Expression::IfFunc(Box::new(
17620 crate::expressions::IfFunc {
17621 condition: cond,
17622 true_value: empty,
17623 false_value: Some(seq),
17624 original_name: None,
17625 inferred_type: None,
17626 },
17627 )))
17628 }
17629 }
17630 DialectType::SQLite => {
17631 // RANGE(start, end) -> GENERATE_SERIES(start, end)
17632 // The subquery wrapping is handled at the Alias level
17633 let mut args = vec![start, end];
17634 if let Some(s) = step {
17635 args.push(s);
17636 }
17637 Ok(Expression::Function(Box::new(Function::new(
17638 "GENERATE_SERIES".to_string(),
17639 args,
17640 ))))
17641 }
17642 _ => Ok(Expression::Function(f)),
17643 }
17644 }
// ARRAY_REVERSE_SORT -> target-specific
// (handled above as well, but also need DuckDB self-normalization)
// MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
"MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
    // Snowflake builds objects from key/value arrays via OBJECT_CONSTRUCT.
    DialectType::Snowflake => Ok(Expression::Function(Box::new(
        Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
    ))),
    // Spark has MAP_FROM_ARRAYS natively; keep the name.
    DialectType::Spark | DialectType::Databricks => {
        Ok(Expression::Function(Box::new(Function::new(
            "MAP_FROM_ARRAYS".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(Box::new(Function::new(
        "MAP".to_string(),
        f.args,
    )))),
},
17663 // VARIANCE(x) -> varSamp(x) for ClickHouse
17664 "VARIANCE" if f.args.len() == 1 => match target {
17665 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
17666 Function::new("varSamp".to_string(), f.args),
17667 ))),
17668 _ => Ok(Expression::Function(f)),
17669 },
17670 // STDDEV(x) -> stddevSamp(x) for ClickHouse
17671 "STDDEV" if f.args.len() == 1 => match target {
17672 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
17673 Function::new("stddevSamp".to_string(), f.args),
17674 ))),
17675 _ => Ok(Expression::Function(f)),
17676 },
17677 // ISINF(x) -> IS_INF(x) for BigQuery
17678 "ISINF" if f.args.len() == 1 => match target {
17679 DialectType::BigQuery => Ok(Expression::Function(Box::new(
17680 Function::new("IS_INF".to_string(), f.args),
17681 ))),
17682 _ => Ok(Expression::Function(f)),
17683 },
17684 // CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
17685 "CONTAINS" if f.args.len() == 2 => match target {
17686 DialectType::Spark
17687 | DialectType::Databricks
17688 | DialectType::Hive => Ok(Expression::Function(Box::new(
17689 Function::new("ARRAY_CONTAINS".to_string(), f.args),
17690 ))),
17691 _ => Ok(Expression::Function(f)),
17692 },
// ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto
"ARRAY_CONTAINS" if f.args.len() == 2 => match target {
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        Ok(Expression::Function(Box::new(Function::new(
            "CONTAINS".to_string(),
            f.args,
        ))))
    }
    // DuckDB keeps the ARRAY_CONTAINS name (rebuilt to normalize the call).
    DialectType::DuckDB => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CONTAINS".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
"TO_UNIXTIME" if f.args.len() == 1 => match target {
    DialectType::Hive
    | DialectType::Spark
    | DialectType::Databricks => Ok(Expression::Function(Box::new(
        Function::new("UNIX_TIMESTAMP".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// FROM_UNIXTIME(x) -> target-specific
"FROM_UNIXTIME" if f.args.len() == 1 => {
    match target {
        // These targets support single-arg FROM_UNIXTIME natively.
        DialectType::Hive
        | DialectType::Spark
        | DialectType::Databricks
        | DialectType::Presto
        | DialectType::Trino => Ok(Expression::Function(f)),
        DialectType::DuckDB => {
            // DuckDB: TO_TIMESTAMP(x)
            // Guard checked args.len() == 1, so next() cannot fail.
            let arg = f.args.into_iter().next().unwrap();
            Ok(Expression::Function(Box::new(Function::new(
                "TO_TIMESTAMP".to_string(),
                vec![arg],
            ))))
        }
        DialectType::PostgreSQL => {
            // PG: TO_TIMESTAMP(col)
            let arg = f.args.into_iter().next().unwrap();
            Ok(Expression::Function(Box::new(Function::new(
                "TO_TIMESTAMP".to_string(),
                vec![arg],
            ))))
        }
        DialectType::Redshift => {
            // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
            let arg = f.args.into_iter().next().unwrap();
            let epoch_ts = Expression::Literal(Box::new(
                Literal::Timestamp("epoch".to_string()),
            ));
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string("1 SECOND")),
                    unit: None,
                },
            ));
            let mul =
                Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
            let add =
                Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
            // Wrap in parens so the arithmetic binds correctly in context.
            Ok(Expression::Paren(Box::new(crate::expressions::Paren {
                this: add,
                trailing_comments: Vec::new(),
            })))
        }
        _ => Ok(Expression::Function(f)),
    }
}
17763 // FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
// FROM_UNIXTIME(x, fmt) coming from a Hive-family source: translate the
// second (format) argument into the target's format language where the
// target uses a different wrapping function. Only string-literal formats
// are translated; any other expression is passed through untouched.
"FROM_UNIXTIME"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
        ) =>
{
    let mut args = f.args;
    let unix_ts = args.remove(0);
    let fmt_expr = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
            let to_ts = Expression::Function(Box::new(Function::new(
                "TO_TIMESTAMP".to_string(),
                vec![unix_ts],
            )));
            // Translate the Hive format string to C/strftime form when
            // it is a literal; otherwise keep the expression as-is.
            if let Expression::Literal(lit) = &fmt_expr {
                if let crate::expressions::Literal::String(s) =
                    lit.as_ref()
                {
                    let c_fmt = Self::hive_format_to_c_format(s);
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRFTIME".to_string(),
                        vec![to_ts, Expression::string(&c_fmt)],
                    ))))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRFTIME".to_string(),
                        vec![to_ts, fmt_expr],
                    ))))
                }
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "STRFTIME".to_string(),
                    vec![to_ts, fmt_expr],
                ))))
            }
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
            let from_unix =
                Expression::Function(Box::new(Function::new(
                    "FROM_UNIXTIME".to_string(),
                    vec![unix_ts],
                )));
            // Same literal-only translation as above, but into
            // Presto's MySQL-style format language.
            if let Expression::Literal(lit) = &fmt_expr {
                if let crate::expressions::Literal::String(s) =
                    lit.as_ref()
                {
                    let p_fmt = Self::hive_format_to_presto_format(s);
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_FORMAT".to_string(),
                        vec![from_unix, Expression::string(&p_fmt)],
                    ))))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_FORMAT".to_string(),
                        vec![from_unix, fmt_expr],
                    ))))
                }
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![from_unix, fmt_expr],
                ))))
            }
        }
        _ => {
            // Keep as FROM_UNIXTIME(x, fmt) for other targets
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_UNIXTIME".to_string(),
                vec![unix_ts, fmt_expr],
            ))))
        }
    }
}
17845 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
// DATEPART/DATE_PART(unit, expr): normalize TSQL-style unit
// abbreviations and pick the target's extraction syntax.
"DATEPART" | "DATE_PART" if f.args.len() == 2 => {
    // Canonicalized (upper-cased) unit used for abbreviation matching.
    let unit_str = Self::get_unit_str_static(&f.args[0]);
    // Get the raw unit text preserving original case
    let raw_unit = match &f.args[0] {
        Expression::Identifier(id) => id.name.clone(),
        Expression::Var(v) => v.this.clone(),
        Expression::Literal(lit)
            if matches!(
                lit.as_ref(),
                crate::expressions::Literal::String(_)
            ) =>
        {
            let crate::expressions::Literal::String(s) = lit.as_ref()
            else {
                unreachable!()
            };
            s.clone()
        }
        Expression::Column(col) => col.name.name.clone(),
        _ => unit_str.clone(),
    };
    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // Expand TSQL abbreviations (YY, QQ, MI, ...) to full unit
            // names; anything unrecognized keeps its original casing.
            // Preserve original case of unit for TSQL
            let unit_name = match unit_str.as_str() {
                "YY" | "YYYY" => "YEAR".to_string(),
                "QQ" | "Q" => "QUARTER".to_string(),
                "MM" | "M" => "MONTH".to_string(),
                "WK" | "WW" => "WEEK".to_string(),
                "DD" | "D" | "DY" => "DAY".to_string(),
                "HH" => "HOUR".to_string(),
                "MI" | "N" => "MINUTE".to_string(),
                "SS" | "S" => "SECOND".to_string(),
                _ => raw_unit.clone(), // preserve original case
            };
            let mut args = f.args;
            args[0] =
                Expression::Identifier(Identifier::new(&unit_name));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEPART".to_string(),
                args,
            ))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
            // Preserve original case for non-abbreviation units
            let unit = match unit_str.as_str() {
                "YY" | "YYYY" => "YEAR".to_string(),
                "QQ" | "Q" => "QUARTER".to_string(),
                "MM" | "M" => "MONTH".to_string(),
                "WK" | "WW" => "WEEK".to_string(),
                "DD" | "D" | "DY" => "DAY".to_string(),
                "HH" => "HOUR".to_string(),
                "MI" | "N" => "MINUTE".to_string(),
                "SS" | "S" => "SECOND".to_string(),
                _ => raw_unit, // preserve original case
            };
            Ok(Expression::Extract(Box::new(
                crate::expressions::ExtractFunc {
                    this: f.args[1].clone(),
                    field: crate::expressions::DateTimeField::Custom(
                        unit,
                    ),
                },
            )))
        }
        // All other targets get the generic DATE_PART(unit, expr).
        _ => Ok(Expression::Function(Box::new(Function::new(
            "DATE_PART".to_string(),
            f.args,
        )))),
    }
}
17918 // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
17919 // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
17920 // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
17921 // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
// DATENAME(unit, date): only month-name and weekday-name units are
// rewritten, and only for TSQL (FORMAT over DATETIME2) and Spark
// (DATE_FORMAT over TIMESTAMP). Everything else passes through.
"DATENAME" if f.args.len() == 2 => {
    let unit_str = Self::get_unit_str_static(&f.args[0]);
    let date_expr = f.args[1].clone();
    match unit_str.as_str() {
        // Month name: TSQL/Spark both use the 'MMMM' pattern.
        "MM" | "M" | "MONTH" => match target {
            DialectType::TSQL => {
                let cast_date = Expression::Cast(Box::new(
                    crate::expressions::Cast {
                        this: date_expr,
                        to: DataType::Custom {
                            name: "DATETIME2".to_string(),
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    },
                ));
                Ok(Expression::Function(Box::new(Function::new(
                    "FORMAT".to_string(),
                    vec![cast_date, Expression::string("MMMM")],
                ))))
            }
            DialectType::Spark | DialectType::Databricks => {
                let cast_date = Expression::Cast(Box::new(
                    crate::expressions::Cast {
                        this: date_expr,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    },
                ));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![cast_date, Expression::string("MMMM")],
                ))))
            }
            _ => Ok(Expression::Function(f)),
        },
        // Weekday name: TSQL uses 'dddd', Spark uses 'EEEE'.
        "DW" | "WEEKDAY" => match target {
            DialectType::TSQL => {
                let cast_date = Expression::Cast(Box::new(
                    crate::expressions::Cast {
                        this: date_expr,
                        to: DataType::Custom {
                            name: "DATETIME2".to_string(),
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    },
                ));
                Ok(Expression::Function(Box::new(Function::new(
                    "FORMAT".to_string(),
                    vec![cast_date, Expression::string("dddd")],
                ))))
            }
            DialectType::Spark | DialectType::Databricks => {
                let cast_date = Expression::Cast(Box::new(
                    crate::expressions::Cast {
                        this: date_expr,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    },
                ));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![cast_date, Expression::string("EEEE")],
                ))))
            }
            _ => Ok(Expression::Function(f)),
        },
        _ => Ok(Expression::Function(f)),
    }
}
18013 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
18014 "STRING_AGG" if f.args.len() >= 2 => {
18015 let x = f.args[0].clone();
18016 let sep = f.args[1].clone();
18017 match target {
18018 DialectType::MySQL
18019 | DialectType::SingleStore
18020 | DialectType::Doris
18021 | DialectType::StarRocks => Ok(Expression::GroupConcat(
18022 Box::new(crate::expressions::GroupConcatFunc {
18023 this: x,
18024 separator: Some(sep),
18025 order_by: None,
18026 distinct: false,
18027 filter: None,
18028 limit: None,
18029 inferred_type: None,
18030 }),
18031 )),
18032 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
18033 crate::expressions::GroupConcatFunc {
18034 this: x,
18035 separator: Some(sep),
18036 order_by: None,
18037 distinct: false,
18038 filter: None,
18039 limit: None,
18040 inferred_type: None,
18041 },
18042 ))),
18043 DialectType::PostgreSQL | DialectType::Redshift => {
18044 Ok(Expression::StringAgg(Box::new(
18045 crate::expressions::StringAggFunc {
18046 this: x,
18047 separator: Some(sep),
18048 order_by: None,
18049 distinct: false,
18050 filter: None,
18051 limit: None,
18052 inferred_type: None,
18053 },
18054 )))
18055 }
18056 _ => Ok(Expression::Function(f)),
18057 }
18058 }
18059 // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
18060 "JSON_ARRAYAGG" => match target {
18061 DialectType::PostgreSQL => {
18062 Ok(Expression::Function(Box::new(Function {
18063 name: "JSON_AGG".to_string(),
18064 ..(*f)
18065 })))
18066 }
18067 _ => Ok(Expression::Function(f)),
18068 },
18069 // SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
18070 "SCHEMA_NAME" => match target {
18071 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
18072 crate::expressions::CurrentSchema { this: None },
18073 ))),
18074 DialectType::SQLite => Ok(Expression::string("main")),
18075 _ => Ok(Expression::Function(f)),
18076 },
18077 // TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
18078 "TO_TIMESTAMP"
18079 if f.args.len() == 2
18080 && matches!(
18081 source,
18082 DialectType::Spark
18083 | DialectType::Databricks
18084 | DialectType::Hive
18085 )
18086 && matches!(target, DialectType::DuckDB) =>
18087 {
18088 let mut args = f.args;
18089 let val = args.remove(0);
18090 let fmt_expr = args.remove(0);
18091 if let Expression::Literal(ref lit) = fmt_expr {
18092 if let Literal::String(ref s) = lit.as_ref() {
18093 // Convert Java/Spark format to C strptime format
/// Convert a Java/Spark `SimpleDateFormat`-style pattern into a C
/// `strptime`/`strftime` pattern (as consumed by DuckDB).
///
/// Multi-letter tokens are rewritten first, longest-first so `yyyy`
/// wins over `yy` and `MM` (month) is consumed before `mm` (minute).
/// A second pass swaps the timezone letters (`z` -> `%Z` zone name,
/// `Z` -> `%z` numeric offset) while copying already-emitted `%x`
/// escapes verbatim so their letters are never re-translated.
fn java_to_c_fmt(fmt: &str) -> String {
    let result = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        // Java `EEEE` is the full weekday name, which is `%A` in C
        // formats; `%W` would be the week-of-year number.
        .replace("EEEE", "%A")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    let mut out = String::new();
    let chars: Vec<char> = result.chars().collect();
    let mut i = 0;
    while i < chars.len() {
        if chars[i] == '%' && i + 1 < chars.len() {
            // An escape produced above: copy both characters untouched.
            out.push(chars[i]);
            out.push(chars[i + 1]);
            i += 2;
        } else if chars[i] == 'z' {
            out.push_str("%Z");
            i += 1;
        } else if chars[i] == 'Z' {
            out.push_str("%z");
            i += 1;
        } else {
            out.push(chars[i]);
            i += 1;
        }
    }
    out
}
18126 let c_fmt = java_to_c_fmt(s);
18127 Ok(Expression::Function(Box::new(Function::new(
18128 "STRPTIME".to_string(),
18129 vec![val, Expression::string(&c_fmt)],
18130 ))))
18131 } else {
18132 Ok(Expression::Function(Box::new(Function::new(
18133 "STRPTIME".to_string(),
18134 vec![val, fmt_expr],
18135 ))))
18136 }
18137 } else {
18138 Ok(Expression::Function(Box::new(Function::new(
18139 "STRPTIME".to_string(),
18140 vec![val, fmt_expr],
18141 ))))
18142 }
18143 }
18144 // TO_DATE(x) 1-arg from Doris: date conversion
18145 "TO_DATE"
18146 if f.args.len() == 1
18147 && matches!(
18148 source,
18149 DialectType::Doris | DialectType::StarRocks
18150 ) =>
18151 {
18152 let arg = f.args.into_iter().next().unwrap();
18153 match target {
18154 DialectType::Oracle
18155 | DialectType::DuckDB
18156 | DialectType::TSQL => {
18157 // CAST(x AS DATE)
18158 Ok(Expression::Cast(Box::new(Cast {
18159 this: arg,
18160 to: DataType::Date,
18161 double_colon_syntax: false,
18162 trailing_comments: vec![],
18163 format: None,
18164 default: None,
18165 inferred_type: None,
18166 })))
18167 }
18168 DialectType::MySQL | DialectType::SingleStore => {
18169 // DATE(x)
18170 Ok(Expression::Function(Box::new(Function::new(
18171 "DATE".to_string(),
18172 vec![arg],
18173 ))))
18174 }
18175 _ => {
18176 // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
18177 Ok(Expression::Function(Box::new(Function::new(
18178 "TO_DATE".to_string(),
18179 vec![arg],
18180 ))))
18181 }
18182 }
18183 }
18184 // TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
// TO_DATE(x) from a Hive-family source: Spark's TO_DATE returns NULL on
// unparseable input, so "safe" (TRY_*) forms are used where the target
// has them; other targets keep TO_DATE(x).
"TO_DATE"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    let arg = f.args.into_iter().next().unwrap();
    match target {
        DialectType::DuckDB => {
            // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
            Ok(Expression::TryCast(Box::new(Cast {
                this: arg,
                to: DataType::Date,
                double_colon_syntax: false,
                trailing_comments: vec![],
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // CAST(CAST(x AS TIMESTAMP) AS DATE)
            Ok(Self::double_cast_timestamp_date(arg))
        }
        DialectType::Snowflake => {
            // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
            // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
            Ok(Expression::Function(Box::new(Function::new(
                "TRY_TO_DATE".to_string(),
                vec![arg, Expression::string("yyyy-mm-DD")],
            ))))
        }
        _ => {
            // Default: keep as TO_DATE(x)
            Ok(Expression::Function(Box::new(Function::new(
                "TO_DATE".to_string(),
                vec![arg],
            ))))
        }
    }
}
18231 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
18232 "TO_DATE"
18233 if f.args.len() == 2
18234 && matches!(
18235 source,
18236 DialectType::Spark
18237 | DialectType::Databricks
18238 | DialectType::Hive
18239 ) =>
18240 {
18241 let mut args = f.args;
18242 let val = args.remove(0);
18243 let fmt_expr = args.remove(0);
18244 let is_default_format = matches!(&fmt_expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "yyyy-MM-dd"));
18245
18246 if is_default_format {
18247 // Default format: same as 1-arg form
18248 match target {
18249 DialectType::DuckDB => {
18250 Ok(Expression::TryCast(Box::new(Cast {
18251 this: val,
18252 to: DataType::Date,
18253 double_colon_syntax: false,
18254 trailing_comments: vec![],
18255 format: None,
18256 default: None,
18257 inferred_type: None,
18258 })))
18259 }
18260 DialectType::Presto
18261 | DialectType::Trino
18262 | DialectType::Athena => {
18263 Ok(Self::double_cast_timestamp_date(val))
18264 }
18265 DialectType::Snowflake => {
18266 // TRY_TO_DATE(x, format) with Snowflake format mapping
18267 let sf_fmt = "yyyy-MM-dd"
18268 .replace("yyyy", "yyyy")
18269 .replace("MM", "mm")
18270 .replace("dd", "DD");
18271 Ok(Expression::Function(Box::new(Function::new(
18272 "TRY_TO_DATE".to_string(),
18273 vec![val, Expression::string(&sf_fmt)],
18274 ))))
18275 }
18276 _ => Ok(Expression::Function(Box::new(Function::new(
18277 "TO_DATE".to_string(),
18278 vec![val],
18279 )))),
18280 }
18281 } else {
18282 // Non-default format: use format-based parsing
18283 if let Expression::Literal(ref lit) = fmt_expr {
18284 if let Literal::String(ref s) = lit.as_ref() {
18285 match target {
18286 DialectType::DuckDB => {
18287 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
/// Convert a Java/Spark `SimpleDateFormat`-style pattern into a C
/// `strptime` pattern for DuckDB's TRY_STRPTIME.
///
/// NOTE(review): this duplicates `java_to_c_fmt` used by the
/// TO_TIMESTAMP arm; nested `fn` items cannot be shared across match
/// arms, so the copies must be kept in sync by hand.
///
/// Longest tokens are rewritten first (`yyyy` before `yy`, `MM` before
/// `mm`); the trailing scan converts timezone letters (`z` -> `%Z`,
/// `Z` -> `%z`) while skipping already-emitted `%x` escapes.
fn java_to_c_fmt_todate(fmt: &str) -> String {
    let result = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        // Java `EEEE` is the full weekday name -> `%A` in C formats;
        // `%W` would be the week-of-year number.
        .replace("EEEE", "%A")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    let mut out = String::new();
    let chars: Vec<char> = result.chars().collect();
    let mut i = 0;
    while i < chars.len() {
        if chars[i] == '%' && i + 1 < chars.len() {
            // Copy an existing escape verbatim.
            out.push(chars[i]);
            out.push(chars[i + 1]);
            i += 2;
        } else if chars[i] == 'z' {
            out.push_str("%Z");
            i += 1;
        } else if chars[i] == 'Z' {
            out.push_str("%z");
            i += 1;
        } else {
            out.push(chars[i]);
            i += 1;
        }
    }
    out
}
18323 let c_fmt = java_to_c_fmt_todate(s);
18324 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
18325 let try_strptime = Expression::Function(
18326 Box::new(Function::new(
18327 "TRY_STRPTIME".to_string(),
18328 vec![val, Expression::string(&c_fmt)],
18329 )),
18330 );
18331 let cast_ts =
18332 Expression::Cast(Box::new(Cast {
18333 this: try_strptime,
18334 to: DataType::Timestamp {
18335 precision: None,
18336 timezone: false,
18337 },
18338 double_colon_syntax: false,
18339 trailing_comments: vec![],
18340 format: None,
18341 default: None,
18342 inferred_type: None,
18343 }));
18344 Ok(Expression::Cast(Box::new(Cast {
18345 this: cast_ts,
18346 to: DataType::Date,
18347 double_colon_syntax: false,
18348 trailing_comments: vec![],
18349 format: None,
18350 default: None,
18351 inferred_type: None,
18352 })))
18353 }
18354 DialectType::Presto
18355 | DialectType::Trino
18356 | DialectType::Athena => {
18357 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
18358 let p_fmt = s
18359 .replace("yyyy", "%Y")
18360 .replace("SSSSSS", "%f")
18361 .replace("MM", "%m")
18362 .replace("dd", "%d")
18363 .replace("HH", "%H")
18364 .replace("mm", "%M")
18365 .replace("ss", "%S")
18366 .replace("yy", "%y");
18367 let date_parse = Expression::Function(
18368 Box::new(Function::new(
18369 "DATE_PARSE".to_string(),
18370 vec![val, Expression::string(&p_fmt)],
18371 )),
18372 );
18373 Ok(Expression::Cast(Box::new(Cast {
18374 this: date_parse,
18375 to: DataType::Date,
18376 double_colon_syntax: false,
18377 trailing_comments: vec![],
18378 format: None,
18379 default: None,
18380 inferred_type: None,
18381 })))
18382 }
18383 DialectType::Snowflake => {
18384 // TRY_TO_DATE(x, snowflake_fmt)
18385 Ok(Expression::Function(Box::new(
18386 Function::new(
18387 "TRY_TO_DATE".to_string(),
18388 vec![val, Expression::string(s)],
18389 ),
18390 )))
18391 }
18392 _ => Ok(Expression::Function(Box::new(
18393 Function::new(
18394 "TO_DATE".to_string(),
18395 vec![val, fmt_expr],
18396 ),
18397 ))),
18398 }
18399 } else {
18400 Ok(Expression::Function(Box::new(Function::new(
18401 "TO_DATE".to_string(),
18402 vec![val, fmt_expr],
18403 ))))
18404 }
18405 } else {
18406 Ok(Expression::Function(Box::new(Function::new(
18407 "TO_DATE".to_string(),
18408 vec![val, fmt_expr],
18409 ))))
18410 }
18411 }
18412 }
18413 // TO_TIMESTAMP(x) 1-arg: epoch conversion
18414 "TO_TIMESTAMP"
18415 if f.args.len() == 1
18416 && matches!(source, DialectType::DuckDB)
18417 && matches!(
18418 target,
18419 DialectType::BigQuery
18420 | DialectType::Presto
18421 | DialectType::Trino
18422 | DialectType::Hive
18423 | DialectType::Spark
18424 | DialectType::Databricks
18425 | DialectType::Athena
18426 ) =>
18427 {
18428 let arg = f.args.into_iter().next().unwrap();
18429 let func_name = match target {
18430 DialectType::BigQuery => "TIMESTAMP_SECONDS",
18431 DialectType::Presto
18432 | DialectType::Trino
18433 | DialectType::Athena
18434 | DialectType::Hive
18435 | DialectType::Spark
18436 | DialectType::Databricks => "FROM_UNIXTIME",
18437 _ => "TO_TIMESTAMP",
18438 };
18439 Ok(Expression::Function(Box::new(Function::new(
18440 func_name.to_string(),
18441 vec![arg],
18442 ))))
18443 }
18444 // CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
// CONCAT(x) with a single argument: each target normalizes differently
// (Presto coerces to VARCHAR, TSQL just unwraps, Spark NULL-protects).
"CONCAT" if f.args.len() == 1 => {
    let arg = f.args.into_iter().next().unwrap();
    match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // CONCAT(a) -> CAST(a AS VARCHAR)
            Ok(Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::VarChar {
                    length: None,
                    parenthesized_length: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::TSQL => {
            // CONCAT(a) -> a
            Ok(arg)
        }
        DialectType::DuckDB => {
            // Keep CONCAT(a) for DuckDB (native support)
            Ok(Expression::Function(Box::new(Function::new(
                "CONCAT".to_string(),
                vec![arg],
            ))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // CONCAT(COALESCE(a, '')) so a NULL input yields '' rather
            // than NULL.
            let coalesced = Expression::Coalesce(Box::new(
                crate::expressions::VarArgFunc {
                    expressions: vec![arg, Expression::string("")],
                    original_name: None,
                    inferred_type: None,
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "CONCAT".to_string(),
                vec![coalesced],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "CONCAT".to_string(),
            vec![arg],
        )))),
    }
}
18495 // REGEXP_EXTRACT(a, p) 2-arg: BigQuery default group is 0 (no 3rd arg needed)
18496 "REGEXP_EXTRACT"
18497 if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
18498 {
18499 // If group_index is 0, drop it
18500 let drop_group = match &f.args[2] {
18501 Expression::Literal(lit)
18502 if matches!(lit.as_ref(), Literal::Number(_)) =>
18503 {
18504 let Literal::Number(n) = lit.as_ref() else {
18505 unreachable!()
18506 };
18507 n == "0"
18508 }
18509 _ => false,
18510 };
18511 if drop_group {
18512 let mut args = f.args;
18513 args.truncate(2);
18514 Ok(Expression::Function(Box::new(Function::new(
18515 "REGEXP_EXTRACT".to_string(),
18516 args,
18517 ))))
18518 } else {
18519 Ok(Expression::Function(f))
18520 }
18521 }
18522 // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
18523 "REGEXP_EXTRACT"
18524 if f.args.len() == 4
18525 && matches!(target, DialectType::Snowflake) =>
18526 {
18527 // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
18528 let mut args = f.args;
18529 let this = args.remove(0);
18530 let pattern = args.remove(0);
18531 let group = args.remove(0);
18532 let flags = args.remove(0);
18533 Ok(Expression::Function(Box::new(Function::new(
18534 "REGEXP_SUBSTR".to_string(),
18535 vec![
18536 this,
18537 pattern,
18538 Expression::number(1),
18539 Expression::number(1),
18540 flags,
18541 group,
18542 ],
18543 ))))
18544 }
18545 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
18546 "REGEXP_SUBSTR"
18547 if f.args.len() == 3
18548 && matches!(
18549 target,
18550 DialectType::DuckDB
18551 | DialectType::Presto
18552 | DialectType::Trino
18553 | DialectType::Spark
18554 | DialectType::Databricks
18555 ) =>
18556 {
18557 let mut args = f.args;
18558 let this = args.remove(0);
18559 let pattern = args.remove(0);
18560 let position = args.remove(0);
18561 // Wrap subject in SUBSTRING(this, position) to apply the offset
18562 let substring_expr = Expression::Function(Box::new(Function::new(
18563 "SUBSTRING".to_string(),
18564 vec![this, position],
18565 )));
18566 let target_name = match target {
18567 DialectType::DuckDB => "REGEXP_EXTRACT",
18568 _ => "REGEXP_EXTRACT",
18569 };
18570 Ok(Expression::Function(Box::new(Function::new(
18571 target_name.to_string(),
18572 vec![substring_expr, pattern],
18573 ))))
18574 }
18575 // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
// TO_DAYS(x): emit the target's day-difference from '0000-01-01',
// wrapped as (... + 1) to match MySQL's TO_DAYS day numbering.
"TO_DAYS" if f.args.len() == 1 => {
    let x = f.args.into_iter().next().unwrap();
    let epoch = Expression::string("0000-01-01");
    // Build the final target-specific expression directly
    let datediff_expr = match target {
        DialectType::MySQL | DialectType::SingleStore => {
            // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
            Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![x, epoch],
            )))
        }
        DialectType::DuckDB => {
            // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
            let cast_epoch = Expression::Cast(Box::new(Cast {
                this: epoch,
                to: DataType::Date,
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let cast_x = Expression::Cast(Box::new(Cast {
                this: x,
                to: DataType::Date,
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string("DAY"), cast_epoch, cast_x],
            )))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
            let cast_epoch = Self::double_cast_timestamp_date(epoch);
            let cast_x = Self::double_cast_timestamp_date(x);
            Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string("DAY"), cast_epoch, cast_x],
            )))
        }
        _ => {
            // Default: (DATEDIFF(x, '0000-01-01') + 1)
            Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![x, epoch],
            )))
        }
    };
    // Parenthesize the "+ 1" so precedence survives re-generation.
    let add_one = Expression::Add(Box::new(BinaryOp::new(
        datediff_expr,
        Expression::number(1),
    )));
    Ok(Expression::Paren(Box::new(crate::expressions::Paren {
        this: add_one,
        trailing_comments: Vec::new(),
    })))
}
18641 // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
// STR_TO_DATE(x, fmt) for Presto/Trino: DATE_PARSE returns a timestamp,
// so a date-only format gets an extra CAST ... AS DATE on top.
"STR_TO_DATE"
    if f.args.len() == 2
        && matches!(
            target,
            DialectType::Presto | DialectType::Trino
        ) =>
{
    let mut args = f.args;
    let x = args.remove(0);
    let format_expr = args.remove(0);
    // Check if the format contains time components
    // (only decidable for string-literal formats; any other
    // expression is treated as date-only).
    let has_time = if let Expression::Literal(ref lit) = format_expr {
        if let Literal::String(ref fmt) = lit.as_ref() {
            fmt.contains("%H")
                || fmt.contains("%T")
                || fmt.contains("%M")
                || fmt.contains("%S")
                || fmt.contains("%I")
                || fmt.contains("%p")
        } else {
            false
        }
    } else {
        false
    };
    let date_parse = Expression::Function(Box::new(Function::new(
        "DATE_PARSE".to_string(),
        vec![x, format_expr],
    )));
    if has_time {
        // Has time components: just DATE_PARSE
        Ok(date_parse)
    } else {
        // Date-only: CAST(DATE_PARSE(...) AS DATE)
        Ok(Expression::Cast(Box::new(Cast {
            this: date_parse,
            to: DataType::Date,
            trailing_comments: Vec::new(),
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        })))
    }
}
// STR_TO_DATE(x, fmt) for PostgreSQL/Redshift: translate MySQL '%'
// format codes to PostgreSQL TO_DATE template patterns, then cast the
// result to TIMESTAMP.
// NOTE(review): the final cast makes the result a timestamp rather than
// a date — presumably to mirror MySQL STR_TO_DATE returning a
// datetime; confirm against callers.
"STR_TO_DATE"
    if f.args.len() == 2
        && matches!(
            target,
            DialectType::PostgreSQL | DialectType::Redshift
        ) =>
{
    let mut args = f.args;
    let x = args.remove(0);
    let fmt = args.remove(0);
    // Only string-literal formats are translated; other expressions
    // are forwarded unchanged.
    let pg_fmt = match fmt {
        Expression::Literal(lit)
            if matches!(lit.as_ref(), Literal::String(_)) =>
        {
            let Literal::String(s) = lit.as_ref() else {
                unreachable!()
            };
            Expression::string(
                &s.replace("%Y", "YYYY")
                    .replace("%m", "MM")
                    .replace("%d", "DD")
                    .replace("%H", "HH24")
                    .replace("%M", "MI")
                    .replace("%S", "SS"),
            )
        }
        other => other,
    };
    let to_date = Expression::Function(Box::new(Function::new(
        "TO_DATE".to_string(),
        vec![x, pg_fmt],
    )));
    Ok(Expression::Cast(Box::new(Cast {
        this: to_date,
        to: DataType::Timestamp {
            timezone: false,
            precision: None,
        },
        trailing_comments: Vec::new(),
        double_colon_syntax: false,
        format: None,
        default: None,
        inferred_type: None,
    })))
}
18732 // RANGE(start, end) -> GENERATE_SERIES for SQLite
18733 "RANGE"
18734 if (f.args.len() == 1 || f.args.len() == 2)
18735 && matches!(target, DialectType::SQLite) =>
18736 {
18737 if f.args.len() == 2 {
18738 // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
18739 // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
18740 let mut args = f.args;
18741 let start = args.remove(0);
18742 let end = args.remove(0);
18743 Ok(Expression::Function(Box::new(Function::new(
18744 "GENERATE_SERIES".to_string(),
18745 vec![start, end],
18746 ))))
18747 } else {
18748 Ok(Expression::Function(f))
18749 }
18750 }
18751 // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
18752 // When source is Snowflake, keep as-is (args already in correct form)
18753 "UNIFORM"
18754 if matches!(target, DialectType::Snowflake)
18755 && (f.args.len() == 2 || f.args.len() == 3) =>
18756 {
18757 if matches!(source, DialectType::Snowflake) {
18758 // Snowflake -> Snowflake: keep as-is
18759 Ok(Expression::Function(f))
18760 } else {
18761 let mut args = f.args;
18762 let low = args.remove(0);
18763 let high = args.remove(0);
18764 let random = if !args.is_empty() {
18765 let seed = args.remove(0);
18766 Expression::Function(Box::new(Function::new(
18767 "RANDOM".to_string(),
18768 vec![seed],
18769 )))
18770 } else {
18771 Expression::Function(Box::new(Function::new(
18772 "RANDOM".to_string(),
18773 vec![],
18774 )))
18775 };
18776 Ok(Expression::Function(Box::new(Function::new(
18777 "UNIFORM".to_string(),
18778 vec![low, high, random],
18779 ))))
18780 }
18781 }
18782 // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
// TO_UTC_TIMESTAMP(ts, tz): rewrite the "local time in tz -> UTC"
// conversion into each target's idiom. String-literal timestamps are
// first wrapped in CAST(... AS TIMESTAMP) for every target.
"TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
    let mut args = f.args;
    let ts_arg = args.remove(0);
    let tz_arg = args.remove(0);
    // Cast string literal to TIMESTAMP for all targets
    let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
    {
        Expression::Cast(Box::new(Cast {
            this: ts_arg,
            to: DataType::Timestamp {
                timezone: false,
                precision: None,
            },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }))
    } else {
        ts_arg
    };
    match target {
        // Spark-family: native TO_UTC_TIMESTAMP, just with the cast applied.
        DialectType::Spark | DialectType::Databricks => {
            Ok(Expression::Function(Box::new(Function::new(
                "TO_UTC_TIMESTAMP".to_string(),
                vec![ts_cast, tz_arg],
            ))))
        }
        DialectType::Snowflake => {
            // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
            Ok(Expression::Function(Box::new(Function::new(
                "CONVERT_TIMEZONE".to_string(),
                vec![tz_arg, Expression::string("UTC"), ts_cast],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
            let wtz = Expression::Function(Box::new(Function::new(
                "WITH_TIMEZONE".to_string(),
                vec![ts_cast, tz_arg],
            )));
            Ok(Expression::AtTimeZone(Box::new(
                crate::expressions::AtTimeZone {
                    this: wtz,
                    zone: Expression::string("UTC"),
                },
            )))
        }
        DialectType::BigQuery => {
            // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
            // Unwrap the TIMESTAMP cast added above so the inner value
            // is re-cast to DATETIME instead of double-cast.
            // NOTE(review): the else-branch clone looks avoidable — the
            // conditional move should permit using ts_cast directly.
            let cast_dt = Expression::Cast(Box::new(Cast {
                this: if let Expression::Cast(c) = ts_cast {
                    c.this
                } else {
                    ts_cast.clone()
                },
                to: DataType::Custom {
                    name: "DATETIME".to_string(),
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let ts_func =
                Expression::Function(Box::new(Function::new(
                    "TIMESTAMP".to_string(),
                    vec![cast_dt, tz_arg],
                )));
            Ok(Expression::Function(Box::new(Function::new(
                "DATETIME".to_string(),
                vec![ts_func, Expression::string("UTC")],
            ))))
        }
        _ => {
            // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
            let atz1 = Expression::AtTimeZone(Box::new(
                crate::expressions::AtTimeZone {
                    this: ts_cast,
                    zone: tz_arg,
                },
            ));
            Ok(Expression::AtTimeZone(Box::new(
                crate::expressions::AtTimeZone {
                    this: atz1,
                    zone: Expression::string("UTC"),
                },
            )))
        }
    }
}
18878 // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
18879 "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
18880 let mut args = f.args;
18881 let ts_arg = args.remove(0);
18882 let tz_arg = args.remove(0);
18883 // Cast string literal to TIMESTAMP
18884 let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
18885 {
18886 Expression::Cast(Box::new(Cast {
18887 this: ts_arg,
18888 to: DataType::Timestamp {
18889 timezone: false,
18890 precision: None,
18891 },
18892 trailing_comments: vec![],
18893 double_colon_syntax: false,
18894 format: None,
18895 default: None,
18896 inferred_type: None,
18897 }))
18898 } else {
18899 ts_arg
18900 };
18901 match target {
18902 DialectType::Spark | DialectType::Databricks => {
18903 Ok(Expression::Function(Box::new(Function::new(
18904 "FROM_UTC_TIMESTAMP".to_string(),
18905 vec![ts_cast, tz_arg],
18906 ))))
18907 }
18908 DialectType::Presto
18909 | DialectType::Trino
18910 | DialectType::Athena => {
18911 // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
18912 Ok(Expression::Function(Box::new(Function::new(
18913 "AT_TIMEZONE".to_string(),
18914 vec![ts_cast, tz_arg],
18915 ))))
18916 }
18917 DialectType::Snowflake => {
18918 // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
18919 Ok(Expression::Function(Box::new(Function::new(
18920 "CONVERT_TIMEZONE".to_string(),
18921 vec![Expression::string("UTC"), tz_arg, ts_cast],
18922 ))))
18923 }
18924 _ => {
18925 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
18926 Ok(Expression::AtTimeZone(Box::new(
18927 crate::expressions::AtTimeZone {
18928 this: ts_cast,
18929 zone: tz_arg,
18930 },
18931 )))
18932 }
18933 }
18934 }
// MAP_FROM_ARRAYS(keys, values) -> target-specific map construction.
// Only the function name changes; argument list is passed through untouched.
"MAP_FROM_ARRAYS" if f.args.len() == 2 => {
    let name = match target {
        DialectType::Snowflake => "OBJECT_CONSTRUCT",
        _ => "MAP",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto.
// Other targets keep STR_TO_MAP as-is.
"STR_TO_MAP" if f.args.len() >= 1 => match target {
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        Ok(Expression::Function(Box::new(Function::new(
            "SPLIT_TO_MAP".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation.
// The dedicated AST node lets each generator render the dialect's own
// formatting function; the format string is extracted here.
"TIME_TO_STR" if f.args.len() == 2 => {
    let mut args = f.args;
    let this = args.remove(0);
    let fmt_expr = args.remove(0);
    // String literal -> use it verbatim; non-string literal -> empty format;
    // non-literal expression -> fall back to a default datetime format.
    let format = if let Expression::Literal(lit) = fmt_expr {
        if let Literal::String(s) = lit.as_ref() {
            s.clone()
        } else {
            String::new()
        }
    } else {
        "%Y-%m-%d %H:%M:%S".to_string()
    };
    Ok(Expression::TimeToStr(Box::new(
        crate::expressions::TimeToStr {
            this: Box::new(this),
            format,
            culture: None,
            zone: None,
        },
    )))
}
// STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation.
// Same format-extraction policy as TIME_TO_STR above.
"STR_TO_TIME" if f.args.len() == 2 => {
    let mut args = f.args;
    let this = args.remove(0);
    let fmt_expr = args.remove(0);
    let format = if let Expression::Literal(lit) = fmt_expr {
        if let Literal::String(s) = lit.as_ref() {
            s.clone()
        } else {
            String::new()
        }
    } else {
        "%Y-%m-%d %H:%M:%S".to_string()
    };
    Ok(Expression::StrToTime(Box::new(
        crate::expressions::StrToTime {
            this: Box::new(this),
            format,
            zone: None,
            safe: None,
            target_type: None,
        },
    )))
}
// STR_TO_UNIX(x, fmt) -> Expression::StrToUnix for proper generation.
// The format is optional; only a string-literal second argument is kept
// (a non-literal format expression is dropped and `format` stays None).
"STR_TO_UNIX" if f.args.len() >= 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    let format = if !args.is_empty() {
        if let Expression::Literal(lit) = args.remove(0) {
            if let Literal::String(s) = lit.as_ref() {
                Some(s.clone())
            } else {
                None
            }
        } else {
            None
        }
    } else {
        None
    };
    Ok(Expression::StrToUnix(Box::new(
        crate::expressions::StrToUnix {
            this: Some(Box::new(this)),
            format,
        },
    )))
}
// TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation.
"TIME_TO_UNIX" if f.args.len() == 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    Ok(Expression::TimeToUnix(Box::new(
        crate::expressions::UnaryFunc {
            this,
            original_name: None,
            inferred_type: None,
        },
    )))
}
// UNIX_TO_STR(x, fmt) -> Expression::UnixToStr for proper generation.
// Optional format handled the same way as STR_TO_UNIX above.
"UNIX_TO_STR" if f.args.len() >= 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    let format = if !args.is_empty() {
        if let Expression::Literal(lit) = args.remove(0) {
            if let Literal::String(s) = lit.as_ref() {
                Some(s.clone())
            } else {
                None
            }
        } else {
            None
        }
    } else {
        None
    };
    Ok(Expression::UnixToStr(Box::new(
        crate::expressions::UnixToStr {
            this: Box::new(this),
            format,
        },
    )))
}
// UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation.
// All optional knobs (scale, zone, offsets, format) are left unset here;
// presumably the generator fills dialect defaults — NOTE(review): confirm.
"UNIX_TO_TIME" if f.args.len() == 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    Ok(Expression::UnixToTime(Box::new(
        crate::expressions::UnixToTime {
            this: Box::new(this),
            scale: None,
            zone: None,
            hours: None,
            minutes: None,
            format: None,
            target_type: None,
        },
    )))
}
// TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation.
"TIME_STR_TO_DATE" if f.args.len() == 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    Ok(Expression::TimeStrToDate(Box::new(
        crate::expressions::UnaryFunc {
            this,
            original_name: None,
            inferred_type: None,
        },
    )))
}
// TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation.
"TIME_STR_TO_TIME" if f.args.len() == 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    Ok(Expression::TimeStrToTime(Box::new(
        crate::expressions::TimeStrToTime {
            this: Box::new(this),
            zone: None,
        },
    )))
}
// MONTHS_BETWEEN(end, start) -> DuckDB complex expansion.
// DuckDB has no MONTHS_BETWEEN, so it is rebuilt from DATE_DIFF plus a
// fractional-day correction; Snowflake/Redshift/Presto get their native
// month-diff functions instead.
"MONTHS_BETWEEN" if f.args.len() == 2 => {
    match target {
        DialectType::DuckDB => {
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            let cast_end = Self::ensure_cast_date(end_date);
            let cast_start = Self::ensure_cast_date(start_date);
            // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
            // The /31.0 day-fraction mirrors the MONTHS_BETWEEN convention
            // of the source dialects — NOTE(review): confirm against Spark.
            let dd = Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![
                    Expression::string("MONTH"),
                    cast_start.clone(),
                    cast_end.clone(),
                ],
            )));
            let day_end =
                Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_end.clone()],
                )));
            let day_start =
                Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_start.clone()],
                )));
            let last_day_end =
                Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_end.clone()],
                )));
            let last_day_start =
                Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_start.clone()],
                )));
            let day_last_end = Expression::Function(Box::new(
                Function::new("DAY".to_string(), vec![last_day_end]),
            ));
            let day_last_start = Expression::Function(Box::new(
                Function::new("DAY".to_string(), vec![last_day_start]),
            ));
            // Both dates on their month's last day -> whole-month diff only.
            let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                day_end.clone(),
                day_last_end,
            )));
            let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                day_start.clone(),
                day_last_start,
            )));
            let both_cond =
                Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
            let day_diff = Expression::Sub(Box::new(BinaryOp::new(
                day_end, day_start,
            )));
            // Parenthesize the subtraction so division binds correctly
            // in the generated SQL.
            let day_diff_paren = Expression::Paren(Box::new(
                crate::expressions::Paren {
                    this: day_diff,
                    trailing_comments: Vec::new(),
                },
            ));
            let frac = Expression::Div(Box::new(BinaryOp::new(
                day_diff_paren,
                Expression::Literal(Box::new(Literal::Number(
                    "31.0".to_string(),
                ))),
            )));
            let case_expr = Expression::Case(Box::new(Case {
                operand: None,
                whens: vec![(both_cond, Expression::number(0))],
                else_: Some(frac),
                comments: Vec::new(),
                inferred_type: None,
            }));
            Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
        }
        DialectType::Snowflake | DialectType::Redshift => {
            // DATEDIFF(MONTH, start, end) — unit as a bare identifier.
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            let unit = Expression::Identifier(Identifier::new("MONTH"));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, start_date, end_date],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // DATE_DIFF('MONTH', start, end) — unit as a string literal.
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string("MONTH"), start_date, end_date],
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific).
// Drop the roundOff arg for non-Spark targets, keep it for Spark.
"MONTHS_BETWEEN" if f.args.len() == 3 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            Ok(Expression::Function(f))
        }
        _ => {
            // Drop the 3rd arg and delegate to the 2-arg logic by
            // re-normalizing the rebuilt 2-arg call.
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            // Re-create as 2-arg and process
            let f2 = Function::new(
                "MONTHS_BETWEEN".to_string(),
                vec![end_date, start_date],
            );
            let e2 = Expression::Function(Box::new(f2));
            Self::cross_dialect_normalize(e2, source, target)
        }
    }
}
// TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP) for most targets.
// Only applies when the source is Spark-family/Hive, where the 1-arg form
// is a plain conversion.
"TO_TIMESTAMP"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    let arg = f.args.into_iter().next().unwrap();
    Ok(Expression::Cast(Box::new(Cast {
        this: arg,
        to: DataType::Timestamp {
            timezone: false,
            precision: None,
        },
        trailing_comments: vec![],
        double_colon_syntax: false,
        format: None,
        default: None,
        inferred_type: None,
    })))
}
// STRING(x) -> CAST(x AS STRING) for Spark target.
// Spark-family targets keep the STRING type name; everyone else gets TEXT.
"STRING"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Spark | DialectType::Databricks
        ) =>
{
    let arg = f.args.into_iter().next().unwrap();
    let dt = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => DataType::Custom {
            name: "STRING".to_string(),
        },
        _ => DataType::Text,
    };
    Ok(Expression::Cast(Box::new(Cast {
        this: arg,
        to: dt,
        trailing_comments: vec![],
        double_colon_syntax: false,
        format: None,
        default: None,
        inferred_type: None,
    })))
}
// LOGICAL_OR(x) -> BOOL_OR(x) for Spark target; name-only rewrite.
"LOGICAL_OR" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "BOOL_OR",
        _ => "LOGICAL_OR",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto.
// Spark's SPLIT takes a regex pattern, so regex-splitting functions are
// chosen on targets whose plain SPLIT is delimiter-based.
"SPLIT"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    let name = match target {
        DialectType::DuckDB => "STR_SPLIT_REGEX",
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => "REGEXP_SPLIT",
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "SPLIT",
        _ => "SPLIT",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB.
"TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        Ok(Expression::Function(Box::new(Function::new(
            "ELEMENT_AT".to_string(),
            f.args,
        ))))
    }
    DialectType::DuckDB => {
        // DuckDB subscript access; out-of-range behavior is DuckDB's
        // (NULL), matching the "try" intent — NOTE(review): confirm.
        let mut args = f.args;
        let arr = args.remove(0);
        let idx = args.remove(0);
        Ok(Expression::Subscript(Box::new(
            crate::expressions::Subscript {
                this: arr,
                index: idx,
            },
        )))
    }
    _ => Ok(Expression::Function(f)),
},
// ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, LIST_FILTER for DuckDB.
"ARRAY_FILTER" if f.args.len() == 2 => {
    let name = match target {
        DialectType::DuckDB => "LIST_FILTER",
        DialectType::StarRocks => "ARRAY_FILTER",
        _ => "FILTER",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB.
// Same mapping table as ARRAY_FILTER above, so both spellings normalize
// identically.
"FILTER" if f.args.len() == 2 => {
    let name = match target {
        DialectType::DuckDB => "LIST_FILTER",
        DialectType::StarRocks => "ARRAY_FILTER",
        _ => "FILTER",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark; name-only rewrite.
"REDUCE" if f.args.len() >= 3 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "AGGREGATE",
        _ => "REDUCE",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// CURRENT_SCHEMA() -> dialect-specific spelling of the current schema.
"CURRENT_SCHEMA" => {
    match target {
        DialectType::PostgreSQL => {
            // PostgreSQL: CURRENT_SCHEMA (no parens) — built as a
            // zero-arg function with `no_parens: true` so the generator
            // omits the parentheses.
            Ok(Expression::Function(Box::new(Function {
                name: "CURRENT_SCHEMA".to_string(),
                args: vec![],
                distinct: false,
                trailing_comments: vec![],
                use_bracket_syntax: false,
                no_parens: true,
                quoted: false,
                span: None,
                inferred_type: None,
            })))
        }
        DialectType::MySQL
        | DialectType::Doris
        | DialectType::StarRocks => Ok(Expression::Function(Box::new(
            Function::new("SCHEMA".to_string(), vec![]),
        ))),
        DialectType::TSQL => Ok(Expression::Function(Box::new(
            Function::new("SCHEMA_NAME".to_string(), vec![]),
        ))),
        // SQLite has no schemas; its primary database is named 'main'.
        DialectType::SQLite => Ok(Expression::Literal(Box::new(
            Literal::String("main".to_string()),
        ))),
        _ => Ok(Expression::Function(f)),
    }
}
// LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for Spark/Hive/Databricks/ClickHouse.
// Those targets lack the 2-arg LTRIM, so the SQL-standard TRIM form is used.
"LTRIM" if f.args.len() == 2 => match target {
    DialectType::Spark
    | DialectType::Hive
    | DialectType::Databricks
    | DialectType::ClickHouse => {
        let mut args = f.args;
        let str_expr = args.remove(0);
        let chars = args.remove(0);
        Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
            this: str_expr,
            characters: Some(chars),
            position: crate::expressions::TrimPosition::Leading,
            sql_standard_syntax: true,
            position_explicit: true,
        })))
    }
    _ => Ok(Expression::Function(f)),
},
// RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str) for Spark/Hive/Databricks/ClickHouse.
// Mirror of the LTRIM arm above with Trailing position.
"RTRIM" if f.args.len() == 2 => match target {
    DialectType::Spark
    | DialectType::Hive
    | DialectType::Databricks
    | DialectType::ClickHouse => {
        let mut args = f.args;
        let str_expr = args.remove(0);
        let chars = args.remove(0);
        Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
            this: str_expr,
            characters: Some(chars),
            position: crate::expressions::TrimPosition::Trailing,
            sql_standard_syntax: true,
            position_explicit: true,
        })))
    }
    _ => Ok(Expression::Function(f)),
},
// ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse.
// These rename-only arms reuse the original Function node (`*f`) so any
// flags/comments carried on it are preserved; only `name` changes.
"ARRAY_REVERSE" if f.args.len() == 1 => match target {
    DialectType::ClickHouse => {
        let mut new_f = *f;
        new_f.name = "arrayReverse".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    _ => Ok(Expression::Function(f)),
},
// UUID() -> NEWID() for TSQL (and Fabric, which shares TSQL syntax).
"UUID" if f.args.is_empty() => match target {
    DialectType::TSQL | DialectType::Fabric => {
        Ok(Expression::Function(Box::new(Function::new(
            "NEWID".to_string(),
            vec![],
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse, FARMFINGERPRINT64(x) for Redshift.
"FARM_FINGERPRINT" if f.args.len() == 1 => match target {
    DialectType::ClickHouse => {
        let mut new_f = *f;
        new_f.name = "farmFingerprint64".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    DialectType::Redshift => {
        let mut new_f = *f;
        new_f.name = "FARMFINGERPRINT64".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    _ => Ok(Expression::Function(f)),
},
// JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark, OBJECT_KEYS(x) for Snowflake.
"JSON_KEYS" => match target {
    DialectType::Databricks | DialectType::Spark => {
        let mut new_f = *f;
        new_f.name = "JSON_OBJECT_KEYS".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    DialectType::Snowflake => {
        let mut new_f = *f;
        new_f.name = "OBJECT_KEYS".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    _ => Ok(Expression::Function(f)),
},
// WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake.
"WEEKOFYEAR" => match target {
    DialectType::Snowflake => {
        let mut new_f = *f;
        new_f.name = "WEEKISO".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    _ => Ok(Expression::Function(f)),
},
// FORMAT(fmt, args...) -> FORMAT_STRING(fmt, args...) for Databricks.
// Restricted to Generic source so dialect-specific FORMAT semantics
// (e.g. TSQL's) are not clobbered.
"FORMAT"
    if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
{
    match target {
        DialectType::Databricks | DialectType::Spark => {
            let mut new_f = *f;
            new_f.name = "FORMAT_STRING".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// CONCAT_WS('-', args...) -> CONCAT_WS('-', CAST(arg AS VARCHAR), ...) for Presto/Trino.
// Presto-family CONCAT_WS is strict about VARCHAR arguments, so every
// non-separator argument is wrapped in a CAST; the separator is untouched.
"CONCAT_WS" if f.args.len() >= 2 => match target {
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        let mut args = f.args;
        let sep = args.remove(0);
        let cast_args: Vec<Expression> = args
            .into_iter()
            .map(|a| {
                Expression::Cast(Box::new(Cast {
                    this: a,
                    to: DataType::VarChar {
                        length: None,
                        parenthesized_length: false,
                    },
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            })
            .collect();
        let mut new_args = vec![sep];
        new_args.extend(cast_args);
        Ok(Expression::Function(Box::new(Function::new(
            "CONCAT_WS".to_string(),
            new_args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// ARRAY_SLICE(x, start, end) -> SLICE(x, start, end) for Presto/Trino/Databricks, arraySlice for ClickHouse.
"ARRAY_SLICE" if f.args.len() >= 2 => match target {
    DialectType::DuckDB
        if f.args.len() == 3
            && matches!(source, DialectType::Snowflake) =>
    {
        // Snowflake ARRAY_SLICE (0-indexed, exclusive end)
        // -> DuckDB ARRAY_SLICE (1-indexed, inclusive end)
        // Index adjustment is done with CASE expressions so it stays
        // correct for both literal and computed indices, including
        // negative (from-the-end) indices.
        let mut args = f.args;
        let arr = args.remove(0);
        let start = args.remove(0);
        let end = args.remove(0);

        // CASE WHEN start >= 0 THEN start + 1 ELSE start END
        let adjusted_start = Expression::Case(Box::new(Case {
            operand: None,
            whens: vec![(
                Expression::Gte(Box::new(BinaryOp {
                    left: start.clone(),
                    right: Expression::number(0),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
                Expression::Add(Box::new(BinaryOp {
                    left: start.clone(),
                    right: Expression::number(1),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
            )],
            else_: Some(start),
            comments: vec![],
            inferred_type: None,
        }));

        // CASE WHEN end < 0 THEN end - 1 ELSE end END
        let adjusted_end = Expression::Case(Box::new(Case {
            operand: None,
            whens: vec![(
                Expression::Lt(Box::new(BinaryOp {
                    left: end.clone(),
                    right: Expression::number(0),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
                Expression::Sub(Box::new(BinaryOp {
                    left: end.clone(),
                    right: Expression::number(1),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
            )],
            else_: Some(end),
            comments: vec![],
            inferred_type: None,
        }));

        Ok(Expression::Function(Box::new(Function::new(
            "ARRAY_SLICE".to_string(),
            vec![arr, adjusted_start, adjusted_end],
        ))))
    }
    DialectType::Presto
    | DialectType::Trino
    | DialectType::Athena
    | DialectType::Databricks
    | DialectType::Spark => {
        let mut new_f = *f;
        new_f.name = "SLICE".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    DialectType::ClickHouse => {
        let mut new_f = *f;
        new_f.name = "arraySlice".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    _ => Ok(Expression::Function(f)),
},
// ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB (swap args).
"ARRAY_PREPEND" if f.args.len() == 2 => match target {
    DialectType::DuckDB => {
        let mut args = f.args;
        let arr = args.remove(0);
        let val = args.remove(0);
        Ok(Expression::Function(Box::new(Function::new(
            "LIST_PREPEND".to_string(),
            vec![val, arr],
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// ARRAY_REMOVE(arr, target) -> dialect-specific.
// DuckDB/ClickHouse get a lambda filter keeping elements <> target;
// BigQuery gets an ARRAY(SELECT ... FROM UNNEST ...) subquery. The
// synthetic lambda/column variable is named `_u` in all three branches.
"ARRAY_REMOVE" if f.args.len() == 2 => {
    match target {
        DialectType::DuckDB => {
            let mut args = f.args;
            let arr = args.remove(0);
            let target_val = args.remove(0);
            let u_id = crate::expressions::Identifier::new("_u");
            // LIST_FILTER(arr, _u -> _u <> target)
            let lambda = Expression::Lambda(Box::new(
                crate::expressions::LambdaExpr {
                    parameters: vec![u_id.clone()],
                    body: Expression::Neq(Box::new(BinaryOp {
                        left: Expression::Identifier(u_id),
                        right: target_val,
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                        inferred_type: None,
                    })),
                    colon: false,
                    parameter_types: Vec::new(),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "LIST_FILTER".to_string(),
                vec![arr, lambda],
            ))))
        }
        DialectType::ClickHouse => {
            let mut args = f.args;
            let arr = args.remove(0);
            let target_val = args.remove(0);
            let u_id = crate::expressions::Identifier::new("_u");
            // arrayFilter(_u -> _u <> target, arr)
            // Note the reversed argument order vs. DuckDB: ClickHouse
            // takes the lambda first.
            let lambda = Expression::Lambda(Box::new(
                crate::expressions::LambdaExpr {
                    parameters: vec![u_id.clone()],
                    body: Expression::Neq(Box::new(BinaryOp {
                        left: Expression::Identifier(u_id),
                        right: target_val,
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                        inferred_type: None,
                    })),
                    colon: false,
                    parameter_types: Vec::new(),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "arrayFilter".to_string(),
                vec![lambda, arr],
            ))))
        }
        DialectType::BigQuery => {
            // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
            let mut args = f.args;
            let arr = args.remove(0);
            let target_val = args.remove(0);
            let u_id = crate::expressions::Identifier::new("_u");
            let u_col = Expression::Column(Box::new(
                crate::expressions::Column {
                    name: u_id.clone(),
                    table: None,
                    join_mark: false,
                    trailing_comments: Vec::new(),
                    span: None,
                    inferred_type: None,
                },
            ));
            // UNNEST(the_array) AS _u
            let unnest_expr = Expression::Unnest(Box::new(
                crate::expressions::UnnestFunc {
                    this: arr,
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                },
            ));
            let aliased_unnest = Expression::Alias(Box::new(
                crate::expressions::Alias {
                    this: unnest_expr,
                    alias: u_id.clone(),
                    column_aliases: Vec::new(),
                    pre_alias_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                },
            ));
            // _u <> target
            let where_cond = Expression::Neq(Box::new(BinaryOp {
                left: u_col.clone(),
                right: target_val,
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));
            // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
            let subquery = Expression::Select(Box::new(
                crate::expressions::Select::new()
                    .column(u_col)
                    .from(aliased_unnest)
                    .where_(where_cond),
            ));
            // ARRAY(subquery) -- use ArrayFunc with subquery as single element
            Ok(Expression::ArrayFunc(Box::new(
                crate::expressions::ArrayConstructor {
                    expressions: vec![subquery],
                    bracket_notation: false,
                    use_list_keyword: false,
                },
            )))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// PARSE_JSON(str) -> remove for SQLite/Doris (just use the string literal).
// These targets treat JSON values as plain strings, so the wrapper is
// stripped and the single argument returned unchanged. The `unwrap` is
// safe: the guard checked args.len() == 1.
"PARSE_JSON" if f.args.len() == 1 => {
    match target {
        DialectType::SQLite
        | DialectType::Doris
        | DialectType::MySQL
        | DialectType::StarRocks => {
            // Strip PARSE_JSON, return the inner argument
            Ok(f.args.into_iter().next().unwrap())
        }
        _ => Ok(Expression::Function(f)),
    }
}
// JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON
// This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through
"JSON_REMOVE" => Ok(Expression::Function(f)),
// JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON
// This is handled by PARSE_JSON stripping above; JSON_SET is passed through
"JSON_SET" => Ok(Expression::Function(f)),
19775 // DECODE(x, search1, result1, ..., default) -> CASE WHEN
19776 // Behavior per search value type:
19777 // NULL literal -> CASE WHEN x IS NULL THEN result
19778 // Literal (number, string, bool) -> CASE WHEN x = literal THEN result
19779 // Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
19780 "DECODE" if f.args.len() >= 3 => {
19781 // Keep as DECODE for targets that support it natively
19782 let keep_as_decode = matches!(
19783 target,
19784 DialectType::Oracle
19785 | DialectType::Snowflake
19786 | DialectType::Redshift
19787 | DialectType::Teradata
19788 | DialectType::Spark
19789 | DialectType::Databricks
19790 );
19791 if keep_as_decode {
19792 return Ok(Expression::Function(f));
19793 }
19794
19795 let mut args = f.args;
19796 let this_expr = args.remove(0);
19797 let mut pairs = Vec::new();
19798 let mut default = None;
19799 let mut i = 0;
19800 while i + 1 < args.len() {
19801 pairs.push((args[i].clone(), args[i + 1].clone()));
19802 i += 2;
19803 }
19804 if i < args.len() {
19805 default = Some(args[i].clone());
19806 }
// Helper: check if expression is a literal value.
// Used by the DECODE expansion to decide between a simple equality test
// and the null-safe comparison. `Neg` is included so negated values
// (e.g. -1) are treated as literals — NOTE(review): Neg wraps any
// expression, so a negated column also matches; confirm that is intended.
fn is_literal(e: &Expression) -> bool {
    matches!(
        e,
        Expression::Literal(_)
            | Expression::Boolean(_)
            | Expression::Neg(_)
    )
}
19816 let whens: Vec<(Expression, Expression)> = pairs
19817 .into_iter()
19818 .map(|(search, result)| {
19819 if matches!(&search, Expression::Null(_)) {
19820 // NULL search -> IS NULL
19821 let condition = Expression::Is(Box::new(BinaryOp {
19822 left: this_expr.clone(),
19823 right: Expression::Null(crate::expressions::Null),
19824 left_comments: Vec::new(),
19825 operator_comments: Vec::new(),
19826 trailing_comments: Vec::new(),
19827 inferred_type: None,
19828 }));
19829 (condition, result)
19830 } else if is_literal(&search) {
19831 // Literal search -> simple equality
19832 let eq = Expression::Eq(Box::new(BinaryOp {
19833 left: this_expr.clone(),
19834 right: search,
19835 left_comments: Vec::new(),
19836 operator_comments: Vec::new(),
19837 trailing_comments: Vec::new(),
19838 inferred_type: None,
19839 }));
19840 (eq, result)
19841 } else {
19842 // Non-literal (column ref, expression) -> null-safe comparison
19843 let needs_paren = matches!(
19844 &search,
19845 Expression::Eq(_)
19846 | Expression::Neq(_)
19847 | Expression::Gt(_)
19848 | Expression::Gte(_)
19849 | Expression::Lt(_)
19850 | Expression::Lte(_)
19851 );
19852 let search_for_eq = if needs_paren {
19853 Expression::Paren(Box::new(
19854 crate::expressions::Paren {
19855 this: search.clone(),
19856 trailing_comments: Vec::new(),
19857 },
19858 ))
19859 } else {
19860 search.clone()
19861 };
19862 let eq = Expression::Eq(Box::new(BinaryOp {
19863 left: this_expr.clone(),
19864 right: search_for_eq,
19865 left_comments: Vec::new(),
19866 operator_comments: Vec::new(),
19867 trailing_comments: Vec::new(),
19868 inferred_type: None,
19869 }));
19870 let search_for_null = if needs_paren {
19871 Expression::Paren(Box::new(
19872 crate::expressions::Paren {
19873 this: search.clone(),
19874 trailing_comments: Vec::new(),
19875 },
19876 ))
19877 } else {
19878 search.clone()
19879 };
19880 let x_is_null = Expression::Is(Box::new(BinaryOp {
19881 left: this_expr.clone(),
19882 right: Expression::Null(crate::expressions::Null),
19883 left_comments: Vec::new(),
19884 operator_comments: Vec::new(),
19885 trailing_comments: Vec::new(),
19886 inferred_type: None,
19887 }));
19888 let s_is_null = Expression::Is(Box::new(BinaryOp {
19889 left: search_for_null,
19890 right: Expression::Null(crate::expressions::Null),
19891 left_comments: Vec::new(),
19892 operator_comments: Vec::new(),
19893 trailing_comments: Vec::new(),
19894 inferred_type: None,
19895 }));
19896 let both_null = Expression::And(Box::new(BinaryOp {
19897 left: x_is_null,
19898 right: s_is_null,
19899 left_comments: Vec::new(),
19900 operator_comments: Vec::new(),
19901 trailing_comments: Vec::new(),
19902 inferred_type: None,
19903 }));
19904 let condition = Expression::Or(Box::new(BinaryOp {
19905 left: eq,
19906 right: Expression::Paren(Box::new(
19907 crate::expressions::Paren {
19908 this: both_null,
19909 trailing_comments: Vec::new(),
19910 },
19911 )),
19912 left_comments: Vec::new(),
19913 operator_comments: Vec::new(),
19914 trailing_comments: Vec::new(),
19915 inferred_type: None,
19916 }));
19917 (condition, result)
19918 }
19919 })
19920 .collect();
19921 Ok(Expression::Case(Box::new(Case {
19922 operand: None,
19923 whens,
19924 else_: default,
19925 comments: Vec::new(),
19926 inferred_type: None,
19927 })))
19928 }
19929 // LEVENSHTEIN(a, b, ...) -> dialect-specific
19930 "LEVENSHTEIN" => {
19931 match target {
19932 DialectType::BigQuery => {
19933 let mut new_f = *f;
19934 new_f.name = "EDIT_DISTANCE".to_string();
19935 Ok(Expression::Function(Box::new(new_f)))
19936 }
19937 DialectType::Drill => {
19938 let mut new_f = *f;
19939 new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
19940 Ok(Expression::Function(Box::new(new_f)))
19941 }
19942 DialectType::PostgreSQL if f.args.len() == 6 => {
19943 // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
19944 // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
19945 let mut new_f = *f;
19946 new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
19947 Ok(Expression::Function(Box::new(new_f)))
19948 }
19949 _ => Ok(Expression::Function(f)),
19950 }
19951 }
19952 // ARRAY_MAX(x) -> arrayMax(x) for ClickHouse, LIST_MAX(x) for DuckDB
19953 "ARRAY_MAX" => {
19954 let name = match target {
19955 DialectType::ClickHouse => "arrayMax",
19956 DialectType::DuckDB => "LIST_MAX",
19957 _ => "ARRAY_MAX",
19958 };
19959 let mut new_f = *f;
19960 new_f.name = name.to_string();
19961 Ok(Expression::Function(Box::new(new_f)))
19962 }
19963 // ARRAY_MIN(x) -> arrayMin(x) for ClickHouse, LIST_MIN(x) for DuckDB
19964 "ARRAY_MIN" => {
19965 let name = match target {
19966 DialectType::ClickHouse => "arrayMin",
19967 DialectType::DuckDB => "LIST_MIN",
19968 _ => "ARRAY_MIN",
19969 };
19970 let mut new_f = *f;
19971 new_f.name = name.to_string();
19972 Ok(Expression::Function(Box::new(new_f)))
19973 }
19974 // JAROWINKLER_SIMILARITY(a, b) -> jaroWinklerSimilarity(UPPER(a), UPPER(b)) for ClickHouse
19975 // -> JARO_WINKLER_SIMILARITY(UPPER(a), UPPER(b)) for DuckDB
19976 "JAROWINKLER_SIMILARITY" if f.args.len() == 2 => {
19977 let mut args = f.args;
19978 let b = args.pop().unwrap();
19979 let a = args.pop().unwrap();
19980 match target {
19981 DialectType::ClickHouse => {
19982 let upper_a = Expression::Upper(Box::new(
19983 crate::expressions::UnaryFunc::new(a),
19984 ));
19985 let upper_b = Expression::Upper(Box::new(
19986 crate::expressions::UnaryFunc::new(b),
19987 ));
19988 Ok(Expression::Function(Box::new(Function::new(
19989 "jaroWinklerSimilarity".to_string(),
19990 vec![upper_a, upper_b],
19991 ))))
19992 }
19993 DialectType::DuckDB => {
19994 let upper_a = Expression::Upper(Box::new(
19995 crate::expressions::UnaryFunc::new(a),
19996 ));
19997 let upper_b = Expression::Upper(Box::new(
19998 crate::expressions::UnaryFunc::new(b),
19999 ));
20000 Ok(Expression::Function(Box::new(Function::new(
20001 "JARO_WINKLER_SIMILARITY".to_string(),
20002 vec![upper_a, upper_b],
20003 ))))
20004 }
20005 _ => Ok(Expression::Function(Box::new(Function::new(
20006 "JAROWINKLER_SIMILARITY".to_string(),
20007 vec![a, b],
20008 )))),
20009 }
20010 }
20011 // CURRENT_SCHEMAS(x) -> CURRENT_SCHEMAS() for Snowflake (drop arg)
20012 "CURRENT_SCHEMAS" => match target {
20013 DialectType::Snowflake => Ok(Expression::Function(Box::new(
20014 Function::new("CURRENT_SCHEMAS".to_string(), vec![]),
20015 ))),
20016 _ => Ok(Expression::Function(f)),
20017 },
20018 // TRUNC/TRUNCATE (numeric) -> dialect-specific
20019 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 => {
20020 match target {
20021 DialectType::TSQL | DialectType::Fabric => {
20022 // ROUND(x, decimals, 1) - the 1 flag means truncation
20023 let mut args = f.args;
20024 let this = if args.is_empty() {
20025 return Ok(Expression::Function(Box::new(
20026 Function::new("TRUNC".to_string(), args),
20027 )));
20028 } else {
20029 args.remove(0)
20030 };
20031 let decimals = if args.is_empty() {
20032 Expression::Literal(Box::new(Literal::Number(
20033 "0".to_string(),
20034 )))
20035 } else {
20036 args.remove(0)
20037 };
20038 Ok(Expression::Function(Box::new(Function::new(
20039 "ROUND".to_string(),
20040 vec![
20041 this,
20042 decimals,
20043 Expression::Literal(Box::new(Literal::Number(
20044 "1".to_string(),
20045 ))),
20046 ],
20047 ))))
20048 }
20049 DialectType::Presto
20050 | DialectType::Trino
20051 | DialectType::Athena => {
20052 // TRUNCATE(x, decimals)
20053 let mut new_f = *f;
20054 new_f.name = "TRUNCATE".to_string();
20055 Ok(Expression::Function(Box::new(new_f)))
20056 }
20057 DialectType::MySQL
20058 | DialectType::SingleStore
20059 | DialectType::TiDB => {
20060 // TRUNCATE(x, decimals)
20061 let mut new_f = *f;
20062 new_f.name = "TRUNCATE".to_string();
20063 Ok(Expression::Function(Box::new(new_f)))
20064 }
20065 DialectType::DuckDB => {
20066 // DuckDB supports TRUNC(x, decimals) — preserve both args
20067 let mut args = f.args;
20068 // Snowflake fractions_supported: wrap non-INT decimals in CAST(... AS INT)
20069 if args.len() == 2 && matches!(source, DialectType::Snowflake) {
20070 let decimals = args.remove(1);
20071 let is_int = matches!(&decimals, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)))
20072 || matches!(&decimals, Expression::Cast(c) if matches!(c.to, DataType::Int { .. } | DataType::SmallInt { .. } | DataType::BigInt { .. } | DataType::TinyInt { .. }));
20073 let wrapped = if !is_int {
20074 Expression::Cast(Box::new(crate::expressions::Cast {
20075 this: decimals,
20076 to: DataType::Int { length: None, integer_spelling: false },
20077 double_colon_syntax: false,
20078 trailing_comments: Vec::new(),
20079 format: None,
20080 default: None,
20081 inferred_type: None,
20082 }))
20083 } else {
20084 decimals
20085 };
20086 args.push(wrapped);
20087 }
20088 Ok(Expression::Function(Box::new(Function::new(
20089 "TRUNC".to_string(),
20090 args,
20091 ))))
20092 }
20093 DialectType::ClickHouse => {
20094 // trunc(x, decimals) - lowercase
20095 let mut new_f = *f;
20096 new_f.name = "trunc".to_string();
20097 Ok(Expression::Function(Box::new(new_f)))
20098 }
20099 DialectType::Spark | DialectType::Databricks => {
20100 // Spark: TRUNC is date-only; numeric TRUNC → CAST(x AS BIGINT)
20101 let this = f.args.into_iter().next().unwrap_or(
20102 Expression::Literal(Box::new(Literal::Number(
20103 "0".to_string(),
20104 ))),
20105 );
20106 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
20107 this,
20108 to: crate::expressions::DataType::BigInt {
20109 length: None,
20110 },
20111 double_colon_syntax: false,
20112 trailing_comments: Vec::new(),
20113 format: None,
20114 default: None,
20115 inferred_type: None,
20116 })))
20117 }
20118 _ => {
20119 // TRUNC(x, decimals) for PostgreSQL, Oracle, Snowflake, etc.
20120 let mut new_f = *f;
20121 new_f.name = "TRUNC".to_string();
20122 Ok(Expression::Function(Box::new(new_f)))
20123 }
20124 }
20125 }
20126 // CURRENT_VERSION() -> VERSION() for most dialects
20127 "CURRENT_VERSION" => match target {
20128 DialectType::Snowflake
20129 | DialectType::Databricks
20130 | DialectType::StarRocks => Ok(Expression::Function(f)),
20131 DialectType::SQLite => {
20132 let mut new_f = *f;
20133 new_f.name = "SQLITE_VERSION".to_string();
20134 Ok(Expression::Function(Box::new(new_f)))
20135 }
20136 _ => {
20137 let mut new_f = *f;
20138 new_f.name = "VERSION".to_string();
20139 Ok(Expression::Function(Box::new(new_f)))
20140 }
20141 },
20142 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
20143 "ARRAY_REVERSE" => match target {
20144 DialectType::ClickHouse => {
20145 let mut new_f = *f;
20146 new_f.name = "arrayReverse".to_string();
20147 Ok(Expression::Function(Box::new(new_f)))
20148 }
20149 _ => Ok(Expression::Function(f)),
20150 },
20151 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
20152 "GENERATE_DATE_ARRAY" => {
20153 let mut args = f.args;
20154 if matches!(target, DialectType::BigQuery) {
20155 // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
20156 if args.len() == 2 {
20157 let default_interval = Expression::Interval(Box::new(
20158 crate::expressions::Interval {
20159 this: Some(Expression::Literal(Box::new(
20160 Literal::String("1".to_string()),
20161 ))),
20162 unit: Some(
20163 crate::expressions::IntervalUnitSpec::Simple {
20164 unit: crate::expressions::IntervalUnit::Day,
20165 use_plural: false,
20166 },
20167 ),
20168 },
20169 ));
20170 args.push(default_interval);
20171 }
20172 Ok(Expression::Function(Box::new(Function::new(
20173 "GENERATE_DATE_ARRAY".to_string(),
20174 args,
20175 ))))
20176 } else if matches!(target, DialectType::DuckDB) {
20177 // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
20178 let start = args.get(0).cloned();
20179 let end = args.get(1).cloned();
20180 let step = args.get(2).cloned().or_else(|| {
20181 Some(Expression::Interval(Box::new(
20182 crate::expressions::Interval {
20183 this: Some(Expression::Literal(Box::new(
20184 Literal::String("1".to_string()),
20185 ))),
20186 unit: Some(
20187 crate::expressions::IntervalUnitSpec::Simple {
20188 unit: crate::expressions::IntervalUnit::Day,
20189 use_plural: false,
20190 },
20191 ),
20192 },
20193 )))
20194 });
20195 let gen_series = Expression::GenerateSeries(Box::new(
20196 crate::expressions::GenerateSeries {
20197 start: start.map(Box::new),
20198 end: end.map(Box::new),
20199 step: step.map(Box::new),
20200 is_end_exclusive: None,
20201 },
20202 ));
20203 Ok(Expression::Cast(Box::new(Cast {
20204 this: gen_series,
20205 to: DataType::Array {
20206 element_type: Box::new(DataType::Date),
20207 dimension: None,
20208 },
20209 trailing_comments: vec![],
20210 double_colon_syntax: false,
20211 format: None,
20212 default: None,
20213 inferred_type: None,
20214 })))
20215 } else if matches!(
20216 target,
20217 DialectType::Presto | DialectType::Trino | DialectType::Athena
20218 ) {
20219 // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
20220 let start = args.get(0).cloned();
20221 let end = args.get(1).cloned();
20222 let step = args.get(2).cloned().or_else(|| {
20223 Some(Expression::Interval(Box::new(
20224 crate::expressions::Interval {
20225 this: Some(Expression::Literal(Box::new(
20226 Literal::String("1".to_string()),
20227 ))),
20228 unit: Some(
20229 crate::expressions::IntervalUnitSpec::Simple {
20230 unit: crate::expressions::IntervalUnit::Day,
20231 use_plural: false,
20232 },
20233 ),
20234 },
20235 )))
20236 });
20237 let gen_series = Expression::GenerateSeries(Box::new(
20238 crate::expressions::GenerateSeries {
20239 start: start.map(Box::new),
20240 end: end.map(Box::new),
20241 step: step.map(Box::new),
20242 is_end_exclusive: None,
20243 },
20244 ));
20245 Ok(gen_series)
20246 } else if matches!(
20247 target,
20248 DialectType::Spark | DialectType::Databricks
20249 ) {
20250 // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
20251 let start = args.get(0).cloned();
20252 let end = args.get(1).cloned();
20253 let step = args.get(2).cloned().or_else(|| {
20254 Some(Expression::Interval(Box::new(
20255 crate::expressions::Interval {
20256 this: Some(Expression::Literal(Box::new(
20257 Literal::String("1".to_string()),
20258 ))),
20259 unit: Some(
20260 crate::expressions::IntervalUnitSpec::Simple {
20261 unit: crate::expressions::IntervalUnit::Day,
20262 use_plural: false,
20263 },
20264 ),
20265 },
20266 )))
20267 });
20268 let gen_series = Expression::GenerateSeries(Box::new(
20269 crate::expressions::GenerateSeries {
20270 start: start.map(Box::new),
20271 end: end.map(Box::new),
20272 step: step.map(Box::new),
20273 is_end_exclusive: None,
20274 },
20275 ));
20276 Ok(gen_series)
20277 } else if matches!(target, DialectType::Snowflake) {
20278 // Snowflake: keep as GENERATE_DATE_ARRAY for later transform
20279 if args.len() == 2 {
20280 let default_interval = Expression::Interval(Box::new(
20281 crate::expressions::Interval {
20282 this: Some(Expression::Literal(Box::new(
20283 Literal::String("1".to_string()),
20284 ))),
20285 unit: Some(
20286 crate::expressions::IntervalUnitSpec::Simple {
20287 unit: crate::expressions::IntervalUnit::Day,
20288 use_plural: false,
20289 },
20290 ),
20291 },
20292 ));
20293 args.push(default_interval);
20294 }
20295 Ok(Expression::Function(Box::new(Function::new(
20296 "GENERATE_DATE_ARRAY".to_string(),
20297 args,
20298 ))))
20299 } else if matches!(
20300 target,
20301 DialectType::MySQL
20302 | DialectType::TSQL
20303 | DialectType::Fabric
20304 | DialectType::Redshift
20305 ) {
20306 // MySQL/TSQL/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
20307 // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
20308 Ok(Expression::Function(Box::new(Function::new(
20309 "GENERATE_DATE_ARRAY".to_string(),
20310 args,
20311 ))))
20312 } else {
20313 // PostgreSQL/others: convert to GenerateSeries
20314 let start = args.get(0).cloned();
20315 let end = args.get(1).cloned();
20316 let step = args.get(2).cloned().or_else(|| {
20317 Some(Expression::Interval(Box::new(
20318 crate::expressions::Interval {
20319 this: Some(Expression::Literal(Box::new(
20320 Literal::String("1".to_string()),
20321 ))),
20322 unit: Some(
20323 crate::expressions::IntervalUnitSpec::Simple {
20324 unit: crate::expressions::IntervalUnit::Day,
20325 use_plural: false,
20326 },
20327 ),
20328 },
20329 )))
20330 });
20331 Ok(Expression::GenerateSeries(Box::new(
20332 crate::expressions::GenerateSeries {
20333 start: start.map(Box::new),
20334 end: end.map(Box::new),
20335 step: step.map(Box::new),
20336 is_end_exclusive: None,
20337 },
20338 )))
20339 }
20340 }
// ARRAYS_OVERLAP(arr1, arr2) from Snowflake -> DuckDB:
// (arr1 && arr2) OR (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
// The second disjunct fires when both arrays contain a NULL element —
// ARRAY_LENGTH counting all elements while LIST_COUNT appears to count only
// non-NULL ones (NOTE(review): confirm LIST_COUNT semantics against DuckDB
// docs). This emulates the source dialect treating two NULL elements as
// overlapping, which the bare && operator would miss.
"ARRAYS_OVERLAP"
    if f.args.len() == 2
        && matches!(source, DialectType::Snowflake)
        && matches!(target, DialectType::DuckDB) =>
{
    let mut args = f.args;
    let arr1 = args.remove(0);
    let arr2 = args.remove(0);

    // (arr1 && arr2) — DuckDB's native array-overlap operator.
    let overlap = Expression::Paren(Box::new(Paren {
        this: Expression::ArrayOverlaps(Box::new(BinaryOp {
            left: arr1.clone(),
            right: arr2.clone(),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        })),
        trailing_comments: vec![],
    }));

    // ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1)
    let arr1_has_null = Expression::Neq(Box::new(BinaryOp {
        left: Expression::Function(Box::new(Function::new(
            "ARRAY_LENGTH".to_string(),
            vec![arr1.clone()],
        ))),
        right: Expression::Function(Box::new(Function::new(
            "LIST_COUNT".to_string(),
            vec![arr1],
        ))),
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
        inferred_type: None,
    }));

    // ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2)
    let arr2_has_null = Expression::Neq(Box::new(BinaryOp {
        left: Expression::Function(Box::new(Function::new(
            "ARRAY_LENGTH".to_string(),
            vec![arr2.clone()],
        ))),
        right: Expression::Function(Box::new(Function::new(
            "LIST_COUNT".to_string(),
            vec![arr2],
        ))),
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
        inferred_type: None,
    }));

    // (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
    let null_check = Expression::Paren(Box::new(Paren {
        this: Expression::And(Box::new(BinaryOp {
            left: arr1_has_null,
            right: arr2_has_null,
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        })),
        trailing_comments: vec![],
    }));

    // (arr1 && arr2) OR (null_check)
    Ok(Expression::Or(Box::new(BinaryOp {
        left: overlap,
        right: null_check,
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
        inferred_type: None,
    })))
}
// ARRAY_INTERSECTION([1, 2], [2, 3]) from Snowflake -> DuckDB:
// Bag semantics using LIST_TRANSFORM/LIST_FILTER with GENERATE_SERIES.
// Bag (multiset) intersection: each element appears in the result as many
// times as it appears in BOTH inputs. Scheme: zip arr1 with its 1-based
// positions, keep a (element, position) pair when the count of that element
// in arr1 up to and including this position does not exceed its total count
// in arr2, then project the pairs back to elements. Comparisons use
// IS NOT DISTINCT FROM so NULL elements compare equal to each other.
// A NULL input array short-circuits the whole expression to NULL via CASE.
"ARRAY_INTERSECTION"
    if f.args.len() == 2
        && matches!(source, DialectType::Snowflake)
        && matches!(target, DialectType::DuckDB) =>
{
    let mut args = f.args;
    let arr1 = args.remove(0);
    let arr2 = args.remove(0);

    // Build: arr1 IS NULL
    let arr1_is_null = Expression::IsNull(Box::new(IsNull {
        this: arr1.clone(),
        not: false,
        postfix_form: false,
    }));
    let arr2_is_null = Expression::IsNull(Box::new(IsNull {
        this: arr2.clone(),
        not: false,
        postfix_form: false,
    }));
    let null_check = Expression::Or(Box::new(BinaryOp {
        left: arr1_is_null,
        right: arr2_is_null,
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
        inferred_type: None,
    }));

    // GENERATE_SERIES(1, LENGTH(arr1)) — 1-based positions into arr1.
    let gen_series = Expression::Function(Box::new(Function::new(
        "GENERATE_SERIES".to_string(),
        vec![
            Expression::number(1),
            Expression::Function(Box::new(Function::new(
                "LENGTH".to_string(),
                vec![arr1.clone()],
            ))),
        ],
    )));

    // LIST_ZIP(arr1, GENERATE_SERIES(1, LENGTH(arr1)))
    let list_zip = Expression::Function(Box::new(Function::new(
        "LIST_ZIP".to_string(),
        vec![arr1.clone(), gen_series],
    )));

    // pair[1] (the element) and pair[2] (its position)
    let pair_col = Expression::column("pair");
    let pair_1 = Expression::Subscript(Box::new(
        crate::expressions::Subscript {
            this: pair_col.clone(),
            index: Expression::number(1),
        },
    ));
    let pair_2 = Expression::Subscript(Box::new(
        crate::expressions::Subscript {
            this: pair_col.clone(),
            index: Expression::number(2),
        },
    ));

    // arr1[1:pair[2]] — the prefix of arr1 up to the current position.
    let arr1_slice = Expression::ArraySlice(Box::new(
        crate::expressions::ArraySlice {
            this: arr1.clone(),
            start: Some(Expression::number(1)),
            end: Some(pair_2),
        },
    ));

    // e IS NOT DISTINCT FROM pair[1] (NULL-safe equality)
    let e_col = Expression::column("e");
    let is_not_distinct = Expression::NullSafeEq(Box::new(BinaryOp {
        left: e_col.clone(),
        right: pair_1.clone(),
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
        inferred_type: None,
    }));

    // e -> e IS NOT DISTINCT FROM pair[1]
    let inner_lambda1 =
        Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
            parameters: vec![crate::expressions::Identifier::new("e")],
            body: is_not_distinct,
            colon: false,
            parameter_types: vec![],
        }));

    // LIST_FILTER(arr1[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
    let inner_filter1 = Expression::Function(Box::new(Function::new(
        "LIST_FILTER".to_string(),
        vec![arr1_slice, inner_lambda1],
    )));

    // LENGTH(LIST_FILTER(arr1[1:pair[2]], ...)) — occurrences so far in arr1.
    let len1 = Expression::Function(Box::new(Function::new(
        "LENGTH".to_string(),
        vec![inner_filter1],
    )));

    // e -> e IS NOT DISTINCT FROM pair[1] (fresh lambda for the arr2 filter)
    let inner_lambda2 =
        Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
            parameters: vec![crate::expressions::Identifier::new("e")],
            body: Expression::NullSafeEq(Box::new(BinaryOp {
                left: e_col,
                right: pair_1.clone(),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            })),
            colon: false,
            parameter_types: vec![],
        }));

    // LIST_FILTER(arr2, e -> e IS NOT DISTINCT FROM pair[1])
    let inner_filter2 = Expression::Function(Box::new(Function::new(
        "LIST_FILTER".to_string(),
        vec![arr2.clone(), inner_lambda2],
    )));

    // LENGTH(LIST_FILTER(arr2, ...)) — total occurrences in arr2.
    let len2 = Expression::Function(Box::new(Function::new(
        "LENGTH".to_string(),
        vec![inner_filter2],
    )));

    // LENGTH(...) <= LENGTH(...)
    let cond = Expression::Paren(Box::new(Paren {
        this: Expression::Lte(Box::new(BinaryOp {
            left: len1,
            right: len2,
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        })),
        trailing_comments: vec![],
    }));

    // pair -> (condition)
    let filter_lambda =
        Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
            parameters: vec![crate::expressions::Identifier::new(
                "pair",
            )],
            body: cond,
            colon: false,
            parameter_types: vec![],
        }));

    // LIST_FILTER(LIST_ZIP(...), pair -> ...)
    let outer_filter = Expression::Function(Box::new(Function::new(
        "LIST_FILTER".to_string(),
        vec![list_zip, filter_lambda],
    )));

    // pair -> pair[1] — project surviving pairs back to their elements.
    let transform_lambda =
        Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
            parameters: vec![crate::expressions::Identifier::new(
                "pair",
            )],
            body: pair_1,
            colon: false,
            parameter_types: vec![],
        }));

    // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
    let list_transform = Expression::Function(Box::new(Function::new(
        "LIST_TRANSFORM".to_string(),
        vec![outer_filter, transform_lambda],
    )));

    // CASE WHEN arr1 IS NULL OR arr2 IS NULL THEN NULL
    // ELSE LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
    // END
    Ok(Expression::Case(Box::new(Case {
        operand: None,
        whens: vec![(null_check, Expression::Null(Null))],
        else_: Some(list_transform),
        comments: vec![],
        inferred_type: None,
    })))
}
20611 // ARRAY_CONSTRUCT(args) -> Expression::Array for all targets
20612 "ARRAY_CONSTRUCT" => {
20613 Ok(Expression::Array(Box::new(crate::expressions::Array {
20614 expressions: f.args,
20615 })))
20616 }
20617 // ARRAY(args) function -> Expression::Array for DuckDB/Snowflake/Presto/Trino/Athena
20618 "ARRAY"
20619 if !f.args.iter().any(|a| {
20620 matches!(a, Expression::Select(_) | Expression::Subquery(_))
20621 }) =>
20622 {
20623 match target {
20624 DialectType::DuckDB
20625 | DialectType::Snowflake
20626 | DialectType::Presto
20627 | DialectType::Trino
20628 | DialectType::Athena => {
20629 Ok(Expression::Array(Box::new(crate::expressions::Array {
20630 expressions: f.args,
20631 })))
20632 }
20633 _ => Ok(Expression::Function(f)),
20634 }
20635 }
20636 _ => Ok(Expression::Function(f)),
20637 }
} else if let Expression::AggregateFunction(mut af) = e {
    // Aggregate-function rewrites keyed on the upper-cased function name.
    let name = af.name.to_ascii_uppercase();
    match name.as_str() {
        // ARBITRARY(x): delegated to convert_arbitrary (defined elsewhere
        // in this file) for the target-specific spelling.
        "ARBITRARY" if af.args.len() == 1 => {
            let arg = af.args.into_iter().next().unwrap();
            Ok(convert_arbitrary(arg, target))
        }
        "JSON_ARRAYAGG" => {
            match target {
                DialectType::PostgreSQL => {
                    af.name = "JSON_AGG".to_string();
                    // Add NULLS FIRST to ORDER BY items for PostgreSQL
                    // (only where the query didn't specify a preference).
                    for ordered in af.order_by.iter_mut() {
                        if ordered.nulls_first.is_none() {
                            ordered.nulls_first = Some(true);
                        }
                    }
                    Ok(Expression::AggregateFunction(af))
                }
                _ => Ok(Expression::AggregateFunction(af)),
            }
        }
        _ => Ok(Expression::AggregateFunction(af)),
    }
} else if let Expression::JSONArrayAgg(ja) = e {
    // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
    match target {
        DialectType::PostgreSQL => {
            // Flatten the node's ORDER BY expression (if present) into the
            // aggregate's order_by list, defaulting to NULLS FIRST.
            let mut order_by = Vec::new();
            if let Some(order_expr) = ja.order {
                if let Expression::OrderBy(ob) = *order_expr {
                    for mut ordered in ob.expressions {
                        if ordered.nulls_first.is_none() {
                            ordered.nulls_first = Some(true);
                        }
                        order_by.push(ordered);
                    }
                }
            }
            Ok(Expression::AggregateFunction(Box::new(
                crate::expressions::AggregateFunction {
                    name: "JSON_AGG".to_string(),
                    args: vec![*ja.this],
                    distinct: false,
                    filter: None,
                    order_by,
                    limit: None,
                    ignore_nulls: None,
                    inferred_type: None,
                },
            )))
        }
        _ => Ok(Expression::JSONArrayAgg(ja)),
    }
} else if let Expression::ToNumber(tn) = e {
    // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
    let arg = *tn.this;
    Ok(Expression::Cast(Box::new(crate::expressions::Cast {
        this: arg,
        to: crate::expressions::DataType::Double {
            precision: None,
            scale: None,
        },
        double_colon_syntax: false,
        trailing_comments: Vec::new(),
        format: None,
        default: None,
        inferred_type: None,
    })))
} else {
    // No rewrite applies — pass the expression through unchanged.
    Ok(e)
}
20710 }
20711
20712 Action::RegexpLikeToDuckDB => {
20713 if let Expression::RegexpLike(f) = e {
20714 let mut args = vec![f.this, f.pattern];
20715 if let Some(flags) = f.flags {
20716 args.push(flags);
20717 }
20718 Ok(Expression::Function(Box::new(Function::new(
20719 "REGEXP_MATCHES".to_string(),
20720 args,
20721 ))))
20722 } else {
20723 Ok(e)
20724 }
20725 }
Action::EpochConvert => {
    // Epoch node (seconds since the Unix epoch) -> dialect-specific function.
    if let Expression::Epoch(f) = e {
        let arg = f.this;
        let name = match target {
            DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                "UNIX_TIMESTAMP"
            }
            DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
            // NOTE(review): BigQuery has no TIME_TO_UNIX function (its epoch
            // conversions are UNIX_SECONDS/UNIX_MILLIS). Confirm whether
            // "TIME_TO_UNIX" is an internal name resolved by a later
            // transform, or whether this should emit "UNIX_SECONDS".
            DialectType::BigQuery => "TIME_TO_UNIX",
            _ => "EPOCH",
        };
        Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            vec![arg],
        ))))
    } else {
        Ok(e)
    }
}
Action::EpochMsConvert => {
    use crate::expressions::{BinaryOp, Cast};
    // EpochMs node (milliseconds since the Unix epoch -> timestamp),
    // rewritten per target dialect. Division by POWER(10, 3) converts
    // milliseconds to (fractional) seconds where the target only has a
    // seconds-based conversion function.
    if let Expression::EpochMs(f) = e {
        let arg = f.this;
        match target {
            DialectType::Spark | DialectType::Databricks => {
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMP_MILLIS".to_string(),
                    vec![arg],
                ))))
            }
            DialectType::BigQuery => Ok(Expression::Function(Box::new(
                Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
            ))),
            DialectType::Presto | DialectType::Trino => {
                // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Double {
                        precision: None,
                        scale: None,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                let div = Expression::Div(Box::new(BinaryOp::new(
                    cast_arg,
                    Expression::Function(Box::new(Function::new(
                        "POW".to_string(),
                        vec![Expression::number(10), Expression::number(3)],
                    ))),
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "FROM_UNIXTIME".to_string(),
                    vec![div],
                ))))
            }
            DialectType::MySQL => {
                // FROM_UNIXTIME(x / POWER(10, 3)) — no explicit cast here.
                let div = Expression::Div(Box::new(BinaryOp::new(
                    arg,
                    Expression::Function(Box::new(Function::new(
                        "POWER".to_string(),
                        vec![Expression::number(10), Expression::number(3)],
                    ))),
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "FROM_UNIXTIME".to_string(),
                    vec![div],
                ))))
            }
            DialectType::PostgreSQL | DialectType::Redshift => {
                // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Custom {
                        name: "DOUBLE PRECISION".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                let div = Expression::Div(Box::new(BinaryOp::new(
                    cast_arg,
                    Expression::Function(Box::new(Function::new(
                        "POWER".to_string(),
                        vec![Expression::number(10), Expression::number(3)],
                    ))),
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "TO_TIMESTAMP".to_string(),
                    vec![div],
                ))))
            }
            DialectType::ClickHouse => {
                // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Nullable {
                        inner: Box::new(DataType::BigInt { length: None }),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                Ok(Expression::Function(Box::new(Function::new(
                    "fromUnixTimestamp64Milli".to_string(),
                    vec![cast_arg],
                ))))
            }
            // Default (e.g. DuckDB): keep the EPOCH_MS function name.
            _ => Ok(Expression::Function(Box::new(Function::new(
                "EPOCH_MS".to_string(),
                vec![arg],
            )))),
        }
    } else {
        Ok(e)
    }
}
20851 Action::TSQLTypeNormalize => {
20852 if let Expression::DataType(dt) = e {
20853 let new_dt = match &dt {
20854 DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
20855 DataType::Decimal {
20856 precision: Some(15),
20857 scale: Some(4),
20858 }
20859 }
20860 DataType::Custom { name }
20861 if name.eq_ignore_ascii_case("SMALLMONEY") =>
20862 {
20863 DataType::Decimal {
20864 precision: Some(6),
20865 scale: Some(4),
20866 }
20867 }
20868 DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
20869 DataType::Timestamp {
20870 timezone: false,
20871 precision: None,
20872 }
20873 }
20874 DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
20875 DataType::Float {
20876 precision: None,
20877 scale: None,
20878 real_spelling: false,
20879 }
20880 }
20881 DataType::Float {
20882 real_spelling: true,
20883 ..
20884 } => DataType::Float {
20885 precision: None,
20886 scale: None,
20887 real_spelling: false,
20888 },
20889 DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
20890 DataType::Custom {
20891 name: "BLOB".to_string(),
20892 }
20893 }
20894 DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
20895 DataType::Boolean
20896 }
20897 DataType::Custom { name }
20898 if name.eq_ignore_ascii_case("ROWVERSION") =>
20899 {
20900 DataType::Custom {
20901 name: "BINARY".to_string(),
20902 }
20903 }
20904 DataType::Custom { name }
20905 if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
20906 {
20907 match target {
20908 DialectType::Spark
20909 | DialectType::Databricks
20910 | DialectType::Hive => DataType::Custom {
20911 name: "STRING".to_string(),
20912 },
20913 _ => DataType::VarChar {
20914 length: Some(36),
20915 parenthesized_length: true,
20916 },
20917 }
20918 }
20919 DataType::Custom { name }
20920 if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
20921 {
20922 match target {
20923 DialectType::Spark
20924 | DialectType::Databricks
20925 | DialectType::Hive => DataType::Timestamp {
20926 timezone: false,
20927 precision: None,
20928 },
20929 _ => DataType::Timestamp {
20930 timezone: true,
20931 precision: None,
20932 },
20933 }
20934 }
20935 DataType::Custom { ref name }
20936 if name.len() >= 10
20937 && name[..10].eq_ignore_ascii_case("DATETIME2(") =>
20938 {
20939 // DATETIME2(n) -> TIMESTAMP
20940 DataType::Timestamp {
20941 timezone: false,
20942 precision: None,
20943 }
20944 }
20945 DataType::Custom { ref name }
20946 if name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME(") =>
20947 {
20948 // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
20949 match target {
20950 DialectType::Spark
20951 | DialectType::Databricks
20952 | DialectType::Hive => DataType::Timestamp {
20953 timezone: false,
20954 precision: None,
20955 },
20956 _ => return Ok(Expression::DataType(dt)),
20957 }
20958 }
20959 DataType::Custom { ref name }
20960 if name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC") =>
20961 {
20962 // Parse NUMERIC(p,s) back to Decimal(p,s)
20963 let upper = name.to_ascii_uppercase();
20964 if let Some(inner) = upper
20965 .strip_prefix("NUMERIC(")
20966 .and_then(|s| s.strip_suffix(')'))
20967 {
20968 let parts: Vec<&str> = inner.split(',').collect();
20969 let precision =
20970 parts.first().and_then(|s| s.trim().parse::<u32>().ok());
20971 let scale =
20972 parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
20973 DataType::Decimal { precision, scale }
20974 } else if upper == "NUMERIC" {
20975 DataType::Decimal {
20976 precision: None,
20977 scale: None,
20978 }
20979 } else {
20980 return Ok(Expression::DataType(dt));
20981 }
20982 }
20983 DataType::Float {
20984 precision: Some(p), ..
20985 } => {
20986 // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
20987 // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
20988 let boundary = match target {
20989 DialectType::Hive
20990 | DialectType::Spark
20991 | DialectType::Databricks => 32,
20992 _ => 24,
20993 };
20994 if *p <= boundary {
20995 DataType::Float {
20996 precision: None,
20997 scale: None,
20998 real_spelling: false,
20999 }
21000 } else {
21001 DataType::Double {
21002 precision: None,
21003 scale: None,
21004 }
21005 }
21006 }
21007 DataType::TinyInt { .. } => match target {
21008 DialectType::DuckDB => DataType::Custom {
21009 name: "UTINYINT".to_string(),
21010 },
21011 DialectType::Hive
21012 | DialectType::Spark
21013 | DialectType::Databricks => DataType::SmallInt { length: None },
21014 _ => return Ok(Expression::DataType(dt)),
21015 },
21016 // INTEGER -> INT for Spark/Databricks
21017 DataType::Int {
21018 length,
21019 integer_spelling: true,
21020 } => DataType::Int {
21021 length: *length,
21022 integer_spelling: false,
21023 },
21024 _ => return Ok(Expression::DataType(dt)),
21025 };
21026 Ok(Expression::DataType(new_dt))
21027 } else {
21028 Ok(e)
21029 }
21030 }
21031 Action::MySQLSafeDivide => {
21032 use crate::expressions::{BinaryOp, Cast};
21033 if let Expression::Div(op) = e {
21034 let left = op.left;
21035 let right = op.right;
21036 // For SQLite: CAST left as REAL but NO NULLIF wrapping
21037 if matches!(target, DialectType::SQLite) {
21038 let new_left = Expression::Cast(Box::new(Cast {
21039 this: left,
21040 to: DataType::Float {
21041 precision: None,
21042 scale: None,
21043 real_spelling: true,
21044 },
21045 trailing_comments: Vec::new(),
21046 double_colon_syntax: false,
21047 format: None,
21048 default: None,
21049 inferred_type: None,
21050 }));
21051 return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
21052 }
21053 // Wrap right in NULLIF(right, 0)
21054 let nullif_right = Expression::Function(Box::new(Function::new(
21055 "NULLIF".to_string(),
21056 vec![right, Expression::number(0)],
21057 )));
21058 // For some dialects, also CAST the left side
21059 let new_left = match target {
21060 DialectType::PostgreSQL
21061 | DialectType::Redshift
21062 | DialectType::Teradata
21063 | DialectType::Materialize
21064 | DialectType::RisingWave => Expression::Cast(Box::new(Cast {
21065 this: left,
21066 to: DataType::Custom {
21067 name: "DOUBLE PRECISION".to_string(),
21068 },
21069 trailing_comments: Vec::new(),
21070 double_colon_syntax: false,
21071 format: None,
21072 default: None,
21073 inferred_type: None,
21074 })),
21075 DialectType::Drill
21076 | DialectType::Trino
21077 | DialectType::Presto
21078 | DialectType::Athena => Expression::Cast(Box::new(Cast {
21079 this: left,
21080 to: DataType::Double {
21081 precision: None,
21082 scale: None,
21083 },
21084 trailing_comments: Vec::new(),
21085 double_colon_syntax: false,
21086 format: None,
21087 default: None,
21088 inferred_type: None,
21089 })),
21090 DialectType::TSQL => Expression::Cast(Box::new(Cast {
21091 this: left,
21092 to: DataType::Float {
21093 precision: None,
21094 scale: None,
21095 real_spelling: false,
21096 },
21097 trailing_comments: Vec::new(),
21098 double_colon_syntax: false,
21099 format: None,
21100 default: None,
21101 inferred_type: None,
21102 })),
21103 _ => left,
21104 };
21105 Ok(Expression::Div(Box::new(BinaryOp::new(
21106 new_left,
21107 nullif_right,
21108 ))))
21109 } else {
21110 Ok(e)
21111 }
21112 }
21113 Action::AlterTableRenameStripSchema => {
21114 if let Expression::AlterTable(mut at) = e {
21115 if let Some(crate::expressions::AlterTableAction::RenameTable(
21116 ref mut new_tbl,
21117 )) = at.actions.first_mut()
21118 {
21119 new_tbl.schema = None;
21120 new_tbl.catalog = None;
21121 }
21122 Ok(Expression::AlterTable(at))
21123 } else {
21124 Ok(e)
21125 }
21126 }
21127 Action::NullsOrdering => {
21128 // Fill in the source dialect's implied null ordering default.
21129 // This makes implicit null ordering explicit so the target generator
21130 // can correctly strip or keep it.
21131 //
21132 // Dialect null ordering categories:
21133 // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
21134 // ASC -> NULLS LAST, DESC -> NULLS FIRST
21135 // nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, ClickHouse, etc.):
21136 // ASC -> NULLS FIRST, DESC -> NULLS LAST
21137 // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena):
21138 // NULLS LAST always (both ASC and DESC)
21139 if let Expression::Ordered(mut o) = e {
21140 let is_asc = !o.desc;
21141
21142 let is_source_nulls_large = matches!(
21143 source,
21144 DialectType::Oracle
21145 | DialectType::PostgreSQL
21146 | DialectType::Redshift
21147 | DialectType::Snowflake
21148 );
21149 let is_source_nulls_last = matches!(
21150 source,
21151 DialectType::DuckDB
21152 | DialectType::Presto
21153 | DialectType::Trino
21154 | DialectType::Dremio
21155 | DialectType::Athena
21156 | DialectType::ClickHouse
21157 | DialectType::Drill
21158 | DialectType::Exasol
21159 | DialectType::DataFusion
21160 );
21161
21162 // Determine target category to check if default matches
21163 let is_target_nulls_large = matches!(
21164 target,
21165 DialectType::Oracle
21166 | DialectType::PostgreSQL
21167 | DialectType::Redshift
21168 | DialectType::Snowflake
21169 );
21170 let is_target_nulls_last = matches!(
21171 target,
21172 DialectType::DuckDB
21173 | DialectType::Presto
21174 | DialectType::Trino
21175 | DialectType::Dremio
21176 | DialectType::Athena
21177 | DialectType::ClickHouse
21178 | DialectType::Drill
21179 | DialectType::Exasol
21180 | DialectType::DataFusion
21181 );
21182
21183 // Compute the implied nulls_first for source
21184 let source_nulls_first = if is_source_nulls_large {
21185 !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
21186 } else if is_source_nulls_last {
21187 false // NULLS LAST always
21188 } else {
21189 is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
21190 };
21191
21192 // Compute the target's default
21193 let target_nulls_first = if is_target_nulls_large {
21194 !is_asc
21195 } else if is_target_nulls_last {
21196 false
21197 } else {
21198 is_asc
21199 };
21200
21201 // Only add explicit nulls ordering if source and target defaults differ
21202 if source_nulls_first != target_nulls_first {
21203 o.nulls_first = Some(source_nulls_first);
21204 }
21205 // If they match, leave nulls_first as None so the generator won't output it
21206
21207 Ok(Expression::Ordered(o))
21208 } else {
21209 Ok(e)
21210 }
21211 }
21212 Action::StringAggConvert => {
21213 match e {
21214 Expression::WithinGroup(wg) => {
21215 // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
21216 // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
21217 let (x_opt, sep_opt, distinct) = match wg.this {
21218 Expression::AggregateFunction(ref af)
21219 if af.name.eq_ignore_ascii_case("STRING_AGG")
21220 && af.args.len() >= 2 =>
21221 {
21222 (
21223 Some(af.args[0].clone()),
21224 Some(af.args[1].clone()),
21225 af.distinct,
21226 )
21227 }
21228 Expression::Function(ref f)
21229 if f.name.eq_ignore_ascii_case("STRING_AGG")
21230 && f.args.len() >= 2 =>
21231 {
21232 (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
21233 }
21234 Expression::StringAgg(ref sa) => {
21235 (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
21236 }
21237 _ => (None, None, false),
21238 };
21239 if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
21240 let order_by = wg.order_by;
21241
21242 match target {
21243 DialectType::TSQL | DialectType::Fabric => {
21244 // Keep as WithinGroup(StringAgg) for TSQL
21245 Ok(Expression::WithinGroup(Box::new(
21246 crate::expressions::WithinGroup {
21247 this: Expression::StringAgg(Box::new(
21248 crate::expressions::StringAggFunc {
21249 this: x,
21250 separator: Some(sep),
21251 order_by: None, // order_by goes in WithinGroup, not StringAgg
21252 distinct,
21253 filter: None,
21254 limit: None,
21255 inferred_type: None,
21256 },
21257 )),
21258 order_by,
21259 },
21260 )))
21261 }
21262 DialectType::MySQL
21263 | DialectType::SingleStore
21264 | DialectType::Doris
21265 | DialectType::StarRocks => {
21266 // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
21267 Ok(Expression::GroupConcat(Box::new(
21268 crate::expressions::GroupConcatFunc {
21269 this: x,
21270 separator: Some(sep),
21271 order_by: Some(order_by),
21272 distinct,
21273 filter: None,
21274 limit: None,
21275 inferred_type: None,
21276 },
21277 )))
21278 }
21279 DialectType::SQLite => {
21280 // GROUP_CONCAT(x, sep) - no ORDER BY support
21281 Ok(Expression::GroupConcat(Box::new(
21282 crate::expressions::GroupConcatFunc {
21283 this: x,
21284 separator: Some(sep),
21285 order_by: None,
21286 distinct,
21287 filter: None,
21288 limit: None,
21289 inferred_type: None,
21290 },
21291 )))
21292 }
21293 DialectType::PostgreSQL | DialectType::Redshift => {
21294 // STRING_AGG(x, sep ORDER BY z)
21295 Ok(Expression::StringAgg(Box::new(
21296 crate::expressions::StringAggFunc {
21297 this: x,
21298 separator: Some(sep),
21299 order_by: Some(order_by),
21300 distinct,
21301 filter: None,
21302 limit: None,
21303 inferred_type: None,
21304 },
21305 )))
21306 }
21307 _ => {
21308 // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
21309 Ok(Expression::StringAgg(Box::new(
21310 crate::expressions::StringAggFunc {
21311 this: x,
21312 separator: Some(sep),
21313 order_by: Some(order_by),
21314 distinct,
21315 filter: None,
21316 limit: None,
21317 inferred_type: None,
21318 },
21319 )))
21320 }
21321 }
21322 } else {
21323 Ok(Expression::WithinGroup(wg))
21324 }
21325 }
21326 Expression::StringAgg(sa) => {
21327 match target {
21328 DialectType::MySQL
21329 | DialectType::SingleStore
21330 | DialectType::Doris
21331 | DialectType::StarRocks => {
21332 // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
21333 Ok(Expression::GroupConcat(Box::new(
21334 crate::expressions::GroupConcatFunc {
21335 this: sa.this,
21336 separator: sa.separator,
21337 order_by: sa.order_by,
21338 distinct: sa.distinct,
21339 filter: sa.filter,
21340 limit: None,
21341 inferred_type: None,
21342 },
21343 )))
21344 }
21345 DialectType::SQLite => {
21346 // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
21347 Ok(Expression::GroupConcat(Box::new(
21348 crate::expressions::GroupConcatFunc {
21349 this: sa.this,
21350 separator: sa.separator,
21351 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
21352 distinct: sa.distinct,
21353 filter: sa.filter,
21354 limit: None,
21355 inferred_type: None,
21356 },
21357 )))
21358 }
21359 DialectType::Spark | DialectType::Databricks => {
21360 // STRING_AGG(x, sep) -> LISTAGG(x, sep)
21361 Ok(Expression::ListAgg(Box::new(
21362 crate::expressions::ListAggFunc {
21363 this: sa.this,
21364 separator: sa.separator,
21365 on_overflow: None,
21366 order_by: sa.order_by,
21367 distinct: sa.distinct,
21368 filter: None,
21369 inferred_type: None,
21370 },
21371 )))
21372 }
21373 _ => Ok(Expression::StringAgg(sa)),
21374 }
21375 }
21376 _ => Ok(e),
21377 }
21378 }
21379 Action::GroupConcatConvert => {
21380 // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
21381 // or CONCAT(a, b, c) -> a + b + c (for TSQL)
21382 fn expand_concat_to_dpipe(expr: Expression) -> Expression {
21383 if let Expression::Function(ref f) = expr {
21384 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
21385 let mut result = f.args[0].clone();
21386 for arg in &f.args[1..] {
21387 result = Expression::Concat(Box::new(BinaryOp {
21388 left: result,
21389 right: arg.clone(),
21390 left_comments: vec![],
21391 operator_comments: vec![],
21392 trailing_comments: vec![],
21393 inferred_type: None,
21394 }));
21395 }
21396 return result;
21397 }
21398 }
21399 expr
21400 }
21401 fn expand_concat_to_plus(expr: Expression) -> Expression {
21402 if let Expression::Function(ref f) = expr {
21403 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
21404 let mut result = f.args[0].clone();
21405 for arg in &f.args[1..] {
21406 result = Expression::Add(Box::new(BinaryOp {
21407 left: result,
21408 right: arg.clone(),
21409 left_comments: vec![],
21410 operator_comments: vec![],
21411 trailing_comments: vec![],
21412 inferred_type: None,
21413 }));
21414 }
21415 return result;
21416 }
21417 }
21418 expr
21419 }
21420 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
21421 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
21422 if let Expression::Function(ref f) = expr {
21423 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
21424 let new_args: Vec<Expression> = f
21425 .args
21426 .iter()
21427 .map(|arg| {
21428 Expression::Cast(Box::new(crate::expressions::Cast {
21429 this: arg.clone(),
21430 to: crate::expressions::DataType::VarChar {
21431 length: None,
21432 parenthesized_length: false,
21433 },
21434 trailing_comments: Vec::new(),
21435 double_colon_syntax: false,
21436 format: None,
21437 default: None,
21438 inferred_type: None,
21439 }))
21440 })
21441 .collect();
21442 return Expression::Function(Box::new(
21443 crate::expressions::Function::new(
21444 "CONCAT".to_string(),
21445 new_args,
21446 ),
21447 ));
21448 }
21449 }
21450 expr
21451 }
21452 if let Expression::GroupConcat(gc) = e {
21453 match target {
21454 DialectType::Presto => {
21455 // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
21456 let sep = gc.separator.unwrap_or(Expression::string(","));
21457 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
21458 let this = wrap_concat_args_in_varchar_cast(gc.this);
21459 let array_agg =
21460 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
21461 this,
21462 distinct: gc.distinct,
21463 filter: gc.filter,
21464 order_by: gc.order_by.unwrap_or_default(),
21465 name: None,
21466 ignore_nulls: None,
21467 having_max: None,
21468 limit: None,
21469 inferred_type: None,
21470 }));
21471 Ok(Expression::ArrayJoin(Box::new(
21472 crate::expressions::ArrayJoinFunc {
21473 this: array_agg,
21474 separator: sep,
21475 null_replacement: None,
21476 },
21477 )))
21478 }
21479 DialectType::Trino => {
21480 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
21481 let sep = gc.separator.unwrap_or(Expression::string(","));
21482 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
21483 let this = wrap_concat_args_in_varchar_cast(gc.this);
21484 Ok(Expression::ListAgg(Box::new(
21485 crate::expressions::ListAggFunc {
21486 this,
21487 separator: Some(sep),
21488 on_overflow: None,
21489 order_by: gc.order_by,
21490 distinct: gc.distinct,
21491 filter: gc.filter,
21492 inferred_type: None,
21493 },
21494 )))
21495 }
21496 DialectType::PostgreSQL
21497 | DialectType::Redshift
21498 | DialectType::Snowflake
21499 | DialectType::DuckDB
21500 | DialectType::Hive
21501 | DialectType::ClickHouse => {
21502 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
21503 let sep = gc.separator.unwrap_or(Expression::string(","));
21504 // Expand CONCAT(a,b,c) -> a || b || c for || dialects
21505 let this = expand_concat_to_dpipe(gc.this);
21506 // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
21507 let order_by = if target == DialectType::PostgreSQL {
21508 gc.order_by.map(|ords| {
21509 ords.into_iter()
21510 .map(|mut o| {
21511 if o.nulls_first.is_none() {
21512 if o.desc {
21513 o.nulls_first = Some(false);
21514 // NULLS LAST
21515 } else {
21516 o.nulls_first = Some(true);
21517 // NULLS FIRST
21518 }
21519 }
21520 o
21521 })
21522 .collect()
21523 })
21524 } else {
21525 gc.order_by
21526 };
21527 Ok(Expression::StringAgg(Box::new(
21528 crate::expressions::StringAggFunc {
21529 this,
21530 separator: Some(sep),
21531 order_by,
21532 distinct: gc.distinct,
21533 filter: gc.filter,
21534 limit: None,
21535 inferred_type: None,
21536 },
21537 )))
21538 }
21539 DialectType::TSQL => {
21540 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
21541 // TSQL doesn't support DISTINCT in STRING_AGG
21542 let sep = gc.separator.unwrap_or(Expression::string(","));
21543 // Expand CONCAT(a,b,c) -> a + b + c for TSQL
21544 let this = expand_concat_to_plus(gc.this);
21545 Ok(Expression::StringAgg(Box::new(
21546 crate::expressions::StringAggFunc {
21547 this,
21548 separator: Some(sep),
21549 order_by: gc.order_by,
21550 distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
21551 filter: gc.filter,
21552 limit: None,
21553 inferred_type: None,
21554 },
21555 )))
21556 }
21557 DialectType::SQLite => {
21558 // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
21559 // SQLite GROUP_CONCAT doesn't support ORDER BY
21560 // Expand CONCAT(a,b,c) -> a || b || c
21561 let this = expand_concat_to_dpipe(gc.this);
21562 Ok(Expression::GroupConcat(Box::new(
21563 crate::expressions::GroupConcatFunc {
21564 this,
21565 separator: gc.separator,
21566 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
21567 distinct: gc.distinct,
21568 filter: gc.filter,
21569 limit: None,
21570 inferred_type: None,
21571 },
21572 )))
21573 }
21574 DialectType::Spark | DialectType::Databricks => {
21575 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
21576 let sep = gc.separator.unwrap_or(Expression::string(","));
21577 Ok(Expression::ListAgg(Box::new(
21578 crate::expressions::ListAggFunc {
21579 this: gc.this,
21580 separator: Some(sep),
21581 on_overflow: None,
21582 order_by: gc.order_by,
21583 distinct: gc.distinct,
21584 filter: None,
21585 inferred_type: None,
21586 },
21587 )))
21588 }
21589 DialectType::MySQL
21590 | DialectType::SingleStore
21591 | DialectType::StarRocks => {
21592 // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
21593 if gc.separator.is_none() {
21594 let mut gc = gc;
21595 gc.separator = Some(Expression::string(","));
21596 Ok(Expression::GroupConcat(gc))
21597 } else {
21598 Ok(Expression::GroupConcat(gc))
21599 }
21600 }
21601 _ => Ok(Expression::GroupConcat(gc)),
21602 }
21603 } else {
21604 Ok(e)
21605 }
21606 }
21607 Action::TempTableHash => {
21608 match e {
21609 Expression::CreateTable(mut ct) => {
21610 // TSQL #table -> TEMPORARY TABLE with # stripped from name
21611 let name = &ct.name.name.name;
21612 if name.starts_with('#') {
21613 ct.name.name.name = name.trim_start_matches('#').to_string();
21614 }
21615 // Set temporary flag
21616 ct.temporary = true;
21617 Ok(Expression::CreateTable(ct))
21618 }
21619 Expression::Table(mut tr) => {
21620 // Strip # from table references
21621 let name = &tr.name.name;
21622 if name.starts_with('#') {
21623 tr.name.name = name.trim_start_matches('#').to_string();
21624 }
21625 Ok(Expression::Table(tr))
21626 }
21627 Expression::DropTable(mut dt) => {
21628 // Strip # from DROP TABLE names
21629 for table_ref in &mut dt.names {
21630 if table_ref.name.name.starts_with('#') {
21631 table_ref.name.name =
21632 table_ref.name.name.trim_start_matches('#').to_string();
21633 }
21634 }
21635 Ok(Expression::DropTable(dt))
21636 }
21637 _ => Ok(e),
21638 }
21639 }
21640 Action::NvlClearOriginal => {
21641 if let Expression::Nvl(mut f) = e {
21642 f.original_name = None;
21643 Ok(Expression::Nvl(f))
21644 } else {
21645 Ok(e)
21646 }
21647 }
21648 Action::HiveCastToTryCast => {
21649 // Convert Hive/Spark CAST to TRY_CAST for targets that support it
21650 if let Expression::Cast(mut c) = e {
21651 // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
21652 // (Spark's TIMESTAMP is always timezone-aware)
21653 if matches!(target, DialectType::DuckDB)
21654 && matches!(source, DialectType::Spark | DialectType::Databricks)
21655 && matches!(
21656 c.to,
21657 DataType::Timestamp {
21658 timezone: false,
21659 ..
21660 }
21661 )
21662 {
21663 c.to = DataType::Custom {
21664 name: "TIMESTAMPTZ".to_string(),
21665 };
21666 }
21667 // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
21668 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
21669 if matches!(target, DialectType::Databricks | DialectType::Spark)
21670 && matches!(
21671 source,
21672 DialectType::Spark | DialectType::Databricks | DialectType::Hive
21673 )
21674 && Self::has_varchar_char_type(&c.to)
21675 {
21676 c.to = Self::normalize_varchar_to_string(c.to);
21677 }
21678 Ok(Expression::TryCast(c))
21679 } else {
21680 Ok(e)
21681 }
21682 }
21683 Action::XorExpand => {
21684 // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
21685 // Snowflake: use BOOLXOR(a, b) instead
21686 if let Expression::Xor(xor) = e {
21687 // Collect all XOR operands
21688 let mut operands = Vec::new();
21689 if let Some(this) = xor.this {
21690 operands.push(*this);
21691 }
21692 if let Some(expr) = xor.expression {
21693 operands.push(*expr);
21694 }
21695 operands.extend(xor.expressions);
21696
21697 // Snowflake: use BOOLXOR(a, b)
21698 if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
21699 let a = operands.remove(0);
21700 let b = operands.remove(0);
21701 return Ok(Expression::Function(Box::new(Function::new(
21702 "BOOLXOR".to_string(),
21703 vec![a, b],
21704 ))));
21705 }
21706
21707 // Helper to build (a AND NOT b) OR (NOT a AND b)
21708 let make_xor = |a: Expression, b: Expression| -> Expression {
21709 let not_b = Expression::Not(Box::new(
21710 crate::expressions::UnaryOp::new(b.clone()),
21711 ));
21712 let not_a = Expression::Not(Box::new(
21713 crate::expressions::UnaryOp::new(a.clone()),
21714 ));
21715 let left_and = Expression::And(Box::new(BinaryOp {
21716 left: a,
21717 right: Expression::Paren(Box::new(Paren {
21718 this: not_b,
21719 trailing_comments: Vec::new(),
21720 })),
21721 left_comments: Vec::new(),
21722 operator_comments: Vec::new(),
21723 trailing_comments: Vec::new(),
21724 inferred_type: None,
21725 }));
21726 let right_and = Expression::And(Box::new(BinaryOp {
21727 left: Expression::Paren(Box::new(Paren {
21728 this: not_a,
21729 trailing_comments: Vec::new(),
21730 })),
21731 right: b,
21732 left_comments: Vec::new(),
21733 operator_comments: Vec::new(),
21734 trailing_comments: Vec::new(),
21735 inferred_type: None,
21736 }));
21737 Expression::Or(Box::new(BinaryOp {
21738 left: Expression::Paren(Box::new(Paren {
21739 this: left_and,
21740 trailing_comments: Vec::new(),
21741 })),
21742 right: Expression::Paren(Box::new(Paren {
21743 this: right_and,
21744 trailing_comments: Vec::new(),
21745 })),
21746 left_comments: Vec::new(),
21747 operator_comments: Vec::new(),
21748 trailing_comments: Vec::new(),
21749 inferred_type: None,
21750 }))
21751 };
21752
21753 if operands.len() >= 2 {
21754 let mut result = make_xor(operands.remove(0), operands.remove(0));
21755 for operand in operands {
21756 result = make_xor(result, operand);
21757 }
21758 Ok(result)
21759 } else if operands.len() == 1 {
21760 Ok(operands.remove(0))
21761 } else {
21762 // No operands - return FALSE (shouldn't happen)
21763 Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
21764 value: false,
21765 }))
21766 }
21767 } else {
21768 Ok(e)
21769 }
21770 }
21771 Action::DatePartUnquote => {
21772 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
21773 // Convert the quoted string first arg to a bare Column/Identifier
21774 if let Expression::Function(mut f) = e {
21775 if let Some(Expression::Literal(lit)) = f.args.first() {
21776 if let crate::expressions::Literal::String(s) = lit.as_ref() {
21777 let bare_name = s.to_ascii_lowercase();
21778 f.args[0] =
21779 Expression::Column(Box::new(crate::expressions::Column {
21780 name: Identifier::new(bare_name),
21781 table: None,
21782 join_mark: false,
21783 trailing_comments: Vec::new(),
21784 span: None,
21785 inferred_type: None,
21786 }));
21787 }
21788 }
21789 Ok(Expression::Function(f))
21790 } else {
21791 Ok(e)
21792 }
21793 }
21794 Action::ArrayLengthConvert => {
21795 // Extract the argument from the expression
21796 let arg = match e {
21797 Expression::Cardinality(ref f) => f.this.clone(),
21798 Expression::ArrayLength(ref f) => f.this.clone(),
21799 Expression::ArraySize(ref f) => f.this.clone(),
21800 _ => return Ok(e),
21801 };
21802 match target {
21803 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
21804 Ok(Expression::Function(Box::new(Function::new(
21805 "SIZE".to_string(),
21806 vec![arg],
21807 ))))
21808 }
21809 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
21810 Ok(Expression::Cardinality(Box::new(
21811 crate::expressions::UnaryFunc::new(arg),
21812 )))
21813 }
21814 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
21815 crate::expressions::UnaryFunc::new(arg),
21816 ))),
21817 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
21818 crate::expressions::UnaryFunc::new(arg),
21819 ))),
21820 DialectType::PostgreSQL | DialectType::Redshift => {
21821 // PostgreSQL ARRAY_LENGTH requires dimension arg
21822 Ok(Expression::Function(Box::new(Function::new(
21823 "ARRAY_LENGTH".to_string(),
21824 vec![arg, Expression::number(1)],
21825 ))))
21826 }
21827 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
21828 crate::expressions::UnaryFunc::new(arg),
21829 ))),
21830 _ => Ok(e), // Keep original
21831 }
21832 }
21833
21834 Action::JsonExtractToArrow => {
21835 // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
21836 if let Expression::JsonExtract(mut f) = e {
21837 f.arrow_syntax = true;
21838 // Transform path: convert bracket notation to dot notation
21839 // SQLite strips wildcards, DuckDB preserves them
21840 if let Expression::Literal(ref lit) = f.path {
21841 if let Literal::String(ref s) = lit.as_ref() {
21842 let mut transformed = s.clone();
21843 if matches!(target, DialectType::SQLite) {
21844 transformed = Self::strip_json_wildcards(&transformed);
21845 }
21846 transformed = Self::bracket_to_dot_notation(&transformed);
21847 if transformed != *s {
21848 f.path = Expression::string(&transformed);
21849 }
21850 }
21851 }
21852 Ok(Expression::JsonExtract(f))
21853 } else {
21854 Ok(e)
21855 }
21856 }
21857
21858 Action::JsonExtractToGetJsonObject => {
21859 if let Expression::JsonExtract(f) = e {
21860 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
21861 // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
21862 // Use proper decomposition that handles brackets
21863 let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path {
21864 if let Literal::String(ref s) = lit.as_ref() {
21865 let parts = Self::decompose_json_path(s);
21866 parts.into_iter().map(|k| Expression::string(&k)).collect()
21867 } else {
21868 vec![]
21869 }
21870 } else {
21871 vec![f.path]
21872 };
21873 let func_name = if matches!(target, DialectType::Redshift) {
21874 "JSON_EXTRACT_PATH_TEXT"
21875 } else {
21876 "JSON_EXTRACT_PATH"
21877 };
21878 let mut args = vec![f.this];
21879 args.extend(keys);
21880 Ok(Expression::Function(Box::new(Function::new(
21881 func_name.to_string(),
21882 args,
21883 ))))
21884 } else {
21885 // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
21886 // Convert bracket double quotes to single quotes
21887 let path = if let Expression::Literal(ref lit) = f.path {
21888 if let Literal::String(ref s) = lit.as_ref() {
21889 let normalized = Self::bracket_to_single_quotes(s);
21890 if normalized != *s {
21891 Expression::string(&normalized)
21892 } else {
21893 f.path.clone()
21894 }
21895 } else {
21896 f.path.clone()
21897 }
21898 } else {
21899 f.path.clone()
21900 };
21901 Ok(Expression::Function(Box::new(Function::new(
21902 "GET_JSON_OBJECT".to_string(),
21903 vec![f.this, path],
21904 ))))
21905 }
21906 } else {
21907 Ok(e)
21908 }
21909 }
21910
21911 Action::JsonExtractScalarToGetJsonObject => {
21912 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
21913 if let Expression::JsonExtractScalar(f) = e {
21914 Ok(Expression::Function(Box::new(Function::new(
21915 "GET_JSON_OBJECT".to_string(),
21916 vec![f.this, f.path],
21917 ))))
21918 } else {
21919 Ok(e)
21920 }
21921 }
21922
            Action::JsonExtractToTsql => {
                // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY(x, path), JSON_VALUE(x, path)) for TSQL
                // TSQL splits JSON access across two functions: JSON_QUERY returns
                // objects/arrays and JSON_VALUE returns scalars, each yielding NULL
                // for the other kind, so ISNULL of both covers every path type.
                let (this, path) = match e {
                    Expression::JsonExtract(f) => (f.this, f.path),
                    Expression::JsonExtractScalar(f) => (f.this, f.path),
                    // Not a JSON extraction node: pass through unchanged.
                    _ => return Ok(e),
                };
                // Transform path: strip wildcards, convert bracket notation to dot notation
                // (TSQL's JSON path syntax supports neither wildcards nor brackets).
                let transformed_path = if let Expression::Literal(ref lit) = path {
                    if let Literal::String(ref s) = lit.as_ref() {
                        let stripped = Self::strip_json_wildcards(s);
                        let dotted = Self::bracket_to_dot_notation(&stripped);
                        Expression::string(&dotted)
                    } else {
                        // Non-string literal: leave the path untouched.
                        path.clone()
                    }
                } else {
                    // Dynamic (non-literal) path: cannot be rewritten statically.
                    path
                };
                let json_query = Expression::Function(Box::new(Function::new(
                    "JSON_QUERY".to_string(),
                    vec![this.clone(), transformed_path.clone()],
                )));
                let json_value = Expression::Function(Box::new(Function::new(
                    "JSON_VALUE".to_string(),
                    vec![this, transformed_path],
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "ISNULL".to_string(),
                    vec![json_query, json_value],
                ))))
            }
21955
21956 Action::JsonExtractToClickHouse => {
21957 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString(x, 'key1', idx, 'key2') for ClickHouse
21958 let (this, path) = match e {
21959 Expression::JsonExtract(f) => (f.this, f.path),
21960 Expression::JsonExtractScalar(f) => (f.this, f.path),
21961 _ => return Ok(e),
21962 };
21963 let args: Vec<Expression> = if let Expression::Literal(lit) = path {
21964 if let Literal::String(ref s) = lit.as_ref() {
21965 let parts = Self::decompose_json_path(s);
21966 let mut result = vec![this];
21967 for part in parts {
21968 // ClickHouse uses 1-based integer indices for array access
21969 if let Ok(idx) = part.parse::<i64>() {
21970 result.push(Expression::number(idx + 1));
21971 } else {
21972 result.push(Expression::string(&part));
21973 }
21974 }
21975 result
21976 } else {
21977 vec![]
21978 }
21979 } else {
21980 vec![this, path]
21981 };
21982 Ok(Expression::Function(Box::new(Function::new(
21983 "JSONExtractString".to_string(),
21984 args,
21985 ))))
21986 }
21987
21988 Action::JsonExtractScalarConvert => {
21989 // JSON_EXTRACT_SCALAR -> target-specific
21990 if let Expression::JsonExtractScalar(f) = e {
21991 match target {
21992 DialectType::PostgreSQL | DialectType::Redshift => {
21993 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'key1', 'key2')
21994 let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path
21995 {
21996 if let Literal::String(ref s) = lit.as_ref() {
21997 let parts = Self::decompose_json_path(s);
21998 parts.into_iter().map(|k| Expression::string(&k)).collect()
21999 } else {
22000 vec![]
22001 }
22002 } else {
22003 vec![f.path]
22004 };
22005 let mut args = vec![f.this];
22006 args.extend(keys);
22007 Ok(Expression::Function(Box::new(Function::new(
22008 "JSON_EXTRACT_PATH_TEXT".to_string(),
22009 args,
22010 ))))
22011 }
22012 DialectType::Snowflake => {
22013 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'stripped_path')
22014 let stripped_path = if let Expression::Literal(ref lit) = f.path {
22015 if let Literal::String(ref s) = lit.as_ref() {
22016 let stripped = Self::strip_json_dollar_prefix(s);
22017 Expression::string(&stripped)
22018 } else {
22019 f.path.clone()
22020 }
22021 } else {
22022 f.path
22023 };
22024 Ok(Expression::Function(Box::new(Function::new(
22025 "JSON_EXTRACT_PATH_TEXT".to_string(),
22026 vec![f.this, stripped_path],
22027 ))))
22028 }
22029 DialectType::SQLite | DialectType::DuckDB => {
22030 // JSON_EXTRACT_SCALAR(x, '$.path') -> x ->> '$.path'
22031 Ok(Expression::JsonExtractScalar(Box::new(
22032 crate::expressions::JsonExtractFunc {
22033 this: f.this,
22034 path: f.path,
22035 returning: f.returning,
22036 arrow_syntax: true,
22037 hash_arrow_syntax: false,
22038 wrapper_option: None,
22039 quotes_option: None,
22040 on_scalar_string: false,
22041 on_error: None,
22042 },
22043 )))
22044 }
22045 _ => Ok(Expression::JsonExtractScalar(f)),
22046 }
22047 } else {
22048 Ok(e)
22049 }
22050 }
22051
            Action::JsonPathNormalize => {
                // Normalize JSON path format for BigQuery, MySQL, etc.
                // Only string-literal paths can be rewritten; dynamic paths and
                // non-string literals are left exactly as parsed.
                if let Expression::JsonExtract(mut f) = e {
                    if let Expression::Literal(ref lit) = f.path {
                        if let Literal::String(ref s) = lit.as_ref() {
                            let mut normalized = s.clone();
                            // Convert bracket notation and handle wildcards per dialect
                            match target {
                                DialectType::BigQuery => {
                                    // BigQuery strips wildcards and uses single quotes in brackets
                                    normalized = Self::strip_json_wildcards(&normalized);
                                    normalized = Self::bracket_to_single_quotes(&normalized);
                                }
                                DialectType::MySQL => {
                                    // MySQL preserves wildcards, converts brackets to dot notation
                                    normalized = Self::bracket_to_dot_notation(&normalized);
                                }
                                // Other targets accept the path unchanged.
                                _ => {}
                            }
                            // Only allocate a new literal when something changed.
                            if normalized != *s {
                                f.path = Expression::string(&normalized);
                            }
                        }
                    }
                    Ok(Expression::JsonExtract(f))
                } else {
                    Ok(e)
                }
            }
22081
            Action::JsonQueryValueConvert => {
                // JsonQuery/JsonValue -> target-specific
                // Both node kinds are handled identically for every target except
                // the default arm, which preserves the original function name.
                let (f, is_query) = match e {
                    Expression::JsonQuery(f) => (f, true),
                    Expression::JsonValue(f) => (f, false),
                    _ => return Ok(e),
                };
                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        // ISNULL(JSON_QUERY(...), JSON_VALUE(...)): JSON_QUERY covers
                        // object/array results, JSON_VALUE covers scalars; ISNULL
                        // selects whichever is non-NULL.
                        let json_query = Expression::Function(Box::new(Function::new(
                            "JSON_QUERY".to_string(),
                            vec![f.this.clone(), f.path.clone()],
                        )));
                        let json_value = Expression::Function(Box::new(Function::new(
                            "JSON_VALUE".to_string(),
                            vec![f.this, f.path],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "ISNULL".to_string(),
                            vec![json_query, json_value],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        // Hive-family dialects use GET_JSON_OBJECT for both forms.
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_JSON_OBJECT".to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // NOTE(review): the path is forwarded verbatim here, unlike
                        // JsonExtractScalarConvert which decomposes it into per-level
                        // keys — confirm this difference is intentional.
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_EXTRACT_PATH_TEXT".to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                    DialectType::DuckDB | DialectType::SQLite => {
                        // json -> path arrow syntax
                        Ok(Expression::JsonExtract(Box::new(
                            crate::expressions::JsonExtractFunc {
                                this: f.this,
                                path: f.path,
                                returning: f.returning,
                                arrow_syntax: true,
                                hash_arrow_syntax: false,
                                wrapper_option: f.wrapper_option,
                                quotes_option: f.quotes_option,
                                on_scalar_string: f.on_scalar_string,
                                on_error: f.on_error,
                            },
                        )))
                    }
                    DialectType::Snowflake => {
                        // GET_PATH(PARSE_JSON(json), 'path')
                        // Strip $. prefix from path
                        // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
                        let json_expr = match &f.this {
                            Expression::Function(ref inner_f)
                                if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                            {
                                f.this
                            }
                            Expression::ParseJson(_) => {
                                // Already a ParseJson expression, which generates as PARSE_JSON(...)
                                f.this
                            }
                            _ => Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                vec![f.this],
                            ))),
                        };
                        let path_str = match &f.path {
                            Expression::Literal(lit)
                                if matches!(lit.as_ref(), Literal::String(_)) =>
                            {
                                // The guard above proves this is a String literal.
                                let Literal::String(s) = lit.as_ref() else {
                                    unreachable!()
                                };
                                let stripped = s.strip_prefix("$.").unwrap_or(s);
                                Expression::Literal(Box::new(Literal::String(
                                    stripped.to_string(),
                                )))
                            }
                            // Non-literal path: pass through unmodified.
                            other => other.clone(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_PATH".to_string(),
                            vec![json_expr, path_str],
                        ))))
                    }
                    _ => {
                        // Default: keep as JSON_QUERY/JSON_VALUE function
                        let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                }
            }
22181
22182 Action::JsonLiteralToJsonParse => {
22183 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
22184 // Also DuckDB CAST(x AS JSON) -> JSON_PARSE(x) for Trino/Presto/Athena
22185 if let Expression::Cast(c) = e {
22186 let func_name = if matches!(target, DialectType::Snowflake) {
22187 "PARSE_JSON"
22188 } else {
22189 "JSON_PARSE"
22190 };
22191 Ok(Expression::Function(Box::new(Function::new(
22192 func_name.to_string(),
22193 vec![c.this],
22194 ))))
22195 } else {
22196 Ok(e)
22197 }
22198 }
22199
22200 Action::DuckDBTryCastJsonToTryJsonParse => {
22201 // DuckDB TRY_CAST(x AS JSON) -> TRY(JSON_PARSE(x)) for Trino/Presto/Athena
22202 if let Expression::TryCast(c) = e {
22203 let json_parse = Expression::Function(Box::new(Function::new(
22204 "JSON_PARSE".to_string(),
22205 vec![c.this],
22206 )));
22207 Ok(Expression::Function(Box::new(Function::new(
22208 "TRY".to_string(),
22209 vec![json_parse],
22210 ))))
22211 } else {
22212 Ok(e)
22213 }
22214 }
22215
22216 Action::DuckDBJsonFuncToJsonParse => {
22217 // DuckDB json(x) -> JSON_PARSE(x) for Trino/Presto/Athena
22218 if let Expression::Function(f) = e {
22219 let args = f.args;
22220 Ok(Expression::Function(Box::new(Function::new(
22221 "JSON_PARSE".to_string(),
22222 args,
22223 ))))
22224 } else {
22225 Ok(e)
22226 }
22227 }
22228
22229 Action::DuckDBJsonValidToIsJson => {
22230 // DuckDB json_valid(x) -> x IS JSON (SQL:2016 predicate) for Trino/Presto/Athena
22231 if let Expression::Function(mut f) = e {
22232 let arg = f.args.remove(0);
22233 Ok(Expression::IsJson(Box::new(
22234 crate::expressions::IsJson {
22235 this: arg,
22236 json_type: None,
22237 unique_keys: None,
22238 negated: false,
22239 },
22240 )))
22241 } else {
22242 Ok(e)
22243 }
22244 }
22245
22246 Action::AtTimeZoneConvert => {
22247 // AT TIME ZONE -> target-specific conversion
22248 if let Expression::AtTimeZone(atz) = e {
22249 match target {
22250 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
22251 Ok(Expression::Function(Box::new(Function::new(
22252 "AT_TIMEZONE".to_string(),
22253 vec![atz.this, atz.zone],
22254 ))))
22255 }
22256 DialectType::Spark | DialectType::Databricks => {
22257 Ok(Expression::Function(Box::new(Function::new(
22258 "FROM_UTC_TIMESTAMP".to_string(),
22259 vec![atz.this, atz.zone],
22260 ))))
22261 }
22262 DialectType::Snowflake => {
22263 // CONVERT_TIMEZONE('zone', expr)
22264 Ok(Expression::Function(Box::new(Function::new(
22265 "CONVERT_TIMEZONE".to_string(),
22266 vec![atz.zone, atz.this],
22267 ))))
22268 }
22269 DialectType::BigQuery => {
22270 // TIMESTAMP(DATETIME(expr, 'zone'))
22271 let datetime_call = Expression::Function(Box::new(Function::new(
22272 "DATETIME".to_string(),
22273 vec![atz.this, atz.zone],
22274 )));
22275 Ok(Expression::Function(Box::new(Function::new(
22276 "TIMESTAMP".to_string(),
22277 vec![datetime_call],
22278 ))))
22279 }
22280 _ => Ok(Expression::Function(Box::new(Function::new(
22281 "AT_TIMEZONE".to_string(),
22282 vec![atz.this, atz.zone],
22283 )))),
22284 }
22285 } else {
22286 Ok(e)
22287 }
22288 }
22289
            Action::DayOfWeekConvert => {
                // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
                if let Expression::DayOfWeek(f) = e {
                    match target {
                        // DuckDB exposes the ISO day-of-week numbering as ISODOW.
                        DialectType::DuckDB => Ok(Expression::Function(Box::new(
                            Function::new("ISODOW".to_string(), vec![f.this]),
                        ))),
                        DialectType::Spark | DialectType::Databricks => {
                            // ((DAYOFWEEK(x) % 7) + 1): shifts Spark's DAYOFWEEK
                            // numbering (1 = Sunday) so the result lines up with
                            // the source's DAY_OF_WEEK convention.
                            let dayofweek = Expression::Function(Box::new(Function::new(
                                "DAYOFWEEK".to_string(),
                                vec![f.this],
                            )));
                            let modulo = Expression::Mod(Box::new(BinaryOp {
                                left: dayofweek,
                                right: Expression::number(7),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            // Explicit parens so operator precedence survives
                            // round-tripping through the generator.
                            let paren_mod = Expression::Paren(Box::new(Paren {
                                this: modulo,
                                trailing_comments: Vec::new(),
                            }));
                            let add_one = Expression::Add(Box::new(BinaryOp {
                                left: paren_mod,
                                right: Expression::number(1),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            Ok(Expression::Paren(Box::new(Paren {
                                this: add_one,
                                trailing_comments: Vec::new(),
                            })))
                        }
                        // Other targets keep the DAY_OF_WEEK node unchanged.
                        _ => Ok(Expression::DayOfWeek(f)),
                    }
                } else {
                    Ok(e)
                }
            }
22334
22335 Action::MaxByMinByConvert => {
22336 // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
22337 // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
22338 // Handle both Expression::Function and Expression::AggregateFunction
22339 let (is_max, args) = match &e {
22340 Expression::Function(f) => {
22341 (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
22342 }
22343 Expression::AggregateFunction(af) => {
22344 (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
22345 }
22346 _ => return Ok(e),
22347 };
22348 match target {
22349 DialectType::ClickHouse => {
22350 let name = if is_max { "argMax" } else { "argMin" };
22351 let mut args = args;
22352 args.truncate(2);
22353 Ok(Expression::Function(Box::new(Function::new(
22354 name.to_string(),
22355 args,
22356 ))))
22357 }
22358 DialectType::DuckDB => {
22359 let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
22360 Ok(Expression::Function(Box::new(Function::new(
22361 name.to_string(),
22362 args,
22363 ))))
22364 }
22365 DialectType::Spark | DialectType::Databricks => {
22366 let mut args = args;
22367 args.truncate(2);
22368 let name = if is_max { "MAX_BY" } else { "MIN_BY" };
22369 Ok(Expression::Function(Box::new(Function::new(
22370 name.to_string(),
22371 args,
22372 ))))
22373 }
22374 _ => Ok(e),
22375 }
22376 }
22377
22378 Action::ElementAtConvert => {
22379 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
22380 let (arr, idx) = if let Expression::ElementAt(bf) = e {
22381 (bf.this, bf.expression)
22382 } else if let Expression::Function(ref f) = e {
22383 if f.args.len() >= 2 {
22384 if let Expression::Function(f) = e {
22385 let mut args = f.args;
22386 let arr = args.remove(0);
22387 let idx = args.remove(0);
22388 (arr, idx)
22389 } else {
22390 unreachable!("outer condition already matched Expression::Function")
22391 }
22392 } else {
22393 return Ok(e);
22394 }
22395 } else {
22396 return Ok(e);
22397 };
22398 match target {
22399 DialectType::PostgreSQL => {
22400 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
22401 let arr_expr = Expression::Paren(Box::new(Paren {
22402 this: arr,
22403 trailing_comments: vec![],
22404 }));
22405 Ok(Expression::Subscript(Box::new(
22406 crate::expressions::Subscript {
22407 this: arr_expr,
22408 index: idx,
22409 },
22410 )))
22411 }
22412 DialectType::BigQuery => {
22413 // BigQuery: convert ARRAY[...] to bare [...] for subscript
22414 let arr_expr = match arr {
22415 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
22416 crate::expressions::ArrayConstructor {
22417 expressions: af.expressions,
22418 bracket_notation: true,
22419 use_list_keyword: false,
22420 },
22421 )),
22422 other => other,
22423 };
22424 let safe_ordinal = Expression::Function(Box::new(Function::new(
22425 "SAFE_ORDINAL".to_string(),
22426 vec![idx],
22427 )));
22428 Ok(Expression::Subscript(Box::new(
22429 crate::expressions::Subscript {
22430 this: arr_expr,
22431 index: safe_ordinal,
22432 },
22433 )))
22434 }
22435 _ => Ok(Expression::Function(Box::new(Function::new(
22436 "ELEMENT_AT".to_string(),
22437 vec![arr, idx],
22438 )))),
22439 }
22440 }
22441
22442 Action::CurrentUserParens => {
22443 // CURRENT_USER -> CURRENT_USER() for Snowflake
22444 Ok(Expression::Function(Box::new(Function::new(
22445 "CURRENT_USER".to_string(),
22446 vec![],
22447 ))))
22448 }
22449
            Action::ArrayAggToCollectList => {
                // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
                // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
                // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
                match e {
                    Expression::AggregateFunction(mut af) => {
                        // "Simple" = no modifier that would make element order or
                        // multiplicity observable; only then is ORDER BY dropped.
                        let is_simple =
                            !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
                        // COLLECT_LIST takes a single argument; keep only the first.
                        let args = if af.args.is_empty() {
                            vec![]
                        } else {
                            vec![af.args[0].clone()]
                        };
                        af.name = "COLLECT_LIST".to_string();
                        af.args = args;
                        if is_simple {
                            af.order_by = Vec::new();
                        }
                        Ok(Expression::AggregateFunction(af))
                    }
                    Expression::ArrayAgg(agg) => {
                        // Dedicated ArrayAgg node: rebuild it as a generic
                        // aggregate call carrying the same modifiers.
                        let is_simple =
                            !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
                        Ok(Expression::AggregateFunction(Box::new(
                            crate::expressions::AggregateFunction {
                                name: "COLLECT_LIST".to_string(),
                                args: vec![agg.this.clone()],
                                distinct: agg.distinct,
                                filter: agg.filter.clone(),
                                order_by: if is_simple {
                                    Vec::new()
                                } else {
                                    agg.order_by.clone()
                                },
                                limit: agg.limit.clone(),
                                ignore_nulls: agg.ignore_nulls,
                                inferred_type: None,
                            },
                        )))
                    }
                    _ => Ok(e),
                }
            }
22493
            Action::ArraySyntaxConvert => {
                match e {
                    // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
                    // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
                    Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
                        Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                            expressions: arr.expressions,
                            bracket_notation: true,
                            use_list_keyword: false,
                        })),
                    ),
                    // ARRAY(y) function style -> ArrayFunc for target dialect
                    // bracket_notation=true for BigQuery/DuckDB/Snowflake/ClickHouse/StarRocks
                    // (output []), false for Presto (output ARRAY[])
                    Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
                        let bracket = matches!(
                            target,
                            DialectType::BigQuery
                                | DialectType::DuckDB
                                | DialectType::Snowflake
                                | DialectType::ClickHouse
                                | DialectType::StarRocks
                        );
                        Ok(Expression::ArrayFunc(Box::new(
                            crate::expressions::ArrayConstructor {
                                expressions: f.args,
                                bracket_notation: bracket,
                                use_list_keyword: false,
                            },
                        )))
                    }
                    // Anything else is not an array construct; pass through.
                    _ => Ok(e),
                }
            }
22527
22528 Action::CastToJsonForSpark => {
22529 // CAST(x AS JSON) -> TO_JSON(x) for Spark
22530 if let Expression::Cast(c) = e {
22531 Ok(Expression::Function(Box::new(Function::new(
22532 "TO_JSON".to_string(),
22533 vec![c.this],
22534 ))))
22535 } else {
22536 Ok(e)
22537 }
22538 }
22539
22540 Action::CastJsonToFromJson => {
22541 // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
22542 if let Expression::Cast(c) = e {
22543 // Extract the string literal from ParseJson
22544 let literal_expr = if let Expression::ParseJson(pj) = c.this {
22545 pj.this
22546 } else {
22547 c.this
22548 };
22549 // Convert the target DataType to Spark's type string format
22550 let type_str = Self::data_type_to_spark_string(&c.to);
22551 Ok(Expression::Function(Box::new(Function::new(
22552 "FROM_JSON".to_string(),
22553 vec![
22554 literal_expr,
22555 Expression::Literal(Box::new(Literal::String(type_str))),
22556 ],
22557 ))))
22558 } else {
22559 Ok(e)
22560 }
22561 }
22562
            Action::ToJsonConvert => {
                // TO_JSON(x) -> target-specific conversion
                if let Expression::ToJson(f) = e {
                    let arg = f.this;
                    match target {
                        DialectType::Presto | DialectType::Trino => {
                            // JSON_FORMAT(CAST(x AS JSON)): the cast produces a JSON
                            // value, JSON_FORMAT serializes it back to text.
                            let cast_json = Expression::Cast(Box::new(Cast {
                                this: arg,
                                // JSON is not a first-class DataType here; spell it
                                // as a custom named type.
                                to: DataType::Custom {
                                    name: "JSON".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_FORMAT".to_string(),
                                vec![cast_json],
                            ))))
                        }
                        // BigQuery's TO_JSON_STRING returns JSON text directly.
                        DialectType::BigQuery => Ok(Expression::Function(Box::new(
                            Function::new("TO_JSON_STRING".to_string(), vec![arg]),
                        ))),
                        DialectType::DuckDB => {
                            // CAST(TO_JSON(x) AS TEXT): keep the ToJson node but
                            // force a textual result type.
                            let to_json =
                                Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
                                    this: arg,
                                    original_name: None,
                                    inferred_type: None,
                                }));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: to_json,
                                to: DataType::Text,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        // Any other target: rebuild the ToJson node unchanged.
                        _ => Ok(Expression::ToJson(Box::new(
                            crate::expressions::UnaryFunc {
                                this: arg,
                                original_name: None,
                                inferred_type: None,
                            },
                        ))),
                    }
                } else {
                    Ok(e)
                }
            }
22619
22620 Action::VarianceToClickHouse => {
22621 if let Expression::Variance(f) = e {
22622 Ok(Expression::Function(Box::new(Function::new(
22623 "varSamp".to_string(),
22624 vec![f.this],
22625 ))))
22626 } else {
22627 Ok(e)
22628 }
22629 }
22630
22631 Action::StddevToClickHouse => {
22632 if let Expression::Stddev(f) = e {
22633 Ok(Expression::Function(Box::new(Function::new(
22634 "stddevSamp".to_string(),
22635 vec![f.this],
22636 ))))
22637 } else {
22638 Ok(e)
22639 }
22640 }
22641
22642 Action::ApproxQuantileConvert => {
22643 if let Expression::ApproxQuantile(aq) = e {
22644 let mut args = vec![*aq.this];
22645 if let Some(q) = aq.quantile {
22646 args.push(*q);
22647 }
22648 Ok(Expression::Function(Box::new(Function::new(
22649 "APPROX_PERCENTILE".to_string(),
22650 args,
22651 ))))
22652 } else {
22653 Ok(e)
22654 }
22655 }
22656
22657 Action::DollarParamConvert => {
22658 if let Expression::Parameter(p) = e {
22659 Ok(Expression::Parameter(Box::new(
22660 crate::expressions::Parameter {
22661 name: p.name,
22662 index: p.index,
22663 style: crate::expressions::ParameterStyle::At,
22664 quoted: p.quoted,
22665 string_quoted: p.string_quoted,
22666 expression: p.expression,
22667 },
22668 )))
22669 } else {
22670 Ok(e)
22671 }
22672 }
22673
22674 Action::EscapeStringNormalize => {
22675 if let Expression::Literal(ref lit) = e {
22676 if let Literal::EscapeString(s) = lit.as_ref() {
22677 // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
22678 let stripped = if s.starts_with("e:") || s.starts_with("E:") {
22679 s[2..].to_string()
22680 } else {
22681 s.clone()
22682 };
22683 let normalized = stripped
22684 .replace('\n', "\\n")
22685 .replace('\r', "\\r")
22686 .replace('\t', "\\t");
22687 match target {
22688 DialectType::BigQuery => {
22689 // BigQuery: e'...' -> CAST(b'...' AS STRING)
22690 // Use Raw for the b'...' part to avoid double-escaping
22691 let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
22692 Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
22693 }
22694 _ => Ok(Expression::Literal(Box::new(Literal::EscapeString(
22695 normalized,
22696 )))),
22697 }
22698 } else {
22699 Ok(e)
22700 }
22701 } else {
22702 Ok(e)
22703 }
22704 }
22705
22706 Action::StraightJoinCase => {
22707 // straight_join: keep lowercase for DuckDB, quote for MySQL
22708 if let Expression::Column(col) = e {
22709 if col.name.name == "STRAIGHT_JOIN" {
22710 let mut new_col = col;
22711 new_col.name.name = "straight_join".to_string();
22712 if matches!(target, DialectType::MySQL) {
22713 // MySQL: needs quoting since it's a reserved keyword
22714 new_col.name.quoted = true;
22715 }
22716 Ok(Expression::Column(new_col))
22717 } else {
22718 Ok(Expression::Column(col))
22719 }
22720 } else {
22721 Ok(e)
22722 }
22723 }
22724
22725 Action::TablesampleReservoir => {
22726 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
22727 if let Expression::TableSample(mut ts) = e {
22728 if let Some(ref mut sample) = ts.sample {
22729 sample.method = crate::expressions::SampleMethod::Reservoir;
22730 sample.explicit_method = true;
22731 }
22732 Ok(Expression::TableSample(ts))
22733 } else {
22734 Ok(e)
22735 }
22736 }
22737
            Action::TablesampleSnowflakeStrip => {
                // Strip method and PERCENT for Snowflake target from non-Snowflake source
                // The sample spec may hang off either a TableSample node or
                // directly off a Table node; both carry the same generator flags.
                match e {
                    Expression::TableSample(mut ts) => {
                        if let Some(ref mut sample) = ts.sample {
                            // Suppress the sampling-method keyword and clear the
                            // unit-placement / PERCENT flags for Snowflake output.
                            sample.suppress_method_output = true;
                            sample.unit_after_size = false;
                            sample.is_percent = false;
                        }
                        Ok(Expression::TableSample(ts))
                    }
                    Expression::Table(mut t) => {
                        if let Some(ref mut sample) = t.table_sample {
                            sample.suppress_method_output = true;
                            sample.unit_after_size = false;
                            sample.is_percent = false;
                        }
                        Ok(Expression::Table(t))
                    }
                    _ => Ok(e),
                }
            }
22760
22761 Action::FirstToAnyValue => {
22762 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
22763 if let Expression::First(mut agg) = e {
22764 agg.ignore_nulls = None;
22765 agg.name = Some("ANY_VALUE".to_string());
22766 Ok(Expression::AnyValue(agg))
22767 } else {
22768 Ok(e)
22769 }
22770 }
22771
22772 Action::ArrayIndexConvert => {
22773 // Subscript index: 1-based to 0-based for BigQuery
22774 if let Expression::Subscript(mut sub) = e {
22775 if let Expression::Literal(ref lit) = sub.index {
22776 if let Literal::Number(ref n) = lit.as_ref() {
22777 if let Ok(val) = n.parse::<i64>() {
22778 sub.index = Expression::Literal(Box::new(Literal::Number(
22779 (val - 1).to_string(),
22780 )));
22781 }
22782 }
22783 }
22784 Ok(Expression::Subscript(sub))
22785 } else {
22786 Ok(e)
22787 }
22788 }
22789
22790 Action::AnyValueIgnoreNulls => {
22791 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
22792 if let Expression::AnyValue(mut av) = e {
22793 if av.ignore_nulls.is_none() {
22794 av.ignore_nulls = Some(true);
22795 }
22796 Ok(Expression::AnyValue(av))
22797 } else {
22798 Ok(e)
22799 }
22800 }
22801
22802 Action::BigQueryNullsOrdering => {
22803 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
22804 if let Expression::WindowFunction(mut wf) = e {
22805 for o in &mut wf.over.order_by {
22806 o.nulls_first = None;
22807 }
22808 Ok(Expression::WindowFunction(wf))
22809 } else if let Expression::Ordered(mut o) = e {
22810 o.nulls_first = None;
22811 Ok(Expression::Ordered(o))
22812 } else {
22813 Ok(e)
22814 }
22815 }
22816
22817 Action::SnowflakeFloatProtect => {
22818 // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
22819 // Snowflake's target transform from converting it to DOUBLE.
22820 // Non-Snowflake sources should keep their FLOAT spelling.
22821 if let Expression::DataType(DataType::Float { .. }) = e {
22822 Ok(Expression::DataType(DataType::Custom {
22823 name: "FLOAT".to_string(),
22824 }))
22825 } else {
22826 Ok(e)
22827 }
22828 }
22829
22830 Action::MysqlNullsOrdering => {
22831 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
22832 if let Expression::Ordered(mut o) = e {
22833 let nulls_last = o.nulls_first == Some(false);
22834 let desc = o.desc;
22835 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
22836 // If requested ordering matches default, just strip NULLS clause
22837 let matches_default = if desc {
22838 // DESC default is NULLS FIRST, so nulls_first=true matches
22839 o.nulls_first == Some(true)
22840 } else {
22841 // ASC default is NULLS LAST, so nulls_first=false matches
22842 nulls_last
22843 };
22844 if matches_default {
22845 o.nulls_first = None;
22846 Ok(Expression::Ordered(o))
22847 } else {
22848 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
22849 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
22850 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
22851 let null_val = if desc { 1 } else { 0 };
22852 let non_null_val = if desc { 0 } else { 1 };
22853 let _case_expr = Expression::Case(Box::new(Case {
22854 operand: None,
22855 whens: vec![(
22856 Expression::IsNull(Box::new(crate::expressions::IsNull {
22857 this: o.this.clone(),
22858 not: false,
22859 postfix_form: false,
22860 })),
22861 Expression::number(null_val),
22862 )],
22863 else_: Some(Expression::number(non_null_val)),
22864 comments: Vec::new(),
22865 inferred_type: None,
22866 }));
22867 o.nulls_first = None;
22868 // Return a tuple of [case_expr, ordered_expr]
22869 // We need to return both as part of the ORDER BY
22870 // But since transform_recursive processes individual expressions,
22871 // we can't easily add extra ORDER BY items here.
22872 // Instead, strip the nulls_first
22873 o.nulls_first = None;
22874 Ok(Expression::Ordered(o))
22875 }
22876 } else {
22877 Ok(e)
22878 }
22879 }
22880
            Action::MysqlNullsLastRewrite => {
                // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
                // to simulate NULLS LAST for ASC ordering
                if let Expression::WindowFunction(mut wf) = e {
                    let mut new_order_by = Vec::new();
                    for o in wf.over.order_by {
                        if !o.desc {
                            // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
                            // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
                            // so NULL rows sort after all non-NULL rows.
                            let case_expr = Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(
                                    Expression::IsNull(Box::new(crate::expressions::IsNull {
                                        this: o.this.clone(),
                                        not: false,
                                        postfix_form: false,
                                    })),
                                    Expression::Literal(Box::new(Literal::Number(
                                        "1".to_string(),
                                    ))),
                                )],
                                else_: Some(Expression::Literal(Box::new(Literal::Number(
                                    "0".to_string(),
                                )))),
                                comments: Vec::new(),
                                inferred_type: None,
                            }));
                            // The synthetic key sorts ascending, ahead of the
                            // original expression in the ORDER BY list.
                            new_order_by.push(crate::expressions::Ordered {
                                this: case_expr,
                                desc: false,
                                nulls_first: None,
                                explicit_asc: false,
                                with_fill: None,
                            });
                            let mut ordered = o;
                            // MySQL cannot parse NULLS FIRST/LAST; drop the flag.
                            ordered.nulls_first = None;
                            new_order_by.push(ordered);
                        } else {
                            // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
                            // No change needed
                            let mut ordered = o;
                            ordered.nulls_first = None;
                            new_order_by.push(ordered);
                        }
                    }
                    wf.over.order_by = new_order_by;
                    Ok(Expression::WindowFunction(wf))
                } else {
                    Ok(e)
                }
            }
22932
22933 Action::RespectNullsConvert => {
22934 // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
22935 if let Expression::WindowFunction(mut wf) = e {
22936 match &mut wf.this {
22937 Expression::FirstValue(ref mut vf) => {
22938 if vf.ignore_nulls == Some(false) {
22939 vf.ignore_nulls = None;
22940 // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
22941 // but that's handled by the generator's NULLS ordering
22942 }
22943 }
22944 Expression::LastValue(ref mut vf) => {
22945 if vf.ignore_nulls == Some(false) {
22946 vf.ignore_nulls = None;
22947 }
22948 }
22949 _ => {}
22950 }
22951 Ok(Expression::WindowFunction(wf))
22952 } else {
22953 Ok(e)
22954 }
22955 }
22956
22957 Action::SnowflakeWindowFrameStrip => {
22958 // Strip the default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
22959 // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when targeting Snowflake
22960 if let Expression::WindowFunction(mut wf) = e {
22961 wf.over.frame = None;
22962 Ok(Expression::WindowFunction(wf))
22963 } else {
22964 Ok(e)
22965 }
22966 }
22967
22968 Action::SnowflakeWindowFrameAdd => {
22969 // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
22970 // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when transpiling from Snowflake to non-Snowflake
22971 if let Expression::WindowFunction(mut wf) = e {
22972 wf.over.frame = Some(crate::expressions::WindowFrame {
22973 kind: crate::expressions::WindowFrameKind::Rows,
22974 start: crate::expressions::WindowFrameBound::UnboundedPreceding,
22975 end: Some(crate::expressions::WindowFrameBound::UnboundedFollowing),
22976 exclude: None,
22977 kind_text: None,
22978 start_side_text: None,
22979 end_side_text: None,
22980 });
22981 Ok(Expression::WindowFunction(wf))
22982 } else {
22983 Ok(e)
22984 }
22985 }
22986
22987 Action::CreateTableStripComment => {
22988 // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
22989 if let Expression::CreateTable(mut ct) = e {
22990 for col in &mut ct.columns {
22991 col.comment = None;
22992 col.constraints.retain(|c| {
22993 !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
22994 });
22995 // Also remove Comment from constraint_order
22996 col.constraint_order.retain(|c| {
22997 !matches!(c, crate::expressions::ConstraintType::Comment)
22998 });
22999 }
23000 // Strip properties (USING, PARTITIONED BY, etc.)
23001 ct.properties.clear();
23002 Ok(Expression::CreateTable(ct))
23003 } else {
23004 Ok(e)
23005 }
23006 }
23007
23008 Action::AlterTableToSpRename => {
23009 // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
23010 if let Expression::AlterTable(ref at) = e {
23011 if let Some(crate::expressions::AlterTableAction::RenameTable(
23012 ref new_tbl,
23013 )) = at.actions.first()
23014 {
23015 // Build the old table name using TSQL bracket quoting
23016 let old_name = if let Some(ref schema) = at.name.schema {
23017 if at.name.name.quoted || schema.quoted {
23018 format!("[{}].[{}]", schema.name, at.name.name.name)
23019 } else {
23020 format!("{}.{}", schema.name, at.name.name.name)
23021 }
23022 } else {
23023 if at.name.name.quoted {
23024 format!("[{}]", at.name.name.name)
23025 } else {
23026 at.name.name.name.clone()
23027 }
23028 };
23029 let new_name = new_tbl.name.name.clone();
23030 // EXEC sp_rename 'old_name', 'new_name'
23031 let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
23032 Ok(Expression::Raw(crate::expressions::Raw { sql }))
23033 } else {
23034 Ok(e)
23035 }
23036 } else {
23037 Ok(e)
23038 }
23039 }
23040
23041 Action::SnowflakeIntervalFormat => {
23042 // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
23043 if let Expression::Interval(mut iv) = e {
23044 if let (Some(Expression::Literal(lit)), Some(ref unit_spec)) =
23045 (&iv.this, &iv.unit)
23046 {
23047 if let Literal::String(ref val) = lit.as_ref() {
23048 let unit_str = match unit_spec {
23049 crate::expressions::IntervalUnitSpec::Simple {
23050 unit, ..
23051 } => match unit {
23052 crate::expressions::IntervalUnit::Year => "YEAR",
23053 crate::expressions::IntervalUnit::Quarter => "QUARTER",
23054 crate::expressions::IntervalUnit::Month => "MONTH",
23055 crate::expressions::IntervalUnit::Week => "WEEK",
23056 crate::expressions::IntervalUnit::Day => "DAY",
23057 crate::expressions::IntervalUnit::Hour => "HOUR",
23058 crate::expressions::IntervalUnit::Minute => "MINUTE",
23059 crate::expressions::IntervalUnit::Second => "SECOND",
23060 crate::expressions::IntervalUnit::Millisecond => {
23061 "MILLISECOND"
23062 }
23063 crate::expressions::IntervalUnit::Microsecond => {
23064 "MICROSECOND"
23065 }
23066 crate::expressions::IntervalUnit::Nanosecond => {
23067 "NANOSECOND"
23068 }
23069 },
23070 _ => "",
23071 };
23072 if !unit_str.is_empty() {
23073 let combined = format!("{} {}", val, unit_str);
23074 iv.this = Some(Expression::Literal(Box::new(Literal::String(
23075 combined,
23076 ))));
23077 iv.unit = None;
23078 }
23079 }
23080 }
23081 Ok(Expression::Interval(iv))
23082 } else {
23083 Ok(e)
23084 }
23085 }
23086
23087 Action::ArrayConcatBracketConvert => {
23088 // Expression::Array/ArrayFunc -> target-specific
23089 // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
23090 // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
23091 match e {
23092 Expression::Array(arr) => {
23093 if matches!(target, DialectType::Redshift) {
23094 Ok(Expression::Function(Box::new(Function::new(
23095 "ARRAY".to_string(),
23096 arr.expressions,
23097 ))))
23098 } else {
23099 Ok(Expression::ArrayFunc(Box::new(
23100 crate::expressions::ArrayConstructor {
23101 expressions: arr.expressions,
23102 bracket_notation: false,
23103 use_list_keyword: false,
23104 },
23105 )))
23106 }
23107 }
23108 Expression::ArrayFunc(arr) => {
23109 // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
23110 if matches!(target, DialectType::Redshift) {
23111 Ok(Expression::Function(Box::new(Function::new(
23112 "ARRAY".to_string(),
23113 arr.expressions,
23114 ))))
23115 } else {
23116 Ok(Expression::ArrayFunc(arr))
23117 }
23118 }
23119 _ => Ok(e),
23120 }
23121 }
23122
23123 Action::BitAggFloatCast => {
23124 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
23125 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
23126 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
23127 let int_type = DataType::Int {
23128 length: None,
23129 integer_spelling: false,
23130 };
23131 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
23132 if let Expression::Cast(c) = agg_this {
23133 match &c.to {
23134 DataType::Float { .. }
23135 | DataType::Double { .. }
23136 | DataType::Custom { .. } => {
23137 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
23138 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
23139 let inner_type = match &c.to {
23140 DataType::Float {
23141 precision, scale, ..
23142 } => DataType::Float {
23143 precision: *precision,
23144 scale: *scale,
23145 real_spelling: true,
23146 },
23147 other => other.clone(),
23148 };
23149 let inner_cast =
23150 Expression::Cast(Box::new(crate::expressions::Cast {
23151 this: c.this.clone(),
23152 to: inner_type,
23153 trailing_comments: Vec::new(),
23154 double_colon_syntax: false,
23155 format: None,
23156 default: None,
23157 inferred_type: None,
23158 }));
23159 let rounded = Expression::Function(Box::new(Function::new(
23160 "ROUND".to_string(),
23161 vec![inner_cast],
23162 )));
23163 Expression::Cast(Box::new(crate::expressions::Cast {
23164 this: rounded,
23165 to: int_dt,
23166 trailing_comments: Vec::new(),
23167 double_colon_syntax: false,
23168 format: None,
23169 default: None,
23170 inferred_type: None,
23171 }))
23172 }
23173 DataType::Decimal { .. } => {
23174 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
23175 Expression::Cast(Box::new(crate::expressions::Cast {
23176 this: Expression::Cast(c),
23177 to: int_dt,
23178 trailing_comments: Vec::new(),
23179 double_colon_syntax: false,
23180 format: None,
23181 default: None,
23182 inferred_type: None,
23183 }))
23184 }
23185 _ => Expression::Cast(c),
23186 }
23187 } else {
23188 agg_this
23189 }
23190 };
23191 match e {
23192 Expression::BitwiseOrAgg(mut f) => {
23193 f.this = wrap_agg(f.this, int_type);
23194 Ok(Expression::BitwiseOrAgg(f))
23195 }
23196 Expression::BitwiseAndAgg(mut f) => {
23197 let int_type = DataType::Int {
23198 length: None,
23199 integer_spelling: false,
23200 };
23201 f.this = wrap_agg(f.this, int_type);
23202 Ok(Expression::BitwiseAndAgg(f))
23203 }
23204 Expression::BitwiseXorAgg(mut f) => {
23205 let int_type = DataType::Int {
23206 length: None,
23207 integer_spelling: false,
23208 };
23209 f.this = wrap_agg(f.this, int_type);
23210 Ok(Expression::BitwiseXorAgg(f))
23211 }
23212 _ => Ok(e),
23213 }
23214 }
23215
23216 Action::BitAggSnowflakeRename => {
23217 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
23218 match e {
23219 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
23220 Function::new("BITORAGG".to_string(), vec![f.this]),
23221 ))),
23222 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
23223 Function::new("BITANDAGG".to_string(), vec![f.this]),
23224 ))),
23225 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
23226 Function::new("BITXORAGG".to_string(), vec![f.this]),
23227 ))),
23228 _ => Ok(e),
23229 }
23230 }
23231
23232 Action::StrftimeCastTimestamp => {
23233 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
23234 if let Expression::Cast(mut c) = e {
23235 if matches!(
23236 c.to,
23237 DataType::Timestamp {
23238 timezone: false,
23239 ..
23240 }
23241 ) {
23242 c.to = DataType::Custom {
23243 name: "TIMESTAMP_NTZ".to_string(),
23244 };
23245 }
23246 Ok(Expression::Cast(c))
23247 } else {
23248 Ok(e)
23249 }
23250 }
23251
23252 Action::DecimalDefaultPrecision => {
23253 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
23254 if let Expression::Cast(mut c) = e {
23255 if matches!(
23256 c.to,
23257 DataType::Decimal {
23258 precision: None,
23259 ..
23260 }
23261 ) {
23262 c.to = DataType::Decimal {
23263 precision: Some(18),
23264 scale: Some(3),
23265 };
23266 }
23267 Ok(Expression::Cast(c))
23268 } else {
23269 Ok(e)
23270 }
23271 }
23272
            Action::FilterToIff => {
                // FILTER (WHERE cond) -> push the condition into the wrapped
                // aggregate as an IFF argument:
                //   AGG(x) FILTER (WHERE c) -> AGG(IFF(c, x, NULL))
                if let Expression::Filter(f) = e {
                    let condition = *f.expression;
                    let agg = *f.this;
                    // Strip WHERE from condition
                    let cond = match condition {
                        Expression::Where(w) => w.this,
                        other => other,
                    };
                    // Extract the aggregate function and its argument
                    // We want AVG(IFF(condition, x, NULL))
                    match agg {
                        Expression::Function(mut func) => {
                            if !func.args.is_empty() {
                                // Wrap only the first argument in IFF; remaining
                                // args (if any) are left as-is.
                                let orig_arg = func.args[0].clone();
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![cond, orig_arg, Expression::Null(Null)],
                                )));
                                func.args[0] = iff_call;
                                Ok(Expression::Function(func))
                            } else {
                                // Zero-arg call: nothing to wrap, keep the Filter node.
                                Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                                    this: Box::new(Expression::Function(func)),
                                    expression: Box::new(cond),
                                })))
                            }
                        }
                        Expression::Avg(mut avg) => {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![cond, avg.this.clone(), Expression::Null(Null)],
                            )));
                            avg.this = iff_call;
                            Ok(Expression::Avg(avg))
                        }
                        Expression::Sum(mut s) => {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![cond, s.this.clone(), Expression::Null(Null)],
                            )));
                            s.this = iff_call;
                            Ok(Expression::Sum(s))
                        }
                        Expression::Count(mut c) => {
                            // COUNT(*) has no argument expression; in that case the
                            // filter condition is dropped and COUNT passes through.
                            if let Some(ref this_expr) = c.this {
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![cond, this_expr.clone(), Expression::Null(Null)],
                                )));
                                c.this = Some(iff_call);
                            }
                            Ok(Expression::Count(c))
                        }
                        other => {
                            // Fallback: keep as Filter
                            Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                                this: Box::new(other),
                                expression: Box::new(cond),
                            })))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
23340
            Action::AggFilterToIff => {
                // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
                // Unlike FilterToIff above, here the filter lives as a field on the
                // AggFunc node rather than as a wrapping Filter expression.
                // Helper macro to handle the common AggFunc case
                macro_rules! handle_agg_filter_to_iff {
                    ($variant:ident, $agg:expr) => {{
                        let mut agg = $agg;
                        // `take` both clears the filter field and hands us the condition.
                        if let Some(filter_cond) = agg.filter.take() {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
                            )));
                            agg.this = iff_call;
                        }
                        Ok(Expression::$variant(agg))
                    }};
                }

                match e {
                    Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
                    Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
                    Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
                    Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
                    Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
                    Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
                    Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
                    Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
                    Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
                    Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
                    Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
                    Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
                    Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
                    Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
                    Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
                    Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
                    Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
                    Expression::ApproxDistinct(agg) => {
                        handle_agg_filter_to_iff!(ApproxDistinct, agg)
                    }
                    Expression::Count(mut c) => {
                        // COUNT is a separate node whose argument is optional:
                        // the filter is consumed either way, but only wrapped in
                        // IFF when an argument exists (COUNT(*) has none).
                        if let Some(filter_cond) = c.filter.take() {
                            if let Some(ref this_expr) = c.this {
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![
                                        filter_cond,
                                        this_expr.clone(),
                                        Expression::Null(Null),
                                    ],
                                )));
                                c.this = Some(iff_call);
                            }
                        }
                        Ok(Expression::Count(c))
                    }
                    other => Ok(other),
                }
            }
23398
            Action::JsonToGetPath => {
                // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key')
                if let Expression::JsonExtract(je) = e {
                    // Convert to PARSE_JSON() wrapper:
                    // - JSON(x) -> PARSE_JSON(x)
                    // - PARSE_JSON(x) -> keep as-is
                    // - anything else -> wrap in PARSE_JSON()
                    let this = match &je.this {
                        Expression::Function(f)
                            if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
                        {
                            Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                f.args.clone(),
                            )))
                        }
                        Expression::Function(f)
                            if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                        {
                            je.this.clone()
                        }
                        // GET_PATH result is already JSON, don't wrap
                        Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
                            je.this.clone()
                        }
                        other => {
                            // Wrap non-JSON expressions in PARSE_JSON()
                            Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                vec![other.clone()],
                            )))
                        }
                    };
                    // Convert path: extract key from JSONPath or strip $. prefix from string
                    let path = match &je.path {
                        Expression::JSONPath(jp) => {
                            // Extract the key from JSONPath: $root.key -> 'key'
                            // Multiple keys are re-joined with '.' for GET_PATH.
                            let mut key_parts = Vec::new();
                            for expr in &jp.expressions {
                                match expr {
                                    Expression::JSONPathRoot(_) => {} // skip root
                                    Expression::JSONPathKey(k) => {
                                        if let Expression::Literal(lit) = &*k.this {
                                            if let Literal::String(s) = lit.as_ref() {
                                                key_parts.push(s.clone());
                                            }
                                        }
                                    }
                                    // Non-key segments (indexes, wildcards) are skipped.
                                    _ => {}
                                }
                            }
                            if !key_parts.is_empty() {
                                Expression::Literal(Box::new(Literal::String(
                                    key_parts.join("."),
                                )))
                            } else {
                                // Nothing extractable: keep the original path node.
                                je.path.clone()
                            }
                        }
                        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with("$.")) =>
                        {
                            let Literal::String(s) = lit.as_ref() else {
                                unreachable!()
                            };
                            // Drop the leading "$." before cleanup.
                            // NOTE(review): strip_json_wildcards presumably removes
                            // wildcard segments from the path — confirm its contract.
                            let stripped = Self::strip_json_wildcards(&s[2..].to_string());
                            Expression::Literal(Box::new(Literal::String(stripped)))
                        }
                        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$')) =>
                        {
                            let Literal::String(s) = lit.as_ref() else {
                                unreachable!()
                            };
                            // Bare "$" root: drop just the "$".
                            let stripped = Self::strip_json_wildcards(&s[1..].to_string());
                            Expression::Literal(Box::new(Literal::String(stripped)))
                        }
                        _ => je.path.clone(),
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        "GET_PATH".to_string(),
                        vec![this, path],
                    ))))
                } else {
                    Ok(e)
                }
            }
23484
23485 Action::StructToRow => {
23486 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
23487 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
23488
23489 // Extract key-value pairs from either Struct or MapFunc
23490 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
23491 Expression::Struct(s) => Some(
23492 s.fields
23493 .iter()
23494 .map(|(opt_name, field_expr)| {
23495 if let Some(name) = opt_name {
23496 (name.clone(), field_expr.clone())
23497 } else if let Expression::NamedArgument(na) = field_expr {
23498 (na.name.name.clone(), na.value.clone())
23499 } else {
23500 (String::new(), field_expr.clone())
23501 }
23502 })
23503 .collect(),
23504 ),
23505 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
23506 m.keys
23507 .iter()
23508 .zip(m.values.iter())
23509 .map(|(key, value)| {
23510 let key_name = match key {
23511 Expression::Literal(lit)
23512 if matches!(lit.as_ref(), Literal::String(_)) =>
23513 {
23514 let Literal::String(s) = lit.as_ref() else {
23515 unreachable!()
23516 };
23517 s.clone()
23518 }
23519 Expression::Identifier(id) => id.name.clone(),
23520 _ => String::new(),
23521 };
23522 (key_name, value.clone())
23523 })
23524 .collect(),
23525 ),
23526 _ => None,
23527 };
23528
23529 if let Some(pairs) = kv_pairs {
23530 let mut named_args = Vec::new();
23531 for (key_name, value) in pairs {
23532 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
23533 named_args.push(Expression::Alias(Box::new(
23534 crate::expressions::Alias::new(
23535 value,
23536 Identifier::new(key_name),
23537 ),
23538 )));
23539 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
23540 named_args.push(value);
23541 } else {
23542 named_args.push(value);
23543 }
23544 }
23545
23546 if matches!(target, DialectType::BigQuery) {
23547 Ok(Expression::Function(Box::new(Function::new(
23548 "STRUCT".to_string(),
23549 named_args,
23550 ))))
23551 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
23552 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
23553 let row_func = Expression::Function(Box::new(Function::new(
23554 "ROW".to_string(),
23555 named_args,
23556 )));
23557
23558 // Try to infer types for each pair
23559 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
23560 Expression::Struct(s) => Some(
23561 s.fields
23562 .iter()
23563 .map(|(opt_name, field_expr)| {
23564 if let Some(name) = opt_name {
23565 (name.clone(), field_expr.clone())
23566 } else if let Expression::NamedArgument(na) = field_expr
23567 {
23568 (na.name.name.clone(), na.value.clone())
23569 } else {
23570 (String::new(), field_expr.clone())
23571 }
23572 })
23573 .collect(),
23574 ),
23575 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
23576 m.keys
23577 .iter()
23578 .zip(m.values.iter())
23579 .map(|(key, value)| {
23580 let key_name = match key {
23581 Expression::Literal(lit)
23582 if matches!(
23583 lit.as_ref(),
23584 Literal::String(_)
23585 ) =>
23586 {
23587 let Literal::String(s) = lit.as_ref() else {
23588 unreachable!()
23589 };
23590 s.clone()
23591 }
23592 Expression::Identifier(id) => id.name.clone(),
23593 _ => String::new(),
23594 };
23595 (key_name, value.clone())
23596 })
23597 .collect(),
23598 ),
23599 _ => None,
23600 };
23601
23602 if let Some(pairs) = kv_pairs_again {
23603 // Infer types for all values
23604 let mut all_inferred = true;
23605 let mut fields = Vec::new();
23606 for (name, value) in &pairs {
23607 let inferred_type = match value {
23608 Expression::Literal(lit)
23609 if matches!(lit.as_ref(), Literal::Number(_)) =>
23610 {
23611 let Literal::Number(n) = lit.as_ref() else {
23612 unreachable!()
23613 };
23614 if n.contains('.') {
23615 Some(DataType::Double {
23616 precision: None,
23617 scale: None,
23618 })
23619 } else {
23620 Some(DataType::Int {
23621 length: None,
23622 integer_spelling: true,
23623 })
23624 }
23625 }
23626 Expression::Literal(lit)
23627 if matches!(lit.as_ref(), Literal::String(_)) =>
23628 {
23629 Some(DataType::VarChar {
23630 length: None,
23631 parenthesized_length: false,
23632 })
23633 }
23634 Expression::Boolean(_) => Some(DataType::Boolean),
23635 _ => None,
23636 };
23637 if let Some(dt) = inferred_type {
23638 fields.push(crate::expressions::StructField::new(
23639 name.clone(),
23640 dt,
23641 ));
23642 } else {
23643 all_inferred = false;
23644 break;
23645 }
23646 }
23647
23648 if all_inferred && !fields.is_empty() {
23649 let row_type = DataType::Struct {
23650 fields,
23651 nested: true,
23652 };
23653 Ok(Expression::Cast(Box::new(Cast {
23654 this: row_func,
23655 to: row_type,
23656 trailing_comments: Vec::new(),
23657 double_colon_syntax: false,
23658 format: None,
23659 default: None,
23660 inferred_type: None,
23661 })))
23662 } else {
23663 Ok(row_func)
23664 }
23665 } else {
23666 Ok(row_func)
23667 }
23668 } else {
23669 Ok(Expression::Function(Box::new(Function::new(
23670 "ROW".to_string(),
23671 named_args,
23672 ))))
23673 }
23674 } else {
23675 Ok(e)
23676 }
23677 }
23678
            Action::SparkStructConvert => {
                // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
                // or DuckDB {'name': val, ...}
                if let Expression::Function(f) = e {
                    // Extract name-value pairs from aliased args; args without an
                    // alias get an empty-string name.
                    let mut pairs: Vec<(String, Expression)> = Vec::new();
                    for arg in &f.args {
                        match arg {
                            Expression::Alias(a) => {
                                pairs.push((a.alias.name.clone(), a.this.clone()));
                            }
                            _ => {
                                pairs.push((String::new(), arg.clone()));
                            }
                        }
                    }

                    match target {
                        DialectType::DuckDB => {
                            // Convert to DuckDB struct literal {'name': value, ...}
                            let mut keys = Vec::new();
                            let mut values = Vec::new();
                            for (name, value) in &pairs {
                                keys.push(Expression::Literal(Box::new(Literal::String(
                                    name.clone(),
                                ))));
                                values.push(value.clone());
                            }
                            Ok(Expression::MapFunc(Box::new(
                                crate::expressions::MapConstructor {
                                    keys,
                                    values,
                                    curly_brace_syntax: true,
                                    with_map_keyword: false,
                                },
                            )))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
                            let row_args: Vec<Expression> =
                                pairs.iter().map(|(_, v)| v.clone()).collect();
                            let row_func = Expression::Function(Box::new(Function::new(
                                "ROW".to_string(),
                                row_args,
                            )));

                            // Infer a ROW field type for every value; bail out to a
                            // bare ROW(...) (no CAST) if any value is not a literal
                            // number/string/boolean.
                            let mut all_inferred = true;
                            let mut fields = Vec::new();
                            for (name, value) in &pairs {
                                let inferred_type = match value {
                                    Expression::Literal(lit)
                                        if matches!(lit.as_ref(), Literal::Number(_)) =>
                                    {
                                        let Literal::Number(n) = lit.as_ref() else {
                                            unreachable!()
                                        };
                                        // Decimal point -> DOUBLE, otherwise INTEGER.
                                        if n.contains('.') {
                                            Some(DataType::Double {
                                                precision: None,
                                                scale: None,
                                            })
                                        } else {
                                            Some(DataType::Int {
                                                length: None,
                                                integer_spelling: true,
                                            })
                                        }
                                    }
                                    Expression::Literal(lit)
                                        if matches!(lit.as_ref(), Literal::String(_)) =>
                                    {
                                        Some(DataType::VarChar {
                                            length: None,
                                            parenthesized_length: false,
                                        })
                                    }
                                    Expression::Boolean(_) => Some(DataType::Boolean),
                                    _ => None,
                                };
                                if let Some(dt) = inferred_type {
                                    fields.push(crate::expressions::StructField::new(
                                        name.clone(),
                                        dt,
                                    ));
                                } else {
                                    all_inferred = false;
                                    break;
                                }
                            }

                            if all_inferred && !fields.is_empty() {
                                let row_type = DataType::Struct {
                                    fields,
                                    nested: true,
                                };
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: row_func,
                                    to: row_type,
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            } else {
                                Ok(row_func)
                            }
                        }
                        // Any other target keeps the STRUCT(...) call untouched.
                        _ => Ok(Expression::Function(f)),
                    }
                } else {
                    Ok(e)
                }
            }
23794
23795 Action::ApproxCountDistinctToApproxDistinct => {
23796 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
23797 if let Expression::ApproxCountDistinct(f) = e {
23798 Ok(Expression::ApproxDistinct(f))
23799 } else {
23800 Ok(e)
23801 }
23802 }
23803
23804 Action::CollectListToArrayAgg => {
23805 // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
23806 if let Expression::AggregateFunction(f) = e {
23807 let filter_expr = if !f.args.is_empty() {
23808 let arg = f.args[0].clone();
23809 Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
23810 this: arg,
23811 not: true,
23812 postfix_form: false,
23813 })))
23814 } else {
23815 None
23816 };
23817 let agg = crate::expressions::AggFunc {
23818 this: if f.args.is_empty() {
23819 Expression::Null(crate::expressions::Null)
23820 } else {
23821 f.args[0].clone()
23822 },
23823 distinct: f.distinct,
23824 order_by: f.order_by.clone(),
23825 filter: filter_expr,
23826 ignore_nulls: None,
23827 name: None,
23828 having_max: None,
23829 limit: None,
23830 inferred_type: None,
23831 };
23832 Ok(Expression::ArrayAgg(Box::new(agg)))
23833 } else {
23834 Ok(e)
23835 }
23836 }
23837
23838 Action::CollectSetConvert => {
23839 // COLLECT_SET(x) -> target-specific
23840 if let Expression::AggregateFunction(f) = e {
23841 match target {
23842 DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
23843 crate::expressions::AggregateFunction {
23844 name: "SET_AGG".to_string(),
23845 args: f.args,
23846 distinct: false,
23847 order_by: f.order_by,
23848 filter: f.filter,
23849 limit: f.limit,
23850 ignore_nulls: f.ignore_nulls,
23851 inferred_type: None,
23852 },
23853 ))),
23854 DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
23855 crate::expressions::AggregateFunction {
23856 name: "ARRAY_UNIQUE_AGG".to_string(),
23857 args: f.args,
23858 distinct: false,
23859 order_by: f.order_by,
23860 filter: f.filter,
23861 limit: f.limit,
23862 ignore_nulls: f.ignore_nulls,
23863 inferred_type: None,
23864 },
23865 ))),
23866 DialectType::Trino | DialectType::DuckDB => {
23867 let agg = crate::expressions::AggFunc {
23868 this: if f.args.is_empty() {
23869 Expression::Null(crate::expressions::Null)
23870 } else {
23871 f.args[0].clone()
23872 },
23873 distinct: true,
23874 order_by: Vec::new(),
23875 filter: None,
23876 ignore_nulls: None,
23877 name: None,
23878 having_max: None,
23879 limit: None,
23880 inferred_type: None,
23881 };
23882 Ok(Expression::ArrayAgg(Box::new(agg)))
23883 }
23884 _ => Ok(Expression::AggregateFunction(f)),
23885 }
23886 } else {
23887 Ok(e)
23888 }
23889 }
23890
23891 Action::PercentileConvert => {
23892 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
23893 if let Expression::AggregateFunction(f) = e {
23894 let name = match target {
23895 DialectType::DuckDB => "QUANTILE",
23896 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
23897 _ => "PERCENTILE",
23898 };
23899 Ok(Expression::AggregateFunction(Box::new(
23900 crate::expressions::AggregateFunction {
23901 name: name.to_string(),
23902 args: f.args,
23903 distinct: f.distinct,
23904 order_by: f.order_by,
23905 filter: f.filter,
23906 limit: f.limit,
23907 ignore_nulls: f.ignore_nulls,
23908 inferred_type: None,
23909 },
23910 )))
23911 } else {
23912 Ok(e)
23913 }
23914 }
23915
23916 Action::CorrIsnanWrap => {
23917 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
23918 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
23919 let corr_clone = e.clone();
23920 let isnan = Expression::Function(Box::new(Function::new(
23921 "ISNAN".to_string(),
23922 vec![corr_clone.clone()],
23923 )));
23924 let case_expr = Expression::Case(Box::new(Case {
23925 operand: None,
23926 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
23927 else_: Some(corr_clone),
23928 comments: Vec::new(),
23929 inferred_type: None,
23930 }));
23931 Ok(case_expr)
23932 }
23933
23934 Action::TruncToDateTrunc => {
23935 // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
23936 if let Expression::Function(f) = e {
23937 if f.args.len() == 2 {
23938 let timestamp = f.args[0].clone();
23939 let unit_expr = f.args[1].clone();
23940
23941 if matches!(target, DialectType::ClickHouse) {
23942 // For ClickHouse, produce Expression::DateTrunc which the generator
23943 // outputs as DATE_TRUNC(...) without going through the ClickHouse
23944 // target transform that would convert it to dateTrunc
23945 let unit_str = Self::get_unit_str_static(&unit_expr);
23946 let dt_field = match unit_str.as_str() {
23947 "YEAR" => DateTimeField::Year,
23948 "MONTH" => DateTimeField::Month,
23949 "DAY" => DateTimeField::Day,
23950 "HOUR" => DateTimeField::Hour,
23951 "MINUTE" => DateTimeField::Minute,
23952 "SECOND" => DateTimeField::Second,
23953 "WEEK" => DateTimeField::Week,
23954 "QUARTER" => DateTimeField::Quarter,
23955 _ => DateTimeField::Custom(unit_str),
23956 };
23957 Ok(Expression::DateTrunc(Box::new(
23958 crate::expressions::DateTruncFunc {
23959 this: timestamp,
23960 unit: dt_field,
23961 },
23962 )))
23963 } else {
23964 let new_args = vec![unit_expr, timestamp];
23965 Ok(Expression::Function(Box::new(Function::new(
23966 "DATE_TRUNC".to_string(),
23967 new_args,
23968 ))))
23969 }
23970 } else {
23971 Ok(Expression::Function(f))
23972 }
23973 } else {
23974 Ok(e)
23975 }
23976 }
23977
23978 Action::ArrayContainsConvert => {
23979 if let Expression::ArrayContains(f) = e {
23980 match target {
23981 DialectType::Presto | DialectType::Trino => {
23982 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
23983 Ok(Expression::Function(Box::new(Function::new(
23984 "CONTAINS".to_string(),
23985 vec![f.this, f.expression],
23986 ))))
23987 }
23988 DialectType::Snowflake => {
23989 // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
23990 let cast_val =
23991 Expression::Cast(Box::new(crate::expressions::Cast {
23992 this: f.expression,
23993 to: crate::expressions::DataType::Custom {
23994 name: "VARIANT".to_string(),
23995 },
23996 trailing_comments: Vec::new(),
23997 double_colon_syntax: false,
23998 format: None,
23999 default: None,
24000 inferred_type: None,
24001 }));
24002 Ok(Expression::Function(Box::new(Function::new(
24003 "ARRAY_CONTAINS".to_string(),
24004 vec![cast_val, f.this],
24005 ))))
24006 }
24007 _ => Ok(Expression::ArrayContains(f)),
24008 }
24009 } else {
24010 Ok(e)
24011 }
24012 }
24013
Action::ArrayExceptConvert => {
    // ARRAY_EXCEPT(source, exclude): remove the elements of `exclude` from
    // `source`. Semantics differ per target:
    //   - Snowflake -> DuckDB: bag (multiset) semantics, NULL-propagating.
    //   - other -> DuckDB: set semantics over LIST_DISTINCT(source).
    //   - Snowflake / Presto-family targets: keep the native function.
    //   - anything else: leave the ArrayExcept node unchanged for the
    //     generator to render.
    if let Expression::ArrayExcept(f) = e {
        let source_arr = f.this;
        let exclude_arr = f.expression;
        match target {
            DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
                // Snowflake ARRAY_EXCEPT -> DuckDB bag semantics:
                // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
                // ELSE LIST_TRANSFORM(LIST_FILTER(
                //     LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source))),
                //     pair -> (LENGTH(LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1]))
                //         > LENGTH(LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])))),
                //     pair -> pair[1])
                // END
                //
                // Multiset difference: an element is kept when its running
                // occurrence count within `source` (up to its own position,
                // via the slice source[1:pair[2]]) exceeds the number of
                // matching elements in `exclude`. IS NOT DISTINCT FROM is
                // used so NULL elements compare equal to each other.

                // Build null check
                let source_is_null =
                    Expression::IsNull(Box::new(crate::expressions::IsNull {
                        this: source_arr.clone(),
                        not: false,
                        postfix_form: false,
                    }));
                let exclude_is_null =
                    Expression::IsNull(Box::new(crate::expressions::IsNull {
                        this: exclude_arr.clone(),
                        not: false,
                        postfix_form: false,
                    }));
                let null_check =
                    Expression::Or(Box::new(crate::expressions::BinaryOp {
                        left: source_is_null,
                        right: exclude_is_null,
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    }));

                // GENERATE_SERIES(1, LENGTH(source)) — 1-based positions
                // that get zipped onto each element below.
                let gen_series = Expression::Function(Box::new(Function::new(
                    "GENERATE_SERIES".to_string(),
                    vec![
                        Expression::number(1),
                        Expression::Function(Box::new(Function::new(
                            "LENGTH".to_string(),
                            vec![source_arr.clone()],
                        ))),
                    ],
                )));

                // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source)))
                let list_zip = Expression::Function(Box::new(Function::new(
                    "LIST_ZIP".to_string(),
                    vec![source_arr.clone(), gen_series],
                )));

                // pair[1] (the element) and pair[2] (its 1-based position)
                let pair_col = Expression::column("pair");
                let pair_1 = Expression::Subscript(Box::new(
                    crate::expressions::Subscript {
                        this: pair_col.clone(),
                        index: Expression::number(1),
                    },
                ));
                let pair_2 = Expression::Subscript(Box::new(
                    crate::expressions::Subscript {
                        this: pair_col.clone(),
                        index: Expression::number(2),
                    },
                ));

                // source[1:pair[2]] — prefix of source up to this element
                let source_slice = Expression::ArraySlice(Box::new(
                    crate::expressions::ArraySlice {
                        this: source_arr.clone(),
                        start: Some(Expression::number(1)),
                        end: Some(pair_2),
                    },
                ));

                let e_col = Expression::column("e");

                // e -> e IS NOT DISTINCT FROM pair[1]
                let inner_lambda1 =
                    Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                        parameters: vec![crate::expressions::Identifier::new("e")],
                        body: Expression::NullSafeEq(Box::new(
                            crate::expressions::BinaryOp {
                                left: e_col.clone(),
                                right: pair_1.clone(),
                                left_comments: vec![],
                                operator_comments: vec![],
                                trailing_comments: vec![],
                                inferred_type: None,
                            },
                        )),
                        colon: false,
                        parameter_types: vec![],
                    }));

                // LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
                let inner_filter1 = Expression::Function(Box::new(Function::new(
                    "LIST_FILTER".to_string(),
                    vec![source_slice, inner_lambda1],
                )));

                // LENGTH(...) — occurrence count of this element so far
                let len1 = Expression::Function(Box::new(Function::new(
                    "LENGTH".to_string(),
                    vec![inner_filter1],
                )));

                // e -> e IS NOT DISTINCT FROM pair[1] (fresh lambda; same
                // shape as inner_lambda1 but applied to `exclude` below)
                let inner_lambda2 =
                    Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                        parameters: vec![crate::expressions::Identifier::new("e")],
                        body: Expression::NullSafeEq(Box::new(
                            crate::expressions::BinaryOp {
                                left: e_col,
                                right: pair_1.clone(),
                                left_comments: vec![],
                                operator_comments: vec![],
                                trailing_comments: vec![],
                                inferred_type: None,
                            },
                        )),
                        colon: false,
                        parameter_types: vec![],
                    }));

                // LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])
                let inner_filter2 = Expression::Function(Box::new(Function::new(
                    "LIST_FILTER".to_string(),
                    vec![exclude_arr.clone(), inner_lambda2],
                )));

                // LENGTH(...) — matching-element count in `exclude`
                let len2 = Expression::Function(Box::new(Function::new(
                    "LENGTH".to_string(),
                    vec![inner_filter2],
                )));

                // (LENGTH(...) > LENGTH(...)) — keep while occurrences in
                // the prefix exceed occurrences in exclude
                let cond = Expression::Paren(Box::new(Paren {
                    this: Expression::Gt(Box::new(crate::expressions::BinaryOp {
                        left: len1,
                        right: len2,
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    })),
                    trailing_comments: vec![],
                }));

                // pair -> (condition)
                let filter_lambda =
                    Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                        parameters: vec![crate::expressions::Identifier::new(
                            "pair",
                        )],
                        body: cond,
                        colon: false,
                        parameter_types: vec![],
                    }));

                // LIST_FILTER(LIST_ZIP(...), pair -> ...)
                let outer_filter = Expression::Function(Box::new(Function::new(
                    "LIST_FILTER".to_string(),
                    vec![list_zip, filter_lambda],
                )));

                // pair -> pair[1] — project back to the bare element
                let transform_lambda =
                    Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                        parameters: vec![crate::expressions::Identifier::new(
                            "pair",
                        )],
                        body: pair_1,
                        colon: false,
                        parameter_types: vec![],
                    }));

                // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
                let list_transform = Expression::Function(Box::new(Function::new(
                    "LIST_TRANSFORM".to_string(),
                    vec![outer_filter, transform_lambda],
                )));

                // CASE WHEN either input IS NULL THEN NULL ELSE ... END
                Ok(Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(null_check, Expression::Null(Null))],
                    else_: Some(list_transform),
                    comments: Vec::new(),
                    inferred_type: None,
                })))
            }
            DialectType::DuckDB => {
                // ARRAY_EXCEPT(source, exclude) -> set semantics for DuckDB:
                // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
                // ELSE LIST_FILTER(LIST_DISTINCT(source),
                //     e -> LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)) = 0)
                // END
                // LIST_DISTINCT dedupes first; each surviving element is
                // kept only when `exclude` contains no NULL-safe match.

                // Build: source IS NULL
                let source_is_null =
                    Expression::IsNull(Box::new(crate::expressions::IsNull {
                        this: source_arr.clone(),
                        not: false,
                        postfix_form: false,
                    }));
                // Build: exclude IS NULL
                let exclude_is_null =
                    Expression::IsNull(Box::new(crate::expressions::IsNull {
                        this: exclude_arr.clone(),
                        not: false,
                        postfix_form: false,
                    }));
                // source IS NULL OR exclude IS NULL
                let null_check =
                    Expression::Or(Box::new(crate::expressions::BinaryOp {
                        left: source_is_null,
                        right: exclude_is_null,
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    }));

                // LIST_DISTINCT(source)
                let list_distinct = Expression::Function(Box::new(Function::new(
                    "LIST_DISTINCT".to_string(),
                    vec![source_arr.clone()],
                )));

                // x IS NOT DISTINCT FROM e (NULL-safe equality)
                let x_col = Expression::column("x");
                let e_col = Expression::column("e");
                let is_not_distinct = Expression::NullSafeEq(Box::new(
                    crate::expressions::BinaryOp {
                        left: x_col,
                        right: e_col.clone(),
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    },
                ));

                // x -> x IS NOT DISTINCT FROM e
                let inner_lambda =
                    Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                        parameters: vec![crate::expressions::Identifier::new("x")],
                        body: is_not_distinct,
                        colon: false,
                        parameter_types: vec![],
                    }));

                // LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)
                let inner_list_filter =
                    Expression::Function(Box::new(Function::new(
                        "LIST_FILTER".to_string(),
                        vec![exclude_arr.clone(), inner_lambda],
                    )));

                // LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e))
                let len_inner = Expression::Function(Box::new(Function::new(
                    "LENGTH".to_string(),
                    vec![inner_list_filter],
                )));

                // LENGTH(...) = 0 — no match found in `exclude`
                let eq_zero =
                    Expression::Eq(Box::new(crate::expressions::BinaryOp {
                        left: len_inner,
                        right: Expression::number(0),
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    }));

                // e -> LENGTH(LIST_FILTER(...)) = 0
                let outer_lambda =
                    Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                        parameters: vec![crate::expressions::Identifier::new("e")],
                        body: eq_zero,
                        colon: false,
                        parameter_types: vec![],
                    }));

                // LIST_FILTER(LIST_DISTINCT(source), e -> ...)
                let outer_list_filter =
                    Expression::Function(Box::new(Function::new(
                        "LIST_FILTER".to_string(),
                        vec![list_distinct, outer_lambda],
                    )));

                // CASE WHEN ... IS NULL ... THEN NULL ELSE LIST_FILTER(...) END
                Ok(Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(null_check, Expression::Null(Null))],
                    else_: Some(outer_list_filter),
                    comments: Vec::new(),
                    inferred_type: None,
                })))
            }
            DialectType::Snowflake => {
                // Snowflake: ARRAY_EXCEPT(source, exclude) - keep as-is
                Ok(Expression::ArrayExcept(Box::new(
                    crate::expressions::BinaryFunc {
                        this: source_arr,
                        expression: exclude_arr,
                        original_name: None,
                        inferred_type: None,
                    },
                )))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // Presto/Trino: ARRAY_EXCEPT(source, exclude) - keep function name, array syntax already converted
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_EXCEPT".to_string(),
                    vec![source_arr, exclude_arr],
                ))))
            }
            // Default: retain the ArrayExcept node; the target generator
            // decides how to render it.
            _ => Ok(Expression::ArrayExcept(Box::new(
                crate::expressions::BinaryFunc {
                    this: source_arr,
                    expression: exclude_arr,
                    original_name: None,
                    inferred_type: None,
                },
            ))),
        }
    } else {
        // Not an ArrayExcept node: pass through untouched.
        Ok(e)
    }
}
24352
Action::RegexpLikeExasolAnchor => {
    // RegexpLike -> Exasol: wrap pattern with .*...*
    // Exasol REGEXP_LIKE does full-string match, but RLIKE/REGEXP from other
    // dialects does partial match, so we need to anchor with .* on both sides
    //
    // NOTE(review): a literal pattern is wrapped unconditionally, so a
    // pattern already anchored with ^/$ would become ".*^...$.*" — confirm
    // upstream guarantees unanchored patterns before this action fires.
    if let Expression::RegexpLike(mut f) = e {
        match &f.pattern {
            Expression::Literal(lit)
                if matches!(lit.as_ref(), Literal::String(_)) =>
            {
                // Guard above guarantees the String variant here.
                let Literal::String(s) = lit.as_ref() else {
                    unreachable!()
                };
                // String literal: wrap with .*...*
                f.pattern = Expression::Literal(Box::new(Literal::String(
                    format!(".*{}.*", s),
                )));
            }
            _ => {
                // Non-literal: wrap with CONCAT('.*', pattern, '.*')
                // Built as nested binary Concat nodes inside parens; the
                // old f.pattern is moved into the new tree in place.
                f.pattern =
                    Expression::Paren(Box::new(crate::expressions::Paren {
                        this: Expression::Concat(Box::new(
                            crate::expressions::BinaryOp {
                                left: Expression::Concat(Box::new(
                                    crate::expressions::BinaryOp {
                                        left: Expression::Literal(Box::new(
                                            Literal::String(".*".to_string()),
                                        )),
                                        right: f.pattern,
                                        left_comments: vec![],
                                        operator_comments: vec![],
                                        trailing_comments: vec![],
                                        inferred_type: None,
                                    },
                                )),
                                right: Expression::Literal(Box::new(
                                    Literal::String(".*".to_string()),
                                )),
                                left_comments: vec![],
                                operator_comments: vec![],
                                trailing_comments: vec![],
                                inferred_type: None,
                            },
                        )),
                        trailing_comments: vec![],
                    }));
            }
        }
        Ok(Expression::RegexpLike(f))
    } else {
        // Not a RegexpLike node: pass through untouched.
        Ok(e)
    }
}
24406
24407 Action::ArrayPositionSnowflakeSwap => {
24408 // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
24409 if let Expression::ArrayPosition(f) = e {
24410 Ok(Expression::ArrayPosition(Box::new(
24411 crate::expressions::BinaryFunc {
24412 this: f.expression,
24413 expression: f.this,
24414 original_name: f.original_name,
24415 inferred_type: f.inferred_type,
24416 },
24417 )))
24418 } else {
24419 Ok(e)
24420 }
24421 }
24422
24423 Action::SnowflakeArrayPositionToDuckDB => {
24424 // Snowflake ARRAY_POSITION(value, array) -> DuckDB ARRAY_POSITION(array, value) - 1
24425 // Snowflake uses 0-based indexing, DuckDB uses 1-based
24426 // The parser has this=value, expression=array (Snowflake order)
24427 if let Expression::ArrayPosition(f) = e {
24428 // Create ARRAY_POSITION(array, value) in standard order
24429 let standard_pos =
24430 Expression::ArrayPosition(Box::new(crate::expressions::BinaryFunc {
24431 this: f.expression, // array
24432 expression: f.this, // value
24433 original_name: f.original_name,
24434 inferred_type: f.inferred_type,
24435 }));
24436 // Subtract 1 for zero-based indexing
24437 Ok(Expression::Sub(Box::new(BinaryOp {
24438 left: standard_pos,
24439 right: Expression::number(1),
24440 left_comments: vec![],
24441 operator_comments: vec![],
24442 trailing_comments: vec![],
24443 inferred_type: None,
24444 })))
24445 } else {
24446 Ok(e)
24447 }
24448 }
24449
Action::ArrayDistinctConvert => {
    // ARRAY_DISTINCT(arr) -> DuckDB NULL-aware CASE:
    // CASE WHEN ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
    //     THEN LIST_APPEND(LIST_DISTINCT(LIST_FILTER(arr, _u -> NOT _u IS NULL)), NULL)
    //     ELSE LIST_DISTINCT(arr)
    // END
    //
    // Rationale: DuckDB's LIST_COUNT ignores NULL elements while
    // ARRAY_LENGTH does not, so the two differ exactly when `arr`
    // contains at least one NULL. In that case a single NULL is
    // re-appended after deduplicating the non-NULL elements, matching
    // the source dialect's "NULL counts as one distinct value" behavior.
    if let Expression::ArrayDistinct(f) = e {
        let arr = f.this;

        // ARRAY_LENGTH(arr)
        let array_length = Expression::Function(Box::new(Function::new(
            "ARRAY_LENGTH".to_string(),
            vec![arr.clone()],
        )));
        // LIST_COUNT(arr)
        let list_count = Expression::Function(Box::new(Function::new(
            "LIST_COUNT".to_string(),
            vec![arr.clone()],
        )));
        // ARRAY_LENGTH(arr) <> LIST_COUNT(arr) — true iff arr has NULLs
        let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
            left: array_length,
            right: list_count,
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // _u column (lambda parameter)
        let u_col = Expression::column("_u");
        // NOT _u IS NULL
        let u_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
            this: u_col.clone(),
            not: false,
            postfix_form: false,
        }));
        let not_u_is_null =
            Expression::Not(Box::new(crate::expressions::UnaryOp {
                this: u_is_null,
                inferred_type: None,
            }));
        // _u -> NOT _u IS NULL
        let filter_lambda =
            Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                parameters: vec![crate::expressions::Identifier::new("_u")],
                body: not_u_is_null,
                colon: false,
                parameter_types: vec![],
            }));
        // LIST_FILTER(arr, _u -> NOT _u IS NULL) — strip NULL elements
        let list_filter = Expression::Function(Box::new(Function::new(
            "LIST_FILTER".to_string(),
            vec![arr.clone(), filter_lambda],
        )));
        // LIST_DISTINCT(LIST_FILTER(arr, ...))
        let list_distinct_filtered = Expression::Function(Box::new(Function::new(
            "LIST_DISTINCT".to_string(),
            vec![list_filter],
        )));
        // LIST_APPEND(LIST_DISTINCT(LIST_FILTER(...)), NULL) — restore one NULL
        let list_append = Expression::Function(Box::new(Function::new(
            "LIST_APPEND".to_string(),
            vec![list_distinct_filtered, Expression::Null(Null)],
        )));

        // LIST_DISTINCT(arr) — fast path when arr has no NULLs
        let list_distinct = Expression::Function(Box::new(Function::new(
            "LIST_DISTINCT".to_string(),
            vec![arr],
        )));

        // CASE WHEN neq THEN list_append ELSE list_distinct END
        Ok(Expression::Case(Box::new(Case {
            operand: None,
            whens: vec![(neq, list_append)],
            else_: Some(list_distinct),
            comments: Vec::new(),
            inferred_type: None,
        })))
    } else {
        // Not an ArrayDistinct node: pass through untouched.
        Ok(e)
    }
}
24534
24535 Action::ArrayDistinctClickHouse => {
24536 // ARRAY_DISTINCT(arr) -> arrayDistinct(arr) for ClickHouse
24537 if let Expression::ArrayDistinct(f) = e {
24538 Ok(Expression::Function(Box::new(Function::new(
24539 "arrayDistinct".to_string(),
24540 vec![f.this],
24541 ))))
24542 } else {
24543 Ok(e)
24544 }
24545 }
24546
Action::ArrayContainsDuckDBConvert => {
    // Snowflake ARRAY_CONTAINS(value, array) -> DuckDB NULL-aware:
    // CASE WHEN value IS NULL
    //     THEN NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
    //     ELSE ARRAY_CONTAINS(array, value)
    // END
    // Note: In Rust AST from Snowflake parse, this=value (first arg), expression=array (second arg)
    //
    // For a NULL probe value: LIST_COUNT ignores NULL elements while
    // ARRAY_LENGTH does not, so the <> comparison is TRUE iff the array
    // contains a NULL; NULLIF(..., FALSE) turns the FALSE ("no NULL
    // present") case into NULL, matching Snowflake's three-valued result.
    if let Expression::ArrayContains(f) = e {
        let value = f.this;
        let array = f.expression;

        // value IS NULL
        let value_is_null =
            Expression::IsNull(Box::new(crate::expressions::IsNull {
                this: value.clone(),
                not: false,
                postfix_form: false,
            }));

        // ARRAY_LENGTH(array)
        let array_length = Expression::Function(Box::new(Function::new(
            "ARRAY_LENGTH".to_string(),
            vec![array.clone()],
        )));
        // LIST_COUNT(array)
        let list_count = Expression::Function(Box::new(Function::new(
            "LIST_COUNT".to_string(),
            vec![array.clone()],
        )));
        // ARRAY_LENGTH(array) <> LIST_COUNT(array) — array holds a NULL
        let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
            left: array_length,
            right: list_count,
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));
        // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
        let nullif = Expression::Nullif(Box::new(crate::expressions::Nullif {
            this: Box::new(neq),
            expression: Box::new(Expression::Boolean(
                crate::expressions::BooleanLiteral { value: false },
            )),
        }));

        // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
        let array_contains = Expression::Function(Box::new(Function::new(
            "ARRAY_CONTAINS".to_string(),
            vec![array, value],
        )));

        // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
        Ok(Expression::Case(Box::new(Case {
            operand: None,
            whens: vec![(value_is_null, nullif)],
            else_: Some(array_contains),
            comments: Vec::new(),
            inferred_type: None,
        })))
    } else {
        // Not an ArrayContains node: pass through untouched.
        Ok(e)
    }
}
24611
Action::StrPositionExpand => {
    // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
    // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
    // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
    //
    // The search runs on the substring starting at `pos`; a hit is mapped
    // back to the full-string index by adding pos - 1, and a miss (0)
    // stays 0.
    if let Expression::StrPosition(sp) = e {
        let crate::expressions::StrPosition {
            this,
            substr,
            position,
            occurrence,
        } = *sp;
        let string = *this;
        // Missing needle degrades to NULL; missing position defaults to 1
        // (search from the start).
        let substr_expr = match substr {
            Some(s) => *s,
            None => Expression::Null(Null),
        };
        let pos = match position {
            Some(p) => *p,
            None => Expression::number(1),
        };

        // SUBSTRING(string, pos)
        let substring_call = Expression::Function(Box::new(Function::new(
            "SUBSTRING".to_string(),
            vec![string.clone(), pos.clone()],
        )));
        // STRPOS(SUBSTRING(string, pos), substr)
        let strpos_call = Expression::Function(Box::new(Function::new(
            "STRPOS".to_string(),
            vec![substring_call, substr_expr.clone()],
        )));
        // STRPOS(...) + pos - 1
        let pos_adjusted =
            Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                    strpos_call.clone(),
                    pos.clone(),
                ))),
                Expression::number(1),
            )));
        // STRPOS(...) = 0 — needle not found in the suffix
        let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
            strpos_call.clone(),
            Expression::number(0),
        )));

        match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                Ok(Expression::Function(Box::new(Function::new(
                    "IF".to_string(),
                    vec![is_zero, Expression::number(0), pos_adjusted],
                ))))
            }
            DialectType::DuckDB => {
                // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                Ok(Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(is_zero, Expression::number(0))],
                    else_: Some(pos_adjusted),
                    comments: Vec::new(),
                    inferred_type: None,
                })))
            }
            _ => {
                // Reconstruct StrPosition
                // NOTE(review): the defaults applied above are baked in
                // here — an original substr of None round-trips as
                // Some(NULL) and position None as Some(1). Confirm this
                // normalization is intended for non-Presto/DuckDB targets.
                Ok(Expression::StrPosition(Box::new(
                    crate::expressions::StrPosition {
                        this: Box::new(string),
                        substr: Some(Box::new(substr_expr)),
                        position: Some(Box::new(pos)),
                        occurrence,
                    },
                )))
            }
        }
    } else {
        // Not a StrPosition node: pass through untouched.
        Ok(e)
    }
}
24692
Action::MonthsBetweenConvert => {
    // MONTHS_BETWEEN(end_date, start_date): rewrite per target dialect.
    // The AST stores this=end_date, expression=start_date.
    if let Expression::MonthsBetween(mb) = e {
        let crate::expressions::BinaryFunc {
            this: end_date,
            expression: start_date,
            ..
        } = *mb;
        match target {
            DialectType::DuckDB => {
                // DuckDB has no MONTHS_BETWEEN; emulate Oracle/Snowflake
                // semantics:
                //   DATE_DIFF('MONTH', start, end)
                //   + CASE WHEN DAY(end) = DAY(LAST_DAY(end))
                //          AND DAY(start) = DAY(LAST_DAY(start))
                //     THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
                // i.e. an integer month count when both dates fall on
                // month-ends, otherwise a fractional remainder computed
                // against a fixed 31-day month.
                let cast_end = Self::ensure_cast_date(end_date);
                let cast_start = Self::ensure_cast_date(start_date);
                let dd = Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![
                        Expression::string("MONTH"),
                        cast_start.clone(),
                        cast_end.clone(),
                    ],
                )));
                let day_end = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_end.clone()],
                )));
                let day_start = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_start.clone()],
                )));
                let last_day_end = Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_end.clone()],
                )));
                let last_day_start = Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_start.clone()],
                )));
                let day_last_end = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![last_day_end],
                )));
                let day_last_start = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![last_day_start],
                )));
                // DAY(end) = DAY(LAST_DAY(end)) — end is a month-end
                let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                    day_end.clone(),
                    day_last_end,
                )));
                // DAY(start) = DAY(LAST_DAY(start)) — start is a month-end
                let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                    day_start.clone(),
                    day_last_start,
                )));
                let both_cond =
                    Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                let day_diff =
                    Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
                let day_diff_paren =
                    Expression::Paren(Box::new(crate::expressions::Paren {
                        this: day_diff,
                        trailing_comments: Vec::new(),
                    }));
                // Fractional part: (DAY(end) - DAY(start)) / 31.0
                let frac = Expression::Div(Box::new(BinaryOp::new(
                    day_diff_paren,
                    Expression::Literal(Box::new(Literal::Number(
                        "31.0".to_string(),
                    ))),
                )));
                let case_expr = Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(both_cond, Expression::number(0))],
                    else_: Some(frac),
                    comments: Vec::new(),
                    inferred_type: None,
                }));
                Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
            }
            DialectType::Snowflake | DialectType::Redshift => {
                // DATEDIFF(MONTH, start, end)
                // NOTE(review): DATEDIFF counts month boundaries and has no
                // fractional component, unlike MONTHS_BETWEEN — confirm
                // this lossy mapping is intended for these targets.
                let unit = Expression::Identifier(Identifier::new("MONTH"));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEDIFF".to_string(),
                    vec![unit, start_date, end_date],
                ))))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // DATE_DIFF('MONTH', start, end) — unit as a string literal.
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string("MONTH"), start_date, end_date],
                ))))
            }
            // Default: retain the MonthsBetween node for the generator.
            _ => Ok(Expression::MonthsBetween(Box::new(
                crate::expressions::BinaryFunc {
                    this: end_date,
                    expression: start_date,
                    original_name: None,
                    inferred_type: None,
                },
            ))),
        }
    } else {
        // Not a MonthsBetween node: pass through untouched.
        Ok(e)
    }
}
24794
24795 Action::AddMonthsConvert => {
24796 if let Expression::AddMonths(am) = e {
24797 let date = am.this;
24798 let val = am.expression;
24799 match target {
24800 DialectType::TSQL | DialectType::Fabric => {
24801 let cast_date = Self::ensure_cast_datetime2(date);
24802 Ok(Expression::Function(Box::new(Function::new(
24803 "DATEADD".to_string(),
24804 vec![
24805 Expression::Identifier(Identifier::new("MONTH")),
24806 val,
24807 cast_date,
24808 ],
24809 ))))
24810 }
24811 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
24812 // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
24813 // Optionally wrapped in CAST(... AS type) if the input had a specific type
24814
24815 // Determine the cast type from the date expression
24816 let (cast_date, return_type) = match &date {
24817 Expression::Literal(lit)
24818 if matches!(lit.as_ref(), Literal::String(_)) =>
24819 {
24820 // String literal: CAST(str AS TIMESTAMP), no outer CAST
24821 (
24822 Expression::Cast(Box::new(Cast {
24823 this: date.clone(),
24824 to: DataType::Timestamp {
24825 precision: None,
24826 timezone: false,
24827 },
24828 trailing_comments: Vec::new(),
24829 double_colon_syntax: false,
24830 format: None,
24831 default: None,
24832 inferred_type: None,
24833 })),
24834 None,
24835 )
24836 }
24837 Expression::Cast(c) => {
24838 // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
24839 (date.clone(), Some(c.to.clone()))
24840 }
24841 _ => {
24842 // Expression or NULL::TYPE - keep as-is, check for cast type
24843 if let Expression::Cast(c) = &date {
24844 (date.clone(), Some(c.to.clone()))
24845 } else {
24846 (date.clone(), None)
24847 }
24848 }
24849 };
24850
24851 // Build the interval expression
24852 // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
24853 // For integer values, use INTERVAL val MONTH
24854 let is_non_integer_val = match &val {
24855 Expression::Literal(lit)
24856 if matches!(lit.as_ref(), Literal::Number(_)) =>
24857 {
24858 let Literal::Number(n) = lit.as_ref() else {
24859 unreachable!()
24860 };
24861 n.contains('.')
24862 }
24863 Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
24864 Expression::Neg(n) => {
24865 if let Expression::Literal(lit) = &n.this {
24866 if let Literal::Number(s) = lit.as_ref() {
24867 s.contains('.')
24868 } else {
24869 false
24870 }
24871 } else {
24872 false
24873 }
24874 }
24875 _ => false,
24876 };
24877
24878 let add_interval = if is_non_integer_val {
24879 // TO_MONTHS(CAST(ROUND(val) AS INT))
24880 let round_val = Expression::Function(Box::new(Function::new(
24881 "ROUND".to_string(),
24882 vec![val.clone()],
24883 )));
24884 let cast_int = Expression::Cast(Box::new(Cast {
24885 this: round_val,
24886 to: DataType::Int {
24887 length: None,
24888 integer_spelling: false,
24889 },
24890 trailing_comments: Vec::new(),
24891 double_colon_syntax: false,
24892 format: None,
24893 default: None,
24894 inferred_type: None,
24895 }));
24896 Expression::Function(Box::new(Function::new(
24897 "TO_MONTHS".to_string(),
24898 vec![cast_int],
24899 )))
24900 } else {
24901 // INTERVAL val MONTH
24902 // For negative numbers, wrap in parens
24903 let interval_val = match &val {
24904 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n.starts_with('-')) =>
24905 {
24906 let Literal::Number(_) = lit.as_ref() else {
24907 unreachable!()
24908 };
24909 Expression::Paren(Box::new(Paren {
24910 this: val.clone(),
24911 trailing_comments: Vec::new(),
24912 }))
24913 }
24914 Expression::Neg(_) => Expression::Paren(Box::new(Paren {
24915 this: val.clone(),
24916 trailing_comments: Vec::new(),
24917 })),
24918 Expression::Null(_) => Expression::Paren(Box::new(Paren {
24919 this: val.clone(),
24920 trailing_comments: Vec::new(),
24921 })),
24922 _ => val.clone(),
24923 };
24924 Expression::Interval(Box::new(crate::expressions::Interval {
24925 this: Some(interval_val),
24926 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
24927 unit: crate::expressions::IntervalUnit::Month,
24928 use_plural: false,
24929 }),
24930 }))
24931 };
24932
24933 // Build: date + interval
24934 let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
24935 cast_date.clone(),
24936 add_interval.clone(),
24937 )));
24938
24939 // Build LAST_DAY(date)
24940 let last_day_date = Expression::Function(Box::new(Function::new(
24941 "LAST_DAY".to_string(),
24942 vec![cast_date.clone()],
24943 )));
24944
24945 // Build LAST_DAY(date + interval)
24946 let last_day_date_plus =
24947 Expression::Function(Box::new(Function::new(
24948 "LAST_DAY".to_string(),
24949 vec![date_plus_interval.clone()],
24950 )));
24951
24952 // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
24953 let case_expr = Expression::Case(Box::new(Case {
24954 operand: None,
24955 whens: vec![(
24956 Expression::Eq(Box::new(BinaryOp::new(
24957 last_day_date,
24958 cast_date.clone(),
24959 ))),
24960 last_day_date_plus,
24961 )],
24962 else_: Some(date_plus_interval),
24963 comments: Vec::new(),
24964 inferred_type: None,
24965 }));
24966
24967 // Wrap in CAST(... AS type) if needed
24968 if let Some(dt) = return_type {
24969 Ok(Expression::Cast(Box::new(Cast {
24970 this: case_expr,
24971 to: dt,
24972 trailing_comments: Vec::new(),
24973 double_colon_syntax: false,
24974 format: None,
24975 default: None,
24976 inferred_type: None,
24977 })))
24978 } else {
24979 Ok(case_expr)
24980 }
24981 }
24982 DialectType::DuckDB => {
24983 // Non-Snowflake source: simple date + INTERVAL
24984 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
24985 {
24986 Expression::Cast(Box::new(Cast {
24987 this: date,
24988 to: DataType::Timestamp {
24989 precision: None,
24990 timezone: false,
24991 },
24992 trailing_comments: Vec::new(),
24993 double_colon_syntax: false,
24994 format: None,
24995 default: None,
24996 inferred_type: None,
24997 }))
24998 } else {
24999 date
25000 };
25001 let interval =
25002 Expression::Interval(Box::new(crate::expressions::Interval {
25003 this: Some(val),
25004 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
25005 unit: crate::expressions::IntervalUnit::Month,
25006 use_plural: false,
25007 }),
25008 }));
25009 Ok(Expression::Add(Box::new(BinaryOp::new(
25010 cast_date, interval,
25011 ))))
25012 }
25013 DialectType::Snowflake => {
25014 // Keep ADD_MONTHS when source is also Snowflake
25015 if matches!(source, DialectType::Snowflake) {
25016 Ok(Expression::Function(Box::new(Function::new(
25017 "ADD_MONTHS".to_string(),
25018 vec![date, val],
25019 ))))
25020 } else {
25021 Ok(Expression::Function(Box::new(Function::new(
25022 "DATEADD".to_string(),
25023 vec![
25024 Expression::Identifier(Identifier::new("MONTH")),
25025 val,
25026 date,
25027 ],
25028 ))))
25029 }
25030 }
25031 DialectType::Redshift => {
25032 Ok(Expression::Function(Box::new(Function::new(
25033 "DATEADD".to_string(),
25034 vec![
25035 Expression::Identifier(Identifier::new("MONTH")),
25036 val,
25037 date,
25038 ],
25039 ))))
25040 }
25041 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25042 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
25043 {
25044 Expression::Cast(Box::new(Cast {
25045 this: date,
25046 to: DataType::Timestamp {
25047 precision: None,
25048 timezone: false,
25049 },
25050 trailing_comments: Vec::new(),
25051 double_colon_syntax: false,
25052 format: None,
25053 default: None,
25054 inferred_type: None,
25055 }))
25056 } else {
25057 date
25058 };
25059 Ok(Expression::Function(Box::new(Function::new(
25060 "DATE_ADD".to_string(),
25061 vec![Expression::string("MONTH"), val, cast_date],
25062 ))))
25063 }
25064 DialectType::BigQuery => {
25065 let interval =
25066 Expression::Interval(Box::new(crate::expressions::Interval {
25067 this: Some(val),
25068 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
25069 unit: crate::expressions::IntervalUnit::Month,
25070 use_plural: false,
25071 }),
25072 }));
25073 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
25074 {
25075 Expression::Cast(Box::new(Cast {
25076 this: date,
25077 to: DataType::Custom {
25078 name: "DATETIME".to_string(),
25079 },
25080 trailing_comments: Vec::new(),
25081 double_colon_syntax: false,
25082 format: None,
25083 default: None,
25084 inferred_type: None,
25085 }))
25086 } else {
25087 date
25088 };
25089 Ok(Expression::Function(Box::new(Function::new(
25090 "DATE_ADD".to_string(),
25091 vec![cast_date, interval],
25092 ))))
25093 }
25094 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
25095 Ok(Expression::Function(Box::new(Function::new(
25096 "ADD_MONTHS".to_string(),
25097 vec![date, val],
25098 ))))
25099 }
25100 _ => {
25101 // Default: keep as AddMonths expression
25102 Ok(Expression::AddMonths(Box::new(
25103 crate::expressions::BinaryFunc {
25104 this: date,
25105 expression: val,
25106 original_name: None,
25107 inferred_type: None,
25108 },
25109 )))
25110 }
25111 }
25112 } else {
25113 Ok(e)
25114 }
25115 }
25116
25117 Action::PercentileContConvert => {
25118 // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
25119 // Presto/Trino: APPROX_PERCENTILE(col, p)
25120 // Spark/Databricks: PERCENTILE_APPROX(col, p)
25121 if let Expression::WithinGroup(wg) = e {
25122 // Extract percentile value and order by column
25123 let (percentile, _is_disc) = match &wg.this {
25124 Expression::Function(f) => {
25125 let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
25126 let pct = f.args.first().cloned().unwrap_or(Expression::Literal(
25127 Box::new(Literal::Number("0.5".to_string())),
25128 ));
25129 (pct, is_disc)
25130 }
25131 Expression::AggregateFunction(af) => {
25132 let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
25133 let pct = af.args.first().cloned().unwrap_or(Expression::Literal(
25134 Box::new(Literal::Number("0.5".to_string())),
25135 ));
25136 (pct, is_disc)
25137 }
25138 Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
25139 _ => return Ok(Expression::WithinGroup(wg)),
25140 };
25141 let col = wg.order_by.first().map(|o| o.this.clone()).unwrap_or(
25142 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
25143 );
25144
25145 let func_name = match target {
25146 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25147 "APPROX_PERCENTILE"
25148 }
25149 _ => "PERCENTILE_APPROX", // Spark, Databricks
25150 };
25151 Ok(Expression::Function(Box::new(Function::new(
25152 func_name.to_string(),
25153 vec![col, percentile],
25154 ))))
25155 } else {
25156 Ok(e)
25157 }
25158 }
25159
25160 Action::CurrentUserSparkParens => {
25161 // CURRENT_USER -> CURRENT_USER() for Spark
25162 if let Expression::CurrentUser(_) = e {
25163 Ok(Expression::Function(Box::new(Function::new(
25164 "CURRENT_USER".to_string(),
25165 vec![],
25166 ))))
25167 } else {
25168 Ok(e)
25169 }
25170 }
25171
25172 Action::SparkDateFuncCast => {
25173 // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
25174 let cast_arg = |arg: Expression| -> Expression {
25175 match target {
25176 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25177 Self::double_cast_timestamp_date(arg)
25178 }
25179 _ => {
25180 // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
25181 Self::ensure_cast_date(arg)
25182 }
25183 }
25184 };
25185 match e {
25186 Expression::Month(f) => Ok(Expression::Month(Box::new(
25187 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
25188 ))),
25189 Expression::Year(f) => Ok(Expression::Year(Box::new(
25190 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
25191 ))),
25192 Expression::Day(f) => Ok(Expression::Day(Box::new(
25193 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
25194 ))),
25195 other => Ok(other),
25196 }
25197 }
25198
25199 Action::MapFromArraysConvert => {
25200 // Expression::MapFromArrays -> target-specific
25201 if let Expression::MapFromArrays(mfa) = e {
25202 let keys = mfa.this;
25203 let values = mfa.expression;
25204 match target {
25205 DialectType::Snowflake => Ok(Expression::Function(Box::new(
25206 Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
25207 ))),
25208 _ => {
25209 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
25210 Ok(Expression::Function(Box::new(Function::new(
25211 "MAP".to_string(),
25212 vec![keys, values],
25213 ))))
25214 }
25215 }
25216 } else {
25217 Ok(e)
25218 }
25219 }
25220
25221 Action::AnyToExists => {
25222 if let Expression::Any(q) = e {
25223 if let Some(op) = q.op.clone() {
25224 let lambda_param = crate::expressions::Identifier::new("x");
25225 let rhs = Expression::Identifier(lambda_param.clone());
25226 let body = match op {
25227 crate::expressions::QuantifiedOp::Eq => {
25228 Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
25229 }
25230 crate::expressions::QuantifiedOp::Neq => {
25231 Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
25232 }
25233 crate::expressions::QuantifiedOp::Lt => {
25234 Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
25235 }
25236 crate::expressions::QuantifiedOp::Lte => {
25237 Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
25238 }
25239 crate::expressions::QuantifiedOp::Gt => {
25240 Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
25241 }
25242 crate::expressions::QuantifiedOp::Gte => {
25243 Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
25244 }
25245 };
25246 let lambda =
25247 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25248 parameters: vec![lambda_param],
25249 body,
25250 colon: false,
25251 parameter_types: Vec::new(),
25252 }));
25253 Ok(Expression::Function(Box::new(Function::new(
25254 "EXISTS".to_string(),
25255 vec![q.subquery, lambda],
25256 ))))
25257 } else {
25258 Ok(Expression::Any(q))
25259 }
25260 } else {
25261 Ok(e)
25262 }
25263 }
25264
25265 Action::GenerateSeriesConvert => {
25266 // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
25267 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
25268 // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
25269 if let Expression::Function(f) = e {
25270 if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
25271 let start = f.args[0].clone();
25272 let end = f.args[1].clone();
25273 let step = f.args.get(2).cloned();
25274
25275 // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
25276 let step = step.map(|s| Self::normalize_interval_string(s, target));
25277
25278 // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
25279 let maybe_cast_timestamp = |arg: Expression| -> Expression {
25280 if matches!(
25281 target,
25282 DialectType::Presto
25283 | DialectType::Trino
25284 | DialectType::Athena
25285 | DialectType::Spark
25286 | DialectType::Databricks
25287 | DialectType::Hive
25288 ) {
25289 match &arg {
25290 Expression::CurrentTimestamp(_) => {
25291 Expression::Cast(Box::new(Cast {
25292 this: arg,
25293 to: DataType::Timestamp {
25294 precision: None,
25295 timezone: false,
25296 },
25297 trailing_comments: Vec::new(),
25298 double_colon_syntax: false,
25299 format: None,
25300 default: None,
25301 inferred_type: None,
25302 }))
25303 }
25304 _ => arg,
25305 }
25306 } else {
25307 arg
25308 }
25309 };
25310
25311 let start = maybe_cast_timestamp(start);
25312 let end = maybe_cast_timestamp(end);
25313
25314 // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
25315 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
25316 let mut gs_args = vec![start, end];
25317 if let Some(step) = step {
25318 gs_args.push(step);
25319 }
25320 return Ok(Expression::Function(Box::new(Function::new(
25321 "GENERATE_SERIES".to_string(),
25322 gs_args,
25323 ))));
25324 }
25325
25326 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
25327 if matches!(target, DialectType::DuckDB) {
25328 let mut gs_args = vec![start, end];
25329 if let Some(step) = step {
25330 gs_args.push(step);
25331 }
25332 let gs = Expression::Function(Box::new(Function::new(
25333 "GENERATE_SERIES".to_string(),
25334 gs_args,
25335 )));
25336 return Ok(Expression::Function(Box::new(Function::new(
25337 "UNNEST".to_string(),
25338 vec![gs],
25339 ))));
25340 }
25341
25342 let mut seq_args = vec![start, end];
25343 if let Some(step) = step {
25344 seq_args.push(step);
25345 }
25346
25347 let seq = Expression::Function(Box::new(Function::new(
25348 "SEQUENCE".to_string(),
25349 seq_args,
25350 )));
25351
25352 match target {
25353 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25354 // Wrap in UNNEST
25355 Ok(Expression::Function(Box::new(Function::new(
25356 "UNNEST".to_string(),
25357 vec![seq],
25358 ))))
25359 }
25360 DialectType::Spark
25361 | DialectType::Databricks
25362 | DialectType::Hive => {
25363 // Wrap in EXPLODE
25364 Ok(Expression::Function(Box::new(Function::new(
25365 "EXPLODE".to_string(),
25366 vec![seq],
25367 ))))
25368 }
25369 _ => {
25370 // Just SEQUENCE for others
25371 Ok(seq)
25372 }
25373 }
25374 } else {
25375 Ok(Expression::Function(f))
25376 }
25377 } else {
25378 Ok(e)
25379 }
25380 }
25381
25382 Action::ConcatCoalesceWrap => {
25383 // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
25384 // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
25385 if let Expression::Function(f) = e {
25386 if f.name.eq_ignore_ascii_case("CONCAT") {
25387 let new_args: Vec<Expression> = f
25388 .args
25389 .into_iter()
25390 .map(|arg| {
25391 let cast_arg = if matches!(
25392 target,
25393 DialectType::Presto
25394 | DialectType::Trino
25395 | DialectType::Athena
25396 ) {
25397 Expression::Cast(Box::new(Cast {
25398 this: arg,
25399 to: DataType::VarChar {
25400 length: None,
25401 parenthesized_length: false,
25402 },
25403 trailing_comments: Vec::new(),
25404 double_colon_syntax: false,
25405 format: None,
25406 default: None,
25407 inferred_type: None,
25408 }))
25409 } else {
25410 arg
25411 };
25412 Expression::Function(Box::new(Function::new(
25413 "COALESCE".to_string(),
25414 vec![cast_arg, Expression::string("")],
25415 )))
25416 })
25417 .collect();
25418 Ok(Expression::Function(Box::new(Function::new(
25419 "CONCAT".to_string(),
25420 new_args,
25421 ))))
25422 } else {
25423 Ok(Expression::Function(f))
25424 }
25425 } else {
25426 Ok(e)
25427 }
25428 }
25429
25430 Action::PipeConcatToConcat => {
25431 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
25432 if let Expression::Concat(op) = e {
25433 let cast_left = Expression::Cast(Box::new(Cast {
25434 this: op.left,
25435 to: DataType::VarChar {
25436 length: None,
25437 parenthesized_length: false,
25438 },
25439 trailing_comments: Vec::new(),
25440 double_colon_syntax: false,
25441 format: None,
25442 default: None,
25443 inferred_type: None,
25444 }));
25445 let cast_right = Expression::Cast(Box::new(Cast {
25446 this: op.right,
25447 to: DataType::VarChar {
25448 length: None,
25449 parenthesized_length: false,
25450 },
25451 trailing_comments: Vec::new(),
25452 double_colon_syntax: false,
25453 format: None,
25454 default: None,
25455 inferred_type: None,
25456 }));
25457 Ok(Expression::Function(Box::new(Function::new(
25458 "CONCAT".to_string(),
25459 vec![cast_left, cast_right],
25460 ))))
25461 } else {
25462 Ok(e)
25463 }
25464 }
25465
25466 Action::DivFuncConvert => {
25467 // DIV(a, b) -> target-specific integer division
25468 if let Expression::Function(f) = e {
25469 if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
25470 let a = f.args[0].clone();
25471 let b = f.args[1].clone();
25472 match target {
25473 DialectType::DuckDB => {
25474 // DIV(a, b) -> CAST(a // b AS DECIMAL)
25475 let int_div = Expression::IntDiv(Box::new(
25476 crate::expressions::BinaryFunc {
25477 this: a,
25478 expression: b,
25479 original_name: None,
25480 inferred_type: None,
25481 },
25482 ));
25483 Ok(Expression::Cast(Box::new(Cast {
25484 this: int_div,
25485 to: DataType::Decimal {
25486 precision: None,
25487 scale: None,
25488 },
25489 trailing_comments: Vec::new(),
25490 double_colon_syntax: false,
25491 format: None,
25492 default: None,
25493 inferred_type: None,
25494 })))
25495 }
25496 DialectType::BigQuery => {
25497 // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
25498 let div_func = Expression::Function(Box::new(Function::new(
25499 "DIV".to_string(),
25500 vec![a, b],
25501 )));
25502 Ok(Expression::Cast(Box::new(Cast {
25503 this: div_func,
25504 to: DataType::Custom {
25505 name: "NUMERIC".to_string(),
25506 },
25507 trailing_comments: Vec::new(),
25508 double_colon_syntax: false,
25509 format: None,
25510 default: None,
25511 inferred_type: None,
25512 })))
25513 }
25514 DialectType::SQLite => {
25515 // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
25516 let cast_a = Expression::Cast(Box::new(Cast {
25517 this: a,
25518 to: DataType::Custom {
25519 name: "REAL".to_string(),
25520 },
25521 trailing_comments: Vec::new(),
25522 double_colon_syntax: false,
25523 format: None,
25524 default: None,
25525 inferred_type: None,
25526 }));
25527 let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
25528 let cast_int = Expression::Cast(Box::new(Cast {
25529 this: div,
25530 to: DataType::Int {
25531 length: None,
25532 integer_spelling: true,
25533 },
25534 trailing_comments: Vec::new(),
25535 double_colon_syntax: false,
25536 format: None,
25537 default: None,
25538 inferred_type: None,
25539 }));
25540 Ok(Expression::Cast(Box::new(Cast {
25541 this: cast_int,
25542 to: DataType::Custom {
25543 name: "REAL".to_string(),
25544 },
25545 trailing_comments: Vec::new(),
25546 double_colon_syntax: false,
25547 format: None,
25548 default: None,
25549 inferred_type: None,
25550 })))
25551 }
25552 _ => Ok(Expression::Function(f)),
25553 }
25554 } else {
25555 Ok(Expression::Function(f))
25556 }
25557 } else {
25558 Ok(e)
25559 }
25560 }
25561
25562 Action::JsonObjectAggConvert => {
25563 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
25564 match e {
25565 Expression::Function(f) => Ok(Expression::Function(Box::new(
25566 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
25567 ))),
25568 Expression::AggregateFunction(af) => {
25569 // AggregateFunction stores all args in the `args` vec
25570 Ok(Expression::Function(Box::new(Function::new(
25571 "JSON_GROUP_OBJECT".to_string(),
25572 af.args,
25573 ))))
25574 }
25575 other => Ok(other),
25576 }
25577 }
25578
25579 Action::JsonbExistsConvert => {
25580 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
25581 if let Expression::Function(f) = e {
25582 if f.args.len() == 2 {
25583 let json_expr = f.args[0].clone();
25584 let key = match &f.args[1] {
25585 Expression::Literal(lit)
25586 if matches!(
25587 lit.as_ref(),
25588 crate::expressions::Literal::String(_)
25589 ) =>
25590 {
25591 let crate::expressions::Literal::String(s) = lit.as_ref()
25592 else {
25593 unreachable!()
25594 };
25595 format!("$.{}", s)
25596 }
25597 _ => return Ok(Expression::Function(f)),
25598 };
25599 Ok(Expression::Function(Box::new(Function::new(
25600 "JSON_EXISTS".to_string(),
25601 vec![json_expr, Expression::string(&key)],
25602 ))))
25603 } else {
25604 Ok(Expression::Function(f))
25605 }
25606 } else {
25607 Ok(e)
25608 }
25609 }
25610
25611 Action::DateBinConvert => {
25612 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
25613 if let Expression::Function(f) = e {
25614 Ok(Expression::Function(Box::new(Function::new(
25615 "TIME_BUCKET".to_string(),
25616 f.args,
25617 ))))
25618 } else {
25619 Ok(e)
25620 }
25621 }
25622
25623 Action::MysqlCastCharToText => {
25624 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
25625 if let Expression::Cast(mut c) = e {
25626 c.to = DataType::Text;
25627 Ok(Expression::Cast(c))
25628 } else {
25629 Ok(e)
25630 }
25631 }
25632
25633 Action::SparkCastVarcharToString => {
25634 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
25635 match e {
25636 Expression::Cast(mut c) => {
25637 c.to = Self::normalize_varchar_to_string(c.to);
25638 Ok(Expression::Cast(c))
25639 }
25640 Expression::TryCast(mut c) => {
25641 c.to = Self::normalize_varchar_to_string(c.to);
25642 Ok(Expression::TryCast(c))
25643 }
25644 _ => Ok(e),
25645 }
25646 }
25647
25648 Action::MinMaxToLeastGreatest => {
25649 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
25650 if let Expression::Function(f) = e {
25651 let new_name = if f.name.eq_ignore_ascii_case("MIN") {
25652 "LEAST"
25653 } else if f.name.eq_ignore_ascii_case("MAX") {
25654 "GREATEST"
25655 } else {
25656 return Ok(Expression::Function(f));
25657 };
25658 Ok(Expression::Function(Box::new(Function::new(
25659 new_name.to_string(),
25660 f.args,
25661 ))))
25662 } else {
25663 Ok(e)
25664 }
25665 }
25666
25667 Action::ClickHouseUniqToApproxCountDistinct => {
25668 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
25669 if let Expression::Function(f) = e {
25670 Ok(Expression::Function(Box::new(Function::new(
25671 "APPROX_COUNT_DISTINCT".to_string(),
25672 f.args,
25673 ))))
25674 } else {
25675 Ok(e)
25676 }
25677 }
25678
25679 Action::ClickHouseAnyToAnyValue => {
25680 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
25681 if let Expression::Function(f) = e {
25682 Ok(Expression::Function(Box::new(Function::new(
25683 "ANY_VALUE".to_string(),
25684 f.args,
25685 ))))
25686 } else {
25687 Ok(e)
25688 }
25689 }
25690
25691 Action::OracleVarchar2ToVarchar => {
25692 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
25693 if let Expression::DataType(DataType::Custom { ref name }) = e {
25694 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
25695 let starts_varchar2 =
25696 name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2(");
25697 let starts_nvarchar2 =
25698 name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2(");
25699 let inner = if starts_varchar2 || starts_nvarchar2 {
25700 let start = if starts_nvarchar2 { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
25701 let end = name.len() - 1; // skip trailing ")"
25702 Some(&name[start..end])
25703 } else {
25704 Option::None
25705 };
25706 if let Some(inner_str) = inner {
25707 // Parse the number part, ignoring BYTE/CHAR qualifier
25708 let num_str = inner_str.split_whitespace().next().unwrap_or("");
25709 if let Ok(n) = num_str.parse::<u32>() {
25710 Ok(Expression::DataType(DataType::VarChar {
25711 length: Some(n),
25712 parenthesized_length: false,
25713 }))
25714 } else {
25715 Ok(e)
25716 }
25717 } else {
25718 // Plain VARCHAR2 / NVARCHAR2 without parens
25719 Ok(Expression::DataType(DataType::VarChar {
25720 length: Option::None,
25721 parenthesized_length: false,
25722 }))
25723 }
25724 } else {
25725 Ok(e)
25726 }
25727 }
25728
25729 Action::Nvl2Expand => {
25730 // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
25731 // But keep as NVL2 for dialects that support it natively
25732 let nvl2_native = matches!(
25733 target,
25734 DialectType::Oracle
25735 | DialectType::Snowflake
25736 | DialectType::Redshift
25737 | DialectType::Teradata
25738 | DialectType::Spark
25739 | DialectType::Databricks
25740 );
25741 let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
25742 if nvl2_native {
25743 return Ok(Expression::Nvl2(nvl2));
25744 }
25745 (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
25746 } else if let Expression::Function(f) = e {
25747 if nvl2_native {
25748 return Ok(Expression::Function(Box::new(Function::new(
25749 "NVL2".to_string(),
25750 f.args,
25751 ))));
25752 }
25753 if f.args.len() < 2 {
25754 return Ok(Expression::Function(f));
25755 }
25756 let mut args = f.args;
25757 let a = args.remove(0);
25758 let b = args.remove(0);
25759 let c = if !args.is_empty() {
25760 Some(args.remove(0))
25761 } else {
25762 Option::None
25763 };
25764 (a, b, c)
25765 } else {
25766 return Ok(e);
25767 };
25768 // Build: NOT (a IS NULL)
25769 let is_null = Expression::IsNull(Box::new(IsNull {
25770 this: a,
25771 not: false,
25772 postfix_form: false,
25773 }));
25774 let not_null = Expression::Not(Box::new(crate::expressions::UnaryOp {
25775 this: is_null,
25776 inferred_type: None,
25777 }));
25778 Ok(Expression::Case(Box::new(Case {
25779 operand: Option::None,
25780 whens: vec![(not_null, b)],
25781 else_: c,
25782 comments: Vec::new(),
25783 inferred_type: None,
25784 })))
25785 }
25786
25787 Action::IfnullToCoalesce => {
25788 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
25789 if let Expression::Coalesce(mut cf) = e {
25790 cf.original_name = Option::None;
25791 Ok(Expression::Coalesce(cf))
25792 } else if let Expression::Function(f) = e {
25793 Ok(Expression::Function(Box::new(Function::new(
25794 "COALESCE".to_string(),
25795 f.args,
25796 ))))
25797 } else {
25798 Ok(e)
25799 }
25800 }
25801
25802 Action::IsAsciiConvert => {
25803 // IS_ASCII(x) -> dialect-specific ASCII check
25804 if let Expression::Function(f) = e {
25805 let arg = f.args.into_iter().next().unwrap();
25806 match target {
25807 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
25808 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
25809 Ok(Expression::Function(Box::new(Function::new(
25810 "REGEXP_LIKE".to_string(),
25811 vec![
25812 arg,
25813 Expression::Literal(Box::new(Literal::String(
25814 "^[[:ascii:]]*$".to_string(),
25815 ))),
25816 ],
25817 ))))
25818 }
25819 DialectType::PostgreSQL
25820 | DialectType::Redshift
25821 | DialectType::Materialize
25822 | DialectType::RisingWave => {
25823 // (x ~ '^[[:ascii:]]*$')
25824 Ok(Expression::Paren(Box::new(Paren {
25825 this: Expression::RegexpLike(Box::new(
25826 crate::expressions::RegexpFunc {
25827 this: arg,
25828 pattern: Expression::Literal(Box::new(
25829 Literal::String("^[[:ascii:]]*$".to_string()),
25830 )),
25831 flags: Option::None,
25832 },
25833 )),
25834 trailing_comments: Vec::new(),
25835 })))
25836 }
25837 DialectType::SQLite => {
25838 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
25839 let hex_lit = Expression::Literal(Box::new(Literal::HexString(
25840 "2a5b5e012d7f5d2a".to_string(),
25841 )));
25842 let cast_expr = Expression::Cast(Box::new(Cast {
25843 this: hex_lit,
25844 to: DataType::Text,
25845 trailing_comments: Vec::new(),
25846 double_colon_syntax: false,
25847 format: Option::None,
25848 default: Option::None,
25849 inferred_type: None,
25850 }));
25851 let glob = Expression::Glob(Box::new(BinaryOp {
25852 left: arg,
25853 right: cast_expr,
25854 left_comments: Vec::new(),
25855 operator_comments: Vec::new(),
25856 trailing_comments: Vec::new(),
25857 inferred_type: None,
25858 }));
25859 Ok(Expression::Paren(Box::new(Paren {
25860 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
25861 this: glob,
25862 inferred_type: None,
25863 })),
25864 trailing_comments: Vec::new(),
25865 })))
25866 }
25867 DialectType::TSQL | DialectType::Fabric => {
25868 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
25869 let hex_lit = Expression::Literal(Box::new(Literal::HexNumber(
25870 "255b5e002d7f5d25".to_string(),
25871 )));
25872 let convert_expr = Expression::Convert(Box::new(
25873 crate::expressions::ConvertFunc {
25874 this: hex_lit,
25875 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
25876 style: None,
25877 },
25878 ));
25879 let collated = Expression::Collation(Box::new(
25880 crate::expressions::CollationExpr {
25881 this: convert_expr,
25882 collation: "Latin1_General_BIN".to_string(),
25883 quoted: false,
25884 double_quoted: false,
25885 },
25886 ));
25887 let patindex = Expression::Function(Box::new(Function::new(
25888 "PATINDEX".to_string(),
25889 vec![collated, arg],
25890 )));
25891 let zero =
25892 Expression::Literal(Box::new(Literal::Number("0".to_string())));
25893 let eq_zero = Expression::Eq(Box::new(BinaryOp {
25894 left: patindex,
25895 right: zero,
25896 left_comments: Vec::new(),
25897 operator_comments: Vec::new(),
25898 trailing_comments: Vec::new(),
25899 inferred_type: None,
25900 }));
25901 Ok(Expression::Paren(Box::new(Paren {
25902 this: eq_zero,
25903 trailing_comments: Vec::new(),
25904 })))
25905 }
25906 DialectType::Oracle => {
25907 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
25908 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
25909 let s1 = Expression::Literal(Box::new(Literal::String(
25910 "^[".to_string(),
25911 )));
25912 let chr1 = Expression::Function(Box::new(Function::new(
25913 "CHR".to_string(),
25914 vec![Expression::Literal(Box::new(Literal::Number(
25915 "1".to_string(),
25916 )))],
25917 )));
25918 let dash =
25919 Expression::Literal(Box::new(Literal::String("-".to_string())));
25920 let chr127 = Expression::Function(Box::new(Function::new(
25921 "CHR".to_string(),
25922 vec![Expression::Literal(Box::new(Literal::Number(
25923 "127".to_string(),
25924 )))],
25925 )));
25926 let s2 = Expression::Literal(Box::new(Literal::String(
25927 "]*$".to_string(),
25928 )));
25929 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
25930 let concat1 =
25931 Expression::DPipe(Box::new(crate::expressions::DPipe {
25932 this: Box::new(s1),
25933 expression: Box::new(chr1),
25934 safe: None,
25935 }));
25936 let concat2 =
25937 Expression::DPipe(Box::new(crate::expressions::DPipe {
25938 this: Box::new(concat1),
25939 expression: Box::new(dash),
25940 safe: None,
25941 }));
25942 let concat3 =
25943 Expression::DPipe(Box::new(crate::expressions::DPipe {
25944 this: Box::new(concat2),
25945 expression: Box::new(chr127),
25946 safe: None,
25947 }));
25948 let concat4 =
25949 Expression::DPipe(Box::new(crate::expressions::DPipe {
25950 this: Box::new(concat3),
25951 expression: Box::new(s2),
25952 safe: None,
25953 }));
25954 let regexp_like = Expression::Function(Box::new(Function::new(
25955 "REGEXP_LIKE".to_string(),
25956 vec![arg, concat4],
25957 )));
25958 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
25959 let true_expr =
25960 Expression::Column(Box::new(crate::expressions::Column {
25961 name: Identifier {
25962 name: "TRUE".to_string(),
25963 quoted: false,
25964 trailing_comments: Vec::new(),
25965 span: None,
25966 },
25967 table: None,
25968 join_mark: false,
25969 trailing_comments: Vec::new(),
25970 span: None,
25971 inferred_type: None,
25972 }));
25973 let nvl = Expression::Function(Box::new(Function::new(
25974 "NVL".to_string(),
25975 vec![regexp_like, true_expr],
25976 )));
25977 Ok(nvl)
25978 }
25979 _ => Ok(Expression::Function(Box::new(Function::new(
25980 "IS_ASCII".to_string(),
25981 vec![arg],
25982 )))),
25983 }
25984 } else {
25985 Ok(e)
25986 }
25987 }
25988
25989 Action::StrPositionConvert => {
25990 // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
25991 if let Expression::Function(f) = e {
25992 if f.args.len() < 2 {
25993 return Ok(Expression::Function(f));
25994 }
25995 let mut args = f.args;
25996
25997 let haystack = args.remove(0);
25998 let needle = args.remove(0);
25999 let position = if !args.is_empty() {
26000 Some(args.remove(0))
26001 } else {
26002 Option::None
26003 };
26004 let occurrence = if !args.is_empty() {
26005 Some(args.remove(0))
26006 } else {
26007 Option::None
26008 };
26009
26010 // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
26011 // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
26012 fn build_position_expansion(
26013 haystack: Expression,
26014 needle: Expression,
26015 pos: Expression,
26016 occurrence: Option<Expression>,
26017 inner_func: &str,
26018 wrapper: &str, // "CASE", "IF", "IIF"
26019 ) -> Expression {
26020 let substr = Expression::Function(Box::new(Function::new(
26021 "SUBSTRING".to_string(),
26022 vec![haystack, pos.clone()],
26023 )));
26024 let mut inner_args = vec![substr, needle];
26025 if let Some(occ) = occurrence {
26026 inner_args.push(occ);
26027 }
26028 let inner_call = Expression::Function(Box::new(Function::new(
26029 inner_func.to_string(),
26030 inner_args,
26031 )));
26032 let zero =
26033 Expression::Literal(Box::new(Literal::Number("0".to_string())));
26034 let one =
26035 Expression::Literal(Box::new(Literal::Number("1".to_string())));
26036 let eq_zero = Expression::Eq(Box::new(BinaryOp {
26037 left: inner_call.clone(),
26038 right: zero.clone(),
26039 left_comments: Vec::new(),
26040 operator_comments: Vec::new(),
26041 trailing_comments: Vec::new(),
26042 inferred_type: None,
26043 }));
26044 let add_pos = Expression::Add(Box::new(BinaryOp {
26045 left: inner_call,
26046 right: pos,
26047 left_comments: Vec::new(),
26048 operator_comments: Vec::new(),
26049 trailing_comments: Vec::new(),
26050 inferred_type: None,
26051 }));
26052 let sub_one = Expression::Sub(Box::new(BinaryOp {
26053 left: add_pos,
26054 right: one,
26055 left_comments: Vec::new(),
26056 operator_comments: Vec::new(),
26057 trailing_comments: Vec::new(),
26058 inferred_type: None,
26059 }));
26060
26061 match wrapper {
26062 "CASE" => Expression::Case(Box::new(Case {
26063 operand: Option::None,
26064 whens: vec![(eq_zero, zero)],
26065 else_: Some(sub_one),
26066 comments: Vec::new(),
26067 inferred_type: None,
26068 })),
26069 "IIF" => Expression::Function(Box::new(Function::new(
26070 "IIF".to_string(),
26071 vec![eq_zero, zero, sub_one],
26072 ))),
26073 _ => Expression::Function(Box::new(Function::new(
26074 "IF".to_string(),
26075 vec![eq_zero, zero, sub_one],
26076 ))),
26077 }
26078 }
26079
26080 match target {
26081 // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
26082 DialectType::Athena
26083 | DialectType::DuckDB
26084 | DialectType::Presto
26085 | DialectType::Trino
26086 | DialectType::Drill => {
26087 if let Some(pos) = position {
26088 let wrapper = if matches!(target, DialectType::DuckDB) {
26089 "CASE"
26090 } else {
26091 "IF"
26092 };
26093 let result = build_position_expansion(
26094 haystack, needle, pos, occurrence, "STRPOS", wrapper,
26095 );
26096 if matches!(target, DialectType::Drill) {
26097 // Drill uses backtick-quoted `IF`
26098 if let Expression::Function(mut f) = result {
26099 f.name = "`IF`".to_string();
26100 Ok(Expression::Function(f))
26101 } else {
26102 Ok(result)
26103 }
26104 } else {
26105 Ok(result)
26106 }
26107 } else {
26108 Ok(Expression::Function(Box::new(Function::new(
26109 "STRPOS".to_string(),
26110 vec![haystack, needle],
26111 ))))
26112 }
26113 }
26114 // SQLite: IIF wrapper
26115 DialectType::SQLite => {
26116 if let Some(pos) = position {
26117 Ok(build_position_expansion(
26118 haystack, needle, pos, occurrence, "INSTR", "IIF",
26119 ))
26120 } else {
26121 Ok(Expression::Function(Box::new(Function::new(
26122 "INSTR".to_string(),
26123 vec![haystack, needle],
26124 ))))
26125 }
26126 }
26127 // INSTR group: Teradata, BigQuery, Oracle
26128 DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
26129 let mut a = vec![haystack, needle];
26130 if let Some(pos) = position {
26131 a.push(pos);
26132 }
26133 if let Some(occ) = occurrence {
26134 a.push(occ);
26135 }
26136 Ok(Expression::Function(Box::new(Function::new(
26137 "INSTR".to_string(),
26138 a,
26139 ))))
26140 }
26141 // CHARINDEX group: Snowflake, TSQL
26142 DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
26143 let mut a = vec![needle, haystack];
26144 if let Some(pos) = position {
26145 a.push(pos);
26146 }
26147 Ok(Expression::Function(Box::new(Function::new(
26148 "CHARINDEX".to_string(),
26149 a,
26150 ))))
26151 }
// POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
DialectType::PostgreSQL
| DialectType::Materialize
| DialectType::RisingWave
| DialectType::Redshift => {
    if let Some(pos) = position {
        // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
        // ELSE POSITION(...) + pos - 1 END
        //
        // POSITION in these dialects has no start-offset parameter, so the
        // offset is emulated: search only the suffix starting at `pos`, and
        // when the needle is found, re-base the suffix-relative index back
        // onto the full string by adding pos - 1.
        let substr = Expression::Substring(Box::new(
            crate::expressions::SubstringFunc {
                this: haystack,
                start: pos.clone(),
                length: Option::None,
                // presumably renders as SUBSTRING(x FROM n) — the FROM form
                from_for_syntax: true,
            },
        ));
        // POSITION(needle IN <suffix>)
        let pos_in = Expression::StrPosition(Box::new(
            crate::expressions::StrPosition {
                this: Box::new(substr),
                substr: Some(Box::new(needle)),
                position: Option::None,
                occurrence: Option::None,
            },
        ));
        let zero = Expression::Literal(Box::new(Literal::Number(
            "0".to_string(),
        )));
        let one = Expression::Literal(Box::new(Literal::Number(
            "1".to_string(),
        )));
        // POSITION(...) = 0 — needle absent from the suffix, so the whole
        // expression must yield 0 (the WHEN branch below).
        let eq_zero = Expression::Eq(Box::new(BinaryOp {
            left: pos_in.clone(),
            right: zero.clone(),
            left_comments: Vec::new(),
            operator_comments: Vec::new(),
            trailing_comments: Vec::new(),
            inferred_type: None,
        }));
        // POSITION(...) + pos
        let add_pos = Expression::Add(Box::new(BinaryOp {
            left: pos_in,
            right: pos,
            left_comments: Vec::new(),
            operator_comments: Vec::new(),
            trailing_comments: Vec::new(),
            inferred_type: None,
        }));
        // (POSITION(...) + pos) - 1 — the re-based full-string index
        let sub_one = Expression::Sub(Box::new(BinaryOp {
            left: add_pos,
            right: one,
            left_comments: Vec::new(),
            operator_comments: Vec::new(),
            trailing_comments: Vec::new(),
            inferred_type: None,
        }));
        Ok(Expression::Case(Box::new(Case {
            operand: Option::None,
            whens: vec![(eq_zero, zero)],
            else_: Some(sub_one),
            comments: Vec::new(),
            inferred_type: None,
        })))
    } else {
        // No starting offset requested: a plain POSITION(needle IN haystack).
        Ok(Expression::StrPosition(Box::new(
            crate::expressions::StrPosition {
                this: Box::new(haystack),
                substr: Some(Box::new(needle)),
                position: Option::None,
                occurrence: Option::None,
            },
        )))
    }
}
26224 // LOCATE group: MySQL, Hive, Spark, Databricks, Doris
26225 DialectType::MySQL
26226 | DialectType::SingleStore
26227 | DialectType::TiDB
26228 | DialectType::Hive
26229 | DialectType::Spark
26230 | DialectType::Databricks
26231 | DialectType::Doris
26232 | DialectType::StarRocks => {
26233 let mut a = vec![needle, haystack];
26234 if let Some(pos) = position {
26235 a.push(pos);
26236 }
26237 Ok(Expression::Function(Box::new(Function::new(
26238 "LOCATE".to_string(),
26239 a,
26240 ))))
26241 }
26242 // ClickHouse: POSITION(haystack, needle[, position])
26243 DialectType::ClickHouse => {
26244 let mut a = vec![haystack, needle];
26245 if let Some(pos) = position {
26246 a.push(pos);
26247 }
26248 Ok(Expression::Function(Box::new(Function::new(
26249 "POSITION".to_string(),
26250 a,
26251 ))))
26252 }
26253 _ => {
26254 let mut a = vec![haystack, needle];
26255 if let Some(pos) = position {
26256 a.push(pos);
26257 }
26258 if let Some(occ) = occurrence {
26259 a.push(occ);
26260 }
26261 Ok(Expression::Function(Box::new(Function::new(
26262 "STR_POSITION".to_string(),
26263 a,
26264 ))))
26265 }
26266 }
26267 } else {
26268 Ok(e)
26269 }
26270 }
26271
26272 Action::ArraySumConvert => {
26273 // ARRAY_SUM(arr) -> dialect-specific
26274 if let Expression::Function(f) = e {
26275 let args = f.args;
26276 match target {
26277 DialectType::DuckDB => Ok(Expression::Function(Box::new(
26278 Function::new("LIST_SUM".to_string(), args),
26279 ))),
26280 DialectType::Spark | DialectType::Databricks => {
26281 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
26282 let arr = args.into_iter().next().unwrap();
26283 let zero =
26284 Expression::Literal(Box::new(Literal::Number("0".to_string())));
26285 let acc_id = Identifier::new("acc");
26286 let x_id = Identifier::new("x");
26287 let acc = Expression::Identifier(acc_id.clone());
26288 let x = Expression::Identifier(x_id.clone());
26289 let add = Expression::Add(Box::new(BinaryOp {
26290 left: acc.clone(),
26291 right: x,
26292 left_comments: Vec::new(),
26293 operator_comments: Vec::new(),
26294 trailing_comments: Vec::new(),
26295 inferred_type: None,
26296 }));
26297 let lambda1 =
26298 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26299 parameters: vec![acc_id.clone(), x_id],
26300 body: add,
26301 colon: false,
26302 parameter_types: Vec::new(),
26303 }));
26304 let lambda2 =
26305 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26306 parameters: vec![acc_id],
26307 body: acc,
26308 colon: false,
26309 parameter_types: Vec::new(),
26310 }));
26311 Ok(Expression::Function(Box::new(Function::new(
26312 "AGGREGATE".to_string(),
26313 vec![arr, zero, lambda1, lambda2],
26314 ))))
26315 }
26316 DialectType::Presto | DialectType::Athena => {
26317 // Presto/Athena keep ARRAY_SUM natively
26318 Ok(Expression::Function(Box::new(Function::new(
26319 "ARRAY_SUM".to_string(),
26320 args,
26321 ))))
26322 }
26323 DialectType::Trino => {
26324 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
26325 if args.len() == 1 {
26326 let arr = args.into_iter().next().unwrap();
26327 let zero = Expression::Literal(Box::new(Literal::Number(
26328 "0".to_string(),
26329 )));
26330 let acc_id = Identifier::new("acc");
26331 let x_id = Identifier::new("x");
26332 let acc = Expression::Identifier(acc_id.clone());
26333 let x = Expression::Identifier(x_id.clone());
26334 let add = Expression::Add(Box::new(BinaryOp {
26335 left: acc.clone(),
26336 right: x,
26337 left_comments: Vec::new(),
26338 operator_comments: Vec::new(),
26339 trailing_comments: Vec::new(),
26340 inferred_type: None,
26341 }));
26342 let lambda1 = Expression::Lambda(Box::new(
26343 crate::expressions::LambdaExpr {
26344 parameters: vec![acc_id.clone(), x_id],
26345 body: add,
26346 colon: false,
26347 parameter_types: Vec::new(),
26348 },
26349 ));
26350 let lambda2 = Expression::Lambda(Box::new(
26351 crate::expressions::LambdaExpr {
26352 parameters: vec![acc_id],
26353 body: acc,
26354 colon: false,
26355 parameter_types: Vec::new(),
26356 },
26357 ));
26358 Ok(Expression::Function(Box::new(Function::new(
26359 "REDUCE".to_string(),
26360 vec![arr, zero, lambda1, lambda2],
26361 ))))
26362 } else {
26363 Ok(Expression::Function(Box::new(Function::new(
26364 "ARRAY_SUM".to_string(),
26365 args,
26366 ))))
26367 }
26368 }
26369 DialectType::ClickHouse => {
26370 // arraySum(lambda, arr) or arraySum(arr)
26371 Ok(Expression::Function(Box::new(Function::new(
26372 "arraySum".to_string(),
26373 args,
26374 ))))
26375 }
26376 _ => Ok(Expression::Function(Box::new(Function::new(
26377 "ARRAY_SUM".to_string(),
26378 args,
26379 )))),
26380 }
26381 } else {
26382 Ok(e)
26383 }
26384 }
26385
26386 Action::ArraySizeConvert => {
26387 if let Expression::Function(f) = e {
26388 Ok(Expression::Function(Box::new(Function::new(
26389 "REPEATED_COUNT".to_string(),
26390 f.args,
26391 ))))
26392 } else {
26393 Ok(e)
26394 }
26395 }
26396
Action::ArrayAnyConvert => {
    // ARRAY_ANY(arr, lambda) -> dialect-specific existence test.
    // Most targets emit the pattern
    //   (LEN(arr) = 0 OR LEN(filter(arr, lambda)) <> 0)
    // i.e. "vacuously true on an empty array, otherwise true iff the
    // filtered array is non-empty".
    if let Expression::Function(f) = e {
        let mut args = f.args;
        if args.len() == 2 {
            let arr = args.remove(0);
            let lambda = args.remove(0);

            // Extract lambda parameter name and body; a non-lambda second
            // argument is treated as a bare predicate with parameter "x".
            let (param_name, pred_body) =
                if let Expression::Lambda(ref lam) = lambda {
                    let name = if let Some(p) = lam.parameters.first() {
                        p.name.clone()
                    } else {
                        "x".to_string()
                    };
                    (name, lam.body.clone())
                } else {
                    ("x".to_string(), lambda.clone())
                };

            // Helper: build a function call Expression
            let make_func = |name: &str, args: Vec<Expression>| -> Expression {
                Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    args,
                )))
            };

            // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren.
            // `len_args_extra` carries trailing arguments some dialects need
            // (PostgreSQL's ARRAY_LENGTH takes a dimension argument).
            // NOTE: this closure borrows `arr`; arms that move `arr` (e.g.
            // the ANY_MATCH arm) must not call it afterwards.
            let build_filter_pattern = |len_func: &str,
                                        len_args_extra: Vec<Expression>,
                                        filter_expr: Expression|
             -> Expression {
                // len_func(arr, ...extra) = 0
                let mut len_arr_args = vec![arr.clone()];
                len_arr_args.extend(len_args_extra.clone());
                let len_arr = make_func(len_func, len_arr_args);
                let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
                    len_arr,
                    Expression::number(0),
                )));

                // len_func(filter_expr, ...extra) <> 0
                let mut len_filter_args = vec![filter_expr];
                len_filter_args.extend(len_args_extra);
                let len_filter = make_func(len_func, len_filter_args);
                let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
                    len_filter,
                    Expression::number(0),
                )));

                // (eq_zero OR neq_zero)
                let or_expr =
                    Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
                Expression::Paren(Box::new(Paren {
                    this: or_expr,
                    trailing_comments: Vec::new(),
                }))
            };

            match target {
                DialectType::Trino | DialectType::Presto | DialectType::Athena => {
                    // Native existence predicate: ANY_MATCH(arr, lambda).
                    Ok(make_func("ANY_MATCH", vec![arr, lambda]))
                }
                DialectType::ClickHouse => {
                    // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
                    // ClickHouse arrayFilter takes lambda first, then array
                    let filter_expr =
                        make_func("arrayFilter", vec![lambda, arr.clone()]);
                    Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
                }
                DialectType::Databricks | DialectType::Spark => {
                    // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
                    let filter_expr =
                        make_func("FILTER", vec![arr.clone(), lambda]);
                    Ok(build_filter_pattern("SIZE", vec![], filter_expr))
                }
                DialectType::DuckDB => {
                    // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
                    let filter_expr =
                        make_func("LIST_FILTER", vec![arr.clone(), lambda]);
                    Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
                }
                DialectType::Teradata => {
                    // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
                    let filter_expr =
                        make_func("FILTER", vec![arr.clone(), lambda]);
                    Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
                }
                DialectType::BigQuery => {
                    // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
                    // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
                    // BigQuery has no higher-order FILTER, so the lambda is
                    // rewritten as a correlated ARRAY(SELECT ...) subquery
                    // reusing the lambda's own parameter name as the alias.
                    let param_col = Expression::column(&param_name);
                    let unnest_expr = Expression::Unnest(Box::new(
                        crate::expressions::UnnestFunc {
                            this: arr.clone(),
                            expressions: vec![],
                            with_ordinality: false,
                            alias: Some(Identifier::new(&param_name)),
                            offset_alias: None,
                        },
                    ));
                    let mut sel = crate::expressions::Select::default();
                    sel.expressions = vec![param_col];
                    sel.from = Some(crate::expressions::From {
                        expressions: vec![unnest_expr],
                    });
                    sel.where_clause =
                        Some(crate::expressions::Where { this: pred_body });
                    let array_subquery =
                        make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                    Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
                }
                DialectType::PostgreSQL => {
                    // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
                    // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
                    // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
                    let param_col = Expression::column(&param_name);
                    let unnest_with_alias =
                        Expression::Alias(Box::new(crate::expressions::Alias {
                            this: Expression::Unnest(Box::new(
                                crate::expressions::UnnestFunc {
                                    this: arr.clone(),
                                    expressions: vec![],
                                    with_ordinality: false,
                                    alias: None,
                                    offset_alias: None,
                                },
                            )),
                            alias: Identifier::new("_t0"),
                            column_aliases: vec![Identifier::new(&param_name)],
                            pre_alias_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                    let mut sel = crate::expressions::Select::default();
                    sel.expressions = vec![param_col];
                    sel.from = Some(crate::expressions::From {
                        expressions: vec![unnest_with_alias],
                    });
                    sel.where_clause =
                        Some(crate::expressions::Where { this: pred_body });
                    let array_subquery =
                        make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                    // ARRAY_LENGTH in PostgreSQL needs the dimension (1) as
                    // a second argument — passed via len_args_extra.
                    Ok(build_filter_pattern(
                        "ARRAY_LENGTH",
                        vec![Expression::number(1)],
                        array_subquery,
                    ))
                }
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_ANY".to_string(),
                    vec![arr, lambda],
                )))),
            }
        } else {
            // Unexpected arity: keep the canonical ARRAY_ANY call untouched.
            Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_ANY".to_string(),
                args,
            ))))
        }
    } else {
        Ok(e)
    }
}
26562
26563 Action::DecodeSimplify => {
26564 // DECODE(x, search1, result1, ..., default) -> CASE WHEN ... THEN result1 ... [ELSE default] END
26565 // For literal search values: CASE WHEN x = search THEN result
26566 // For NULL search: CASE WHEN x IS NULL THEN result
26567 // For non-literal (column, expr): CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
26568 fn is_decode_literal(e: &Expression) -> bool {
26569 matches!(
26570 e,
26571 Expression::Literal(_) | Expression::Boolean(_) | Expression::Neg(_)
26572 )
26573 }
26574
26575 let build_decode_case =
26576 |this_expr: Expression,
26577 pairs: Vec<(Expression, Expression)>,
26578 default: Option<Expression>| {
26579 let whens: Vec<(Expression, Expression)> = pairs
26580 .into_iter()
26581 .map(|(search, result)| {
26582 if matches!(&search, Expression::Null(_)) {
26583 // NULL search -> IS NULL
26584 let condition = Expression::Is(Box::new(BinaryOp {
26585 left: this_expr.clone(),
26586 right: Expression::Null(crate::expressions::Null),
26587 left_comments: Vec::new(),
26588 operator_comments: Vec::new(),
26589 trailing_comments: Vec::new(),
26590 inferred_type: None,
26591 }));
26592 (condition, result)
26593 } else if is_decode_literal(&search)
26594 || is_decode_literal(&this_expr)
26595 {
26596 // At least one side is a literal -> simple equality (no NULL check needed)
26597 let eq = Expression::Eq(Box::new(BinaryOp {
26598 left: this_expr.clone(),
26599 right: search,
26600 left_comments: Vec::new(),
26601 operator_comments: Vec::new(),
26602 trailing_comments: Vec::new(),
26603 inferred_type: None,
26604 }));
26605 (eq, result)
26606 } else {
26607 // Non-literal -> null-safe comparison
26608 let needs_paren = matches!(
26609 &search,
26610 Expression::Eq(_)
26611 | Expression::Neq(_)
26612 | Expression::Gt(_)
26613 | Expression::Gte(_)
26614 | Expression::Lt(_)
26615 | Expression::Lte(_)
26616 );
26617 let search_ref = if needs_paren {
26618 Expression::Paren(Box::new(crate::expressions::Paren {
26619 this: search.clone(),
26620 trailing_comments: Vec::new(),
26621 }))
26622 } else {
26623 search.clone()
26624 };
26625 // Build: x = search OR (x IS NULL AND search IS NULL)
26626 let eq = Expression::Eq(Box::new(BinaryOp {
26627 left: this_expr.clone(),
26628 right: search_ref,
26629 left_comments: Vec::new(),
26630 operator_comments: Vec::new(),
26631 trailing_comments: Vec::new(),
26632 inferred_type: None,
26633 }));
26634 let search_in_null = if needs_paren {
26635 Expression::Paren(Box::new(crate::expressions::Paren {
26636 this: search.clone(),
26637 trailing_comments: Vec::new(),
26638 }))
26639 } else {
26640 search.clone()
26641 };
26642 let x_is_null = Expression::Is(Box::new(BinaryOp {
26643 left: this_expr.clone(),
26644 right: Expression::Null(crate::expressions::Null),
26645 left_comments: Vec::new(),
26646 operator_comments: Vec::new(),
26647 trailing_comments: Vec::new(),
26648 inferred_type: None,
26649 }));
26650 let search_is_null = Expression::Is(Box::new(BinaryOp {
26651 left: search_in_null,
26652 right: Expression::Null(crate::expressions::Null),
26653 left_comments: Vec::new(),
26654 operator_comments: Vec::new(),
26655 trailing_comments: Vec::new(),
26656 inferred_type: None,
26657 }));
26658 let both_null = Expression::And(Box::new(BinaryOp {
26659 left: x_is_null,
26660 right: search_is_null,
26661 left_comments: Vec::new(),
26662 operator_comments: Vec::new(),
26663 trailing_comments: Vec::new(),
26664 inferred_type: None,
26665 }));
26666 let condition = Expression::Or(Box::new(BinaryOp {
26667 left: eq,
26668 right: Expression::Paren(Box::new(
26669 crate::expressions::Paren {
26670 this: both_null,
26671 trailing_comments: Vec::new(),
26672 },
26673 )),
26674 left_comments: Vec::new(),
26675 operator_comments: Vec::new(),
26676 trailing_comments: Vec::new(),
26677 inferred_type: None,
26678 }));
26679 (condition, result)
26680 }
26681 })
26682 .collect();
26683 Expression::Case(Box::new(Case {
26684 operand: None,
26685 whens,
26686 else_: default,
26687 comments: Vec::new(),
26688 inferred_type: None,
26689 }))
26690 };
26691
26692 if let Expression::Decode(decode) = e {
26693 Ok(build_decode_case(
26694 decode.this,
26695 decode.search_results,
26696 decode.default,
26697 ))
26698 } else if let Expression::DecodeCase(dc) = e {
26699 // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
26700 let mut exprs = dc.expressions;
26701 if exprs.len() < 3 {
26702 return Ok(Expression::DecodeCase(Box::new(
26703 crate::expressions::DecodeCase { expressions: exprs },
26704 )));
26705 }
26706 let this_expr = exprs.remove(0);
26707 let mut pairs = Vec::new();
26708 let mut default = None;
26709 let mut i = 0;
26710 while i + 1 < exprs.len() {
26711 pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
26712 i += 2;
26713 }
26714 if i < exprs.len() {
26715 // Odd remaining element is the default
26716 default = Some(exprs[i].clone());
26717 }
26718 Ok(build_decode_case(this_expr, pairs, default))
26719 } else {
26720 Ok(e)
26721 }
26722 }
26723
26724 Action::CreateTableLikeToCtas => {
26725 // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
26726 if let Expression::CreateTable(ct) = e {
26727 let like_source = ct.constraints.iter().find_map(|c| {
26728 if let crate::expressions::TableConstraint::Like { source, .. } = c {
26729 Some(source.clone())
26730 } else {
26731 None
26732 }
26733 });
26734 if let Some(source_table) = like_source {
26735 let mut new_ct = *ct;
26736 new_ct.constraints.clear();
26737 // Build: SELECT * FROM b LIMIT 0
26738 let select = Expression::Select(Box::new(crate::expressions::Select {
26739 expressions: vec![Expression::Star(crate::expressions::Star {
26740 table: None,
26741 except: None,
26742 replace: None,
26743 rename: None,
26744 trailing_comments: Vec::new(),
26745 span: None,
26746 })],
26747 from: Some(crate::expressions::From {
26748 expressions: vec![Expression::Table(Box::new(source_table))],
26749 }),
26750 limit: Some(crate::expressions::Limit {
26751 this: Expression::Literal(Box::new(Literal::Number(
26752 "0".to_string(),
26753 ))),
26754 percent: false,
26755 comments: Vec::new(),
26756 }),
26757 ..Default::default()
26758 }));
26759 new_ct.as_select = Some(select);
26760 Ok(Expression::CreateTable(Box::new(new_ct)))
26761 } else {
26762 Ok(Expression::CreateTable(ct))
26763 }
26764 } else {
26765 Ok(e)
26766 }
26767 }
26768
26769 Action::CreateTableLikeToSelectInto => {
26770 // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
26771 if let Expression::CreateTable(ct) = e {
26772 let like_source = ct.constraints.iter().find_map(|c| {
26773 if let crate::expressions::TableConstraint::Like { source, .. } = c {
26774 Some(source.clone())
26775 } else {
26776 None
26777 }
26778 });
26779 if let Some(source_table) = like_source {
26780 let mut aliased_source = source_table;
26781 aliased_source.alias = Some(Identifier::new("temp"));
26782 // Build: SELECT TOP 0 * INTO a FROM b AS temp
26783 let select = Expression::Select(Box::new(crate::expressions::Select {
26784 expressions: vec![Expression::Star(crate::expressions::Star {
26785 table: None,
26786 except: None,
26787 replace: None,
26788 rename: None,
26789 trailing_comments: Vec::new(),
26790 span: None,
26791 })],
26792 from: Some(crate::expressions::From {
26793 expressions: vec![Expression::Table(Box::new(aliased_source))],
26794 }),
26795 into: Some(crate::expressions::SelectInto {
26796 this: Expression::Table(Box::new(ct.name.clone())),
26797 temporary: false,
26798 unlogged: false,
26799 bulk_collect: false,
26800 expressions: Vec::new(),
26801 }),
26802 top: Some(crate::expressions::Top {
26803 this: Expression::Literal(Box::new(Literal::Number(
26804 "0".to_string(),
26805 ))),
26806 percent: false,
26807 with_ties: false,
26808 parenthesized: false,
26809 }),
26810 ..Default::default()
26811 }));
26812 Ok(select)
26813 } else {
26814 Ok(Expression::CreateTable(ct))
26815 }
26816 } else {
26817 Ok(e)
26818 }
26819 }
26820
26821 Action::CreateTableLikeToAs => {
26822 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
26823 if let Expression::CreateTable(ct) = e {
26824 let like_source = ct.constraints.iter().find_map(|c| {
26825 if let crate::expressions::TableConstraint::Like { source, .. } = c {
26826 Some(source.clone())
26827 } else {
26828 None
26829 }
26830 });
26831 if let Some(source_table) = like_source {
26832 let mut new_ct = *ct;
26833 new_ct.constraints.clear();
26834 // AS b (just a table reference, not a SELECT)
26835 new_ct.as_select = Some(Expression::Table(Box::new(source_table)));
26836 Ok(Expression::CreateTable(Box::new(new_ct)))
26837 } else {
26838 Ok(Expression::CreateTable(ct))
26839 }
26840 } else {
26841 Ok(e)
26842 }
26843 }
26844
26845 Action::TsOrDsToDateConvert => {
26846 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
26847 if let Expression::Function(f) = e {
26848 let mut args = f.args;
26849 let this = args.remove(0);
26850 let fmt = if !args.is_empty() {
26851 match &args[0] {
26852 Expression::Literal(lit)
26853 if matches!(lit.as_ref(), Literal::String(_)) =>
26854 {
26855 let Literal::String(s) = lit.as_ref() else {
26856 unreachable!()
26857 };
26858 Some(s.clone())
26859 }
26860 _ => None,
26861 }
26862 } else {
26863 None
26864 };
26865 Ok(Expression::TsOrDsToDate(Box::new(
26866 crate::expressions::TsOrDsToDate {
26867 this: Box::new(this),
26868 format: fmt,
26869 safe: None,
26870 },
26871 )))
26872 } else {
26873 Ok(e)
26874 }
26875 }
26876
26877 Action::TsOrDsToDateStrConvert => {
26878 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
26879 if let Expression::Function(f) = e {
26880 let arg = f.args.into_iter().next().unwrap();
26881 let str_type = match target {
26882 DialectType::DuckDB
26883 | DialectType::PostgreSQL
26884 | DialectType::Materialize => DataType::Text,
26885 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
26886 DataType::Custom {
26887 name: "STRING".to_string(),
26888 }
26889 }
26890 DialectType::Presto
26891 | DialectType::Trino
26892 | DialectType::Athena
26893 | DialectType::Drill => DataType::VarChar {
26894 length: None,
26895 parenthesized_length: false,
26896 },
26897 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
26898 DataType::Custom {
26899 name: "STRING".to_string(),
26900 }
26901 }
26902 _ => DataType::VarChar {
26903 length: None,
26904 parenthesized_length: false,
26905 },
26906 };
26907 let cast_expr = Expression::Cast(Box::new(Cast {
26908 this: arg,
26909 to: str_type,
26910 double_colon_syntax: false,
26911 trailing_comments: Vec::new(),
26912 format: None,
26913 default: None,
26914 inferred_type: None,
26915 }));
26916 Ok(Expression::Substring(Box::new(
26917 crate::expressions::SubstringFunc {
26918 this: cast_expr,
26919 start: Expression::number(1),
26920 length: Some(Expression::number(10)),
26921 from_for_syntax: false,
26922 },
26923 )))
26924 } else {
26925 Ok(e)
26926 }
26927 }
26928
26929 Action::DateStrToDateConvert => {
26930 // DATE_STR_TO_DATE(x) -> dialect-specific
26931 if let Expression::Function(f) = e {
26932 let arg = f.args.into_iter().next().unwrap();
26933 match target {
26934 DialectType::SQLite => {
26935 // SQLite: just the bare expression (dates are strings)
26936 Ok(arg)
26937 }
26938 _ => Ok(Expression::Cast(Box::new(Cast {
26939 this: arg,
26940 to: DataType::Date,
26941 double_colon_syntax: false,
26942 trailing_comments: Vec::new(),
26943 format: None,
26944 default: None,
26945 inferred_type: None,
26946 }))),
26947 }
26948 } else {
26949 Ok(e)
26950 }
26951 }
26952
26953 Action::TimeStrToDateConvert => {
26954 // TIME_STR_TO_DATE(x) -> dialect-specific
26955 if let Expression::Function(f) = e {
26956 let arg = f.args.into_iter().next().unwrap();
26957 match target {
26958 DialectType::Hive
26959 | DialectType::Doris
26960 | DialectType::StarRocks
26961 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
26962 Function::new("TO_DATE".to_string(), vec![arg]),
26963 ))),
26964 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26965 // Presto: CAST(x AS TIMESTAMP)
26966 Ok(Expression::Cast(Box::new(Cast {
26967 this: arg,
26968 to: DataType::Timestamp {
26969 timezone: false,
26970 precision: None,
26971 },
26972 double_colon_syntax: false,
26973 trailing_comments: Vec::new(),
26974 format: None,
26975 default: None,
26976 inferred_type: None,
26977 })))
26978 }
26979 _ => {
26980 // Default: CAST(x AS DATE)
26981 Ok(Expression::Cast(Box::new(Cast {
26982 this: arg,
26983 to: DataType::Date,
26984 double_colon_syntax: false,
26985 trailing_comments: Vec::new(),
26986 format: None,
26987 default: None,
26988 inferred_type: None,
26989 })))
26990 }
26991 }
26992 } else {
26993 Ok(e)
26994 }
26995 }
26996
26997 Action::TimeStrToTimeConvert => {
26998 // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type
26999 if let Expression::Function(f) = e {
27000 let mut args = f.args;
27001 let this = args.remove(0);
27002 let zone = if !args.is_empty() {
27003 match &args[0] {
27004 Expression::Literal(lit)
27005 if matches!(lit.as_ref(), Literal::String(_)) =>
27006 {
27007 let Literal::String(s) = lit.as_ref() else {
27008 unreachable!()
27009 };
27010 Some(s.clone())
27011 }
27012 _ => None,
27013 }
27014 } else {
27015 None
27016 };
27017 let has_zone = zone.is_some();
27018
27019 match target {
27020 DialectType::SQLite => {
27021 // SQLite: just the bare expression
27022 Ok(this)
27023 }
27024 DialectType::MySQL => {
27025 if has_zone {
27026 // MySQL with zone: TIMESTAMP(x)
27027 Ok(Expression::Function(Box::new(Function::new(
27028 "TIMESTAMP".to_string(),
27029 vec![this],
27030 ))))
27031 } else {
27032 // MySQL: CAST(x AS DATETIME) or with precision
27033 // Use DataType::Custom to avoid MySQL's transform_cast converting
27034 // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
27035 let precision = if let Expression::Literal(ref lit) = this {
27036 if let Literal::String(ref s) = lit.as_ref() {
27037 if let Some(dot_pos) = s.rfind('.') {
27038 let frac = &s[dot_pos + 1..];
27039 let digit_count = frac
27040 .chars()
27041 .take_while(|c| c.is_ascii_digit())
27042 .count();
27043 if digit_count > 0 {
27044 Some(digit_count)
27045 } else {
27046 None
27047 }
27048 } else {
27049 None
27050 }
27051 } else {
27052 None
27053 }
27054 } else {
27055 None
27056 };
27057 let type_name = match precision {
27058 Some(p) => format!("DATETIME({})", p),
27059 None => "DATETIME".to_string(),
27060 };
27061 Ok(Expression::Cast(Box::new(Cast {
27062 this,
27063 to: DataType::Custom { name: type_name },
27064 double_colon_syntax: false,
27065 trailing_comments: Vec::new(),
27066 format: None,
27067 default: None,
27068 inferred_type: None,
27069 })))
27070 }
27071 }
27072 DialectType::ClickHouse => {
27073 if has_zone {
27074 // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
27075 // We need to strip the timezone offset from the literal if present
27076 let clean_this = if let Expression::Literal(ref lit) = this {
27077 if let Literal::String(ref s) = lit.as_ref() {
27078 // Strip timezone offset like "-08:00" or "+00:00"
27079 let re_offset = s.rfind(|c: char| c == '+' || c == '-');
27080 if let Some(offset_pos) = re_offset {
27081 if offset_pos > 10 {
27082 // After the date part
27083 let trimmed = s[..offset_pos].to_string();
27084 Expression::Literal(Box::new(Literal::String(
27085 trimmed,
27086 )))
27087 } else {
27088 this.clone()
27089 }
27090 } else {
27091 this.clone()
27092 }
27093 } else {
27094 this.clone()
27095 }
27096 } else {
27097 this.clone()
27098 };
27099 let zone_str = zone.unwrap();
27100 // Build: CAST(x AS DateTime64(6, 'zone'))
27101 let type_name = format!("DateTime64(6, '{}')", zone_str);
27102 Ok(Expression::Cast(Box::new(Cast {
27103 this: clean_this,
27104 to: DataType::Custom { name: type_name },
27105 double_colon_syntax: false,
27106 trailing_comments: Vec::new(),
27107 format: None,
27108 default: None,
27109 inferred_type: None,
27110 })))
27111 } else {
27112 Ok(Expression::Cast(Box::new(Cast {
27113 this,
27114 to: DataType::Custom {
27115 name: "DateTime64(6)".to_string(),
27116 },
27117 double_colon_syntax: false,
27118 trailing_comments: Vec::new(),
27119 format: None,
27120 default: None,
27121 inferred_type: None,
27122 })))
27123 }
27124 }
27125 DialectType::BigQuery => {
27126 if has_zone {
27127 // BigQuery with zone: CAST(x AS TIMESTAMP)
27128 Ok(Expression::Cast(Box::new(Cast {
27129 this,
27130 to: DataType::Timestamp {
27131 timezone: false,
27132 precision: None,
27133 },
27134 double_colon_syntax: false,
27135 trailing_comments: Vec::new(),
27136 format: None,
27137 default: None,
27138 inferred_type: None,
27139 })))
27140 } else {
27141 // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
27142 Ok(Expression::Cast(Box::new(Cast {
27143 this,
27144 to: DataType::Custom {
27145 name: "DATETIME".to_string(),
27146 },
27147 double_colon_syntax: false,
27148 trailing_comments: Vec::new(),
27149 format: None,
27150 default: None,
27151 inferred_type: None,
27152 })))
27153 }
27154 }
27155 DialectType::Doris => {
27156 // Doris: CAST(x AS DATETIME)
27157 Ok(Expression::Cast(Box::new(Cast {
27158 this,
27159 to: DataType::Custom {
27160 name: "DATETIME".to_string(),
27161 },
27162 double_colon_syntax: false,
27163 trailing_comments: Vec::new(),
27164 format: None,
27165 default: None,
27166 inferred_type: None,
27167 })))
27168 }
27169 DialectType::TSQL | DialectType::Fabric => {
27170 if has_zone {
27171 // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
27172 let cast_expr = Expression::Cast(Box::new(Cast {
27173 this,
27174 to: DataType::Custom {
27175 name: "DATETIMEOFFSET".to_string(),
27176 },
27177 double_colon_syntax: false,
27178 trailing_comments: Vec::new(),
27179 format: None,
27180 default: None,
27181 inferred_type: None,
27182 }));
27183 Ok(Expression::AtTimeZone(Box::new(
27184 crate::expressions::AtTimeZone {
27185 this: cast_expr,
27186 zone: Expression::Literal(Box::new(Literal::String(
27187 "UTC".to_string(),
27188 ))),
27189 },
27190 )))
27191 } else {
27192 // TSQL: CAST(x AS DATETIME2)
27193 Ok(Expression::Cast(Box::new(Cast {
27194 this,
27195 to: DataType::Custom {
27196 name: "DATETIME2".to_string(),
27197 },
27198 double_colon_syntax: false,
27199 trailing_comments: Vec::new(),
27200 format: None,
27201 default: None,
27202 inferred_type: None,
27203 })))
27204 }
27205 }
27206 DialectType::DuckDB => {
27207 if has_zone {
27208 // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
27209 Ok(Expression::Cast(Box::new(Cast {
27210 this,
27211 to: DataType::Timestamp {
27212 timezone: true,
27213 precision: None,
27214 },
27215 double_colon_syntax: false,
27216 trailing_comments: Vec::new(),
27217 format: None,
27218 default: None,
27219 inferred_type: None,
27220 })))
27221 } else {
27222 // DuckDB: CAST(x AS TIMESTAMP)
27223 Ok(Expression::Cast(Box::new(Cast {
27224 this,
27225 to: DataType::Timestamp {
27226 timezone: false,
27227 precision: None,
27228 },
27229 double_colon_syntax: false,
27230 trailing_comments: Vec::new(),
27231 format: None,
27232 default: None,
27233 inferred_type: None,
27234 })))
27235 }
27236 }
27237 DialectType::PostgreSQL
27238 | DialectType::Materialize
27239 | DialectType::RisingWave => {
27240 if has_zone {
27241 // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
27242 Ok(Expression::Cast(Box::new(Cast {
27243 this,
27244 to: DataType::Timestamp {
27245 timezone: true,
27246 precision: None,
27247 },
27248 double_colon_syntax: false,
27249 trailing_comments: Vec::new(),
27250 format: None,
27251 default: None,
27252 inferred_type: None,
27253 })))
27254 } else {
27255 // PostgreSQL: CAST(x AS TIMESTAMP)
27256 Ok(Expression::Cast(Box::new(Cast {
27257 this,
27258 to: DataType::Timestamp {
27259 timezone: false,
27260 precision: None,
27261 },
27262 double_colon_syntax: false,
27263 trailing_comments: Vec::new(),
27264 format: None,
27265 default: None,
27266 inferred_type: None,
27267 })))
27268 }
27269 }
27270 DialectType::Snowflake => {
27271 if has_zone {
27272 // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
27273 Ok(Expression::Cast(Box::new(Cast {
27274 this,
27275 to: DataType::Timestamp {
27276 timezone: true,
27277 precision: None,
27278 },
27279 double_colon_syntax: false,
27280 trailing_comments: Vec::new(),
27281 format: None,
27282 default: None,
27283 inferred_type: None,
27284 })))
27285 } else {
27286 // Snowflake: CAST(x AS TIMESTAMP)
27287 Ok(Expression::Cast(Box::new(Cast {
27288 this,
27289 to: DataType::Timestamp {
27290 timezone: false,
27291 precision: None,
27292 },
27293 double_colon_syntax: false,
27294 trailing_comments: Vec::new(),
27295 format: None,
27296 default: None,
27297 inferred_type: None,
27298 })))
27299 }
27300 }
27301 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
27302 if has_zone {
27303 // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
27304 // Check for precision from sub-second digits
27305 let precision = if let Expression::Literal(ref lit) = this {
27306 if let Literal::String(ref s) = lit.as_ref() {
27307 if let Some(dot_pos) = s.rfind('.') {
27308 let frac = &s[dot_pos + 1..];
27309 let digit_count = frac
27310 .chars()
27311 .take_while(|c| c.is_ascii_digit())
27312 .count();
27313 if digit_count > 0
27314 && matches!(target, DialectType::Trino)
27315 {
27316 Some(digit_count as u32)
27317 } else {
27318 None
27319 }
27320 } else {
27321 None
27322 }
27323 } else {
27324 None
27325 }
27326 } else {
27327 None
27328 };
27329 let dt = if let Some(prec) = precision {
27330 DataType::Timestamp {
27331 timezone: true,
27332 precision: Some(prec),
27333 }
27334 } else {
27335 DataType::Timestamp {
27336 timezone: true,
27337 precision: None,
27338 }
27339 };
27340 Ok(Expression::Cast(Box::new(Cast {
27341 this,
27342 to: dt,
27343 double_colon_syntax: false,
27344 trailing_comments: Vec::new(),
27345 format: None,
27346 default: None,
27347 inferred_type: None,
27348 })))
27349 } else {
27350 // Check for sub-second precision for Trino
27351 let precision = if let Expression::Literal(ref lit) = this {
27352 if let Literal::String(ref s) = lit.as_ref() {
27353 if let Some(dot_pos) = s.rfind('.') {
27354 let frac = &s[dot_pos + 1..];
27355 let digit_count = frac
27356 .chars()
27357 .take_while(|c| c.is_ascii_digit())
27358 .count();
27359 if digit_count > 0
27360 && matches!(target, DialectType::Trino)
27361 {
27362 Some(digit_count as u32)
27363 } else {
27364 None
27365 }
27366 } else {
27367 None
27368 }
27369 } else {
27370 None
27371 }
27372 } else {
27373 None
27374 };
27375 let dt = DataType::Timestamp {
27376 timezone: false,
27377 precision,
27378 };
27379 Ok(Expression::Cast(Box::new(Cast {
27380 this,
27381 to: dt,
27382 double_colon_syntax: false,
27383 trailing_comments: Vec::new(),
27384 format: None,
27385 default: None,
27386 inferred_type: None,
27387 })))
27388 }
27389 }
27390 DialectType::Redshift => {
27391 if has_zone {
27392 // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
27393 Ok(Expression::Cast(Box::new(Cast {
27394 this,
27395 to: DataType::Timestamp {
27396 timezone: true,
27397 precision: None,
27398 },
27399 double_colon_syntax: false,
27400 trailing_comments: Vec::new(),
27401 format: None,
27402 default: None,
27403 inferred_type: None,
27404 })))
27405 } else {
27406 // Redshift: CAST(x AS TIMESTAMP)
27407 Ok(Expression::Cast(Box::new(Cast {
27408 this,
27409 to: DataType::Timestamp {
27410 timezone: false,
27411 precision: None,
27412 },
27413 double_colon_syntax: false,
27414 trailing_comments: Vec::new(),
27415 format: None,
27416 default: None,
27417 inferred_type: None,
27418 })))
27419 }
27420 }
27421 _ => {
27422 // Default: CAST(x AS TIMESTAMP)
27423 Ok(Expression::Cast(Box::new(Cast {
27424 this,
27425 to: DataType::Timestamp {
27426 timezone: false,
27427 precision: None,
27428 },
27429 double_colon_syntax: false,
27430 trailing_comments: Vec::new(),
27431 format: None,
27432 default: None,
27433 inferred_type: None,
27434 })))
27435 }
27436 }
27437 } else {
27438 Ok(e)
27439 }
27440 }
27441
27442 Action::DateToDateStrConvert => {
27443 // DATE_TO_DATE_STR(x) -> CAST(x AS text_type) per dialect
27444 if let Expression::Function(f) = e {
27445 let arg = f.args.into_iter().next().unwrap();
27446 let str_type = match target {
27447 DialectType::DuckDB => DataType::Text,
27448 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
27449 DataType::Custom {
27450 name: "STRING".to_string(),
27451 }
27452 }
27453 DialectType::Presto
27454 | DialectType::Trino
27455 | DialectType::Athena
27456 | DialectType::Drill => DataType::VarChar {
27457 length: None,
27458 parenthesized_length: false,
27459 },
27460 _ => DataType::VarChar {
27461 length: None,
27462 parenthesized_length: false,
27463 },
27464 };
27465 Ok(Expression::Cast(Box::new(Cast {
27466 this: arg,
27467 to: str_type,
27468 double_colon_syntax: false,
27469 trailing_comments: Vec::new(),
27470 format: None,
27471 default: None,
27472 inferred_type: None,
27473 })))
27474 } else {
27475 Ok(e)
27476 }
27477 }
27478
            Action::DateToDiConvert => {
                // DATE_TO_DI(x) -> CAST(format_func(x, fmt) AS INT)
                //
                // Renders the date as a YYYYMMDD string via the dialect's format
                // function, then casts the result to an integer type.
                if let Expression::Function(f) = e {
                    // Marker call carries a single argument: the date expression.
                    let arg = f.args.into_iter().next().unwrap();
                    let inner = match target {
                        DialectType::DuckDB => {
                            // STRFTIME(x, '%Y%m%d')
                            Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(),
                                vec![arg, Expression::string("%Y%m%d")],
                            )))
                        }
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // DATE_FORMAT(x, 'yyyyMMdd') - Java-style pattern letters
                            Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![arg, Expression::string("yyyyMMdd")],
                            )))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // DATE_FORMAT(x, '%Y%m%d') - strftime-style specifiers
                            Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![arg, Expression::string("%Y%m%d")],
                            )))
                        }
                        DialectType::Drill => {
                            // TO_DATE(x, 'yyyyMMdd')
                            Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                vec![arg, Expression::string("yyyyMMdd")],
                            )))
                        }
                        _ => {
                            // Default: STRFTIME(x, '%Y%m%d')
                            // NOTE(review): identical to the DuckDB arm above; the
                            // explicit DuckDB arm could be dropped.
                            Expression::Function(Box::new(Function::new(
                                "STRFTIME".to_string(),
                                vec![arg, Expression::string("%Y%m%d")],
                            )))
                        }
                    };
                    // Use INT (not INTEGER) for Presto/Trino
                    let int_type = match target {
                        DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Athena
                        | DialectType::TSQL
                        | DialectType::Fabric
                        | DialectType::SQLite
                        | DialectType::Redshift => DataType::Custom {
                            name: "INT".to_string(),
                        },
                        _ => DataType::Int {
                            length: None,
                            integer_spelling: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: inner,
                        to: int_type,
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Not a function node: pass through unchanged.
                    Ok(e)
                }
            }
27549
            Action::DiToDateConvert => {
                // DI_TO_DATE(x) -> dialect-specific integer-to-date conversion
                //
                // The argument is an integer in YYYYMMDD shape (the inverse of
                // DATE_TO_DI above); each dialect needs a different
                // cast-then-parse pipeline to turn it back into a DATE value.
                if let Expression::Function(f) = e {
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::DuckDB => {
                            // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
                            let cast_text = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Text,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            let strptime = Expression::Function(Box::new(Function::new(
                                "STRPTIME".to_string(),
                                vec![cast_text, Expression::string("%Y%m%d")],
                            )));
                            // Outer cast narrows STRPTIME's result down to DATE.
                            Ok(Expression::Cast(Box::new(Cast {
                                this: strptime,
                                to: DataType::Date,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // TO_DATE(CAST(x AS STRING), 'yyyyMMdd')
                            let cast_str = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Custom {
                                    name: "STRING".to_string(),
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                vec![cast_str, Expression::string("yyyyMMdd")],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)
                            let cast_varchar = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            let date_parse = Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![cast_varchar, Expression::string("%Y%m%d")],
                            )));
                            // DATE_PARSE yields a timestamp; cast it down to DATE.
                            Ok(Expression::Cast(Box::new(Cast {
                                this: date_parse,
                                to: DataType::Date,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::Drill => {
                            // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
                            let cast_varchar = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_DATE".to_string(),
                                vec![cast_varchar, Expression::string("yyyyMMdd")],
                            ))))
                        }
                        // No known rewrite: keep the DI_TO_DATE marker so the
                        // generator (or a later pass) can handle it.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "DI_TO_DATE".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    Ok(e)
                }
            }
27654
27655 Action::TsOrDiToDiConvert => {
27656 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
27657 if let Expression::Function(f) = e {
27658 let arg = f.args.into_iter().next().unwrap();
27659 let str_type = match target {
27660 DialectType::DuckDB => DataType::Text,
27661 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
27662 DataType::Custom {
27663 name: "STRING".to_string(),
27664 }
27665 }
27666 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
27667 DataType::VarChar {
27668 length: None,
27669 parenthesized_length: false,
27670 }
27671 }
27672 _ => DataType::VarChar {
27673 length: None,
27674 parenthesized_length: false,
27675 },
27676 };
27677 let cast_str = Expression::Cast(Box::new(Cast {
27678 this: arg,
27679 to: str_type,
27680 double_colon_syntax: false,
27681 trailing_comments: Vec::new(),
27682 format: None,
27683 default: None,
27684 inferred_type: None,
27685 }));
27686 let replace_expr = Expression::Function(Box::new(Function::new(
27687 "REPLACE".to_string(),
27688 vec![cast_str, Expression::string("-"), Expression::string("")],
27689 )));
27690 let substr_name = match target {
27691 DialectType::DuckDB
27692 | DialectType::Hive
27693 | DialectType::Spark
27694 | DialectType::Databricks => "SUBSTR",
27695 _ => "SUBSTR",
27696 };
27697 let substr = Expression::Function(Box::new(Function::new(
27698 substr_name.to_string(),
27699 vec![replace_expr, Expression::number(1), Expression::number(8)],
27700 )));
27701 // Use INT (not INTEGER) for Presto/Trino etc.
27702 let int_type = match target {
27703 DialectType::Presto
27704 | DialectType::Trino
27705 | DialectType::Athena
27706 | DialectType::TSQL
27707 | DialectType::Fabric
27708 | DialectType::SQLite
27709 | DialectType::Redshift => DataType::Custom {
27710 name: "INT".to_string(),
27711 },
27712 _ => DataType::Int {
27713 length: None,
27714 integer_spelling: false,
27715 },
27716 };
27717 Ok(Expression::Cast(Box::new(Cast {
27718 this: substr,
27719 to: int_type,
27720 double_colon_syntax: false,
27721 trailing_comments: Vec::new(),
27722 format: None,
27723 default: None,
27724 inferred_type: None,
27725 })))
27726 } else {
27727 Ok(e)
27728 }
27729 }
27730
            Action::UnixToStrConvert => {
                // UNIX_TO_STR(x, fmt) -> convert to Expression::UnixToStr for generator
                //
                // Three shapes are handled: a string-literal format (captured in
                // the UnixToStr node), a non-literal format (expanded inline per
                // dialect), and no format at all.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    let this = args.remove(0);
                    // The format argument is optional.
                    let fmt_expr = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        None
                    };

                    // Check if format is a string literal
                    let fmt_str = fmt_expr.as_ref().and_then(|f| {
                        if let Expression::Literal(lit) = f {
                            if let Literal::String(s) = lit.as_ref() {
                                Some(s.clone())
                            } else {
                                None
                            }
                        } else {
                            None
                        }
                    });

                    if let Some(fmt_string) = fmt_str {
                        // String literal format -> use UnixToStr expression (generator handles it)
                        Ok(Expression::UnixToStr(Box::new(
                            crate::expressions::UnixToStr {
                                this: Box::new(this),
                                format: Some(fmt_string),
                            },
                        )))
                    } else if let Some(fmt_e) = fmt_expr {
                        // Non-literal format (e.g., identifier `y`) -> build target expression directly
                        match target {
                            DialectType::DuckDB => {
                                // STRFTIME(TO_TIMESTAMP(x), y)
                                let to_ts = Expression::Function(Box::new(Function::new(
                                    "TO_TIMESTAMP".to_string(),
                                    vec![this],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "STRFTIME".to_string(),
                                    vec![to_ts, fmt_e],
                                ))))
                            }
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // DATE_FORMAT(FROM_UNIXTIME(x), y)
                                let from_unix = Expression::Function(Box::new(Function::new(
                                    "FROM_UNIXTIME".to_string(),
                                    vec![this],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_FORMAT".to_string(),
                                    vec![from_unix, fmt_e],
                                ))))
                            }
                            DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Doris
                            | DialectType::StarRocks => {
                                // FROM_UNIXTIME(x, y) - format passed directly
                                Ok(Expression::Function(Box::new(Function::new(
                                    "FROM_UNIXTIME".to_string(),
                                    vec![this, fmt_e],
                                ))))
                            }
                            _ => {
                                // Default: keep as UNIX_TO_STR(x, y)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "UNIX_TO_STR".to_string(),
                                    vec![this, fmt_e],
                                ))))
                            }
                        }
                    } else {
                        // No format argument: emit UnixToStr with format: None.
                        Ok(Expression::UnixToStr(Box::new(
                            crate::expressions::UnixToStr {
                                this: Box::new(this),
                                format: None,
                            },
                        )))
                    }
                } else {
                    Ok(e)
                }
            }
27819
27820 Action::UnixToTimeConvert => {
27821 // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
27822 if let Expression::Function(f) = e {
27823 let arg = f.args.into_iter().next().unwrap();
27824 Ok(Expression::UnixToTime(Box::new(
27825 crate::expressions::UnixToTime {
27826 this: Box::new(arg),
27827 scale: None,
27828 zone: None,
27829 hours: None,
27830 minutes: None,
27831 format: None,
27832 target_type: None,
27833 },
27834 )))
27835 } else {
27836 Ok(e)
27837 }
27838 }
27839
            Action::UnixToTimeStrConvert => {
                // UNIX_TO_TIME_STR(x) -> dialect-specific
                //
                // Converts a unix epoch value into a timestamp string via the
                // target dialect's conversion function, adding a string cast
                // where the dialect's function alone is not used as-is.
                if let Expression::Function(f) = e {
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                            // FROM_UNIXTIME(x) - emitted without a wrapping cast,
                            // unlike the Presto arm below
                            Ok(Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![arg],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // CAST(FROM_UNIXTIME(x) AS VARCHAR)
                            let from_unix = Expression::Function(Box::new(Function::new(
                                "FROM_UNIXTIME".to_string(),
                                vec![arg],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: from_unix,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::DuckDB => {
                            // CAST(TO_TIMESTAMP(x) AS TEXT)
                            let to_ts = Expression::Function(Box::new(Function::new(
                                "TO_TIMESTAMP".to_string(),
                                vec![arg],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: to_ts,
                                to: DataType::Text,
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        // No rewrite known: keep the marker function.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "UNIX_TO_TIME_STR".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    Ok(e)
                }
            }
27896
27897 Action::TimeToUnixConvert => {
27898 // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
27899 if let Expression::Function(f) = e {
27900 let arg = f.args.into_iter().next().unwrap();
27901 Ok(Expression::TimeToUnix(Box::new(
27902 crate::expressions::UnaryFunc {
27903 this: arg,
27904 original_name: None,
27905 inferred_type: None,
27906 },
27907 )))
27908 } else {
27909 Ok(e)
27910 }
27911 }
27912
27913 Action::TimeToStrConvert => {
27914 // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
27915 if let Expression::Function(f) = e {
27916 let mut args = f.args;
27917 let this = args.remove(0);
27918 let fmt = match args.remove(0) {
27919 Expression::Literal(lit)
27920 if matches!(lit.as_ref(), Literal::String(_)) =>
27921 {
27922 let Literal::String(s) = lit.as_ref() else {
27923 unreachable!()
27924 };
27925 s.clone()
27926 }
27927 other => {
27928 return Ok(Expression::Function(Box::new(Function::new(
27929 "TIME_TO_STR".to_string(),
27930 vec![this, other],
27931 ))));
27932 }
27933 };
27934 Ok(Expression::TimeToStr(Box::new(
27935 crate::expressions::TimeToStr {
27936 this: Box::new(this),
27937 format: fmt,
27938 culture: None,
27939 zone: None,
27940 },
27941 )))
27942 } else {
27943 Ok(e)
27944 }
27945 }
27946
27947 Action::StrToUnixConvert => {
27948 // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
27949 if let Expression::Function(f) = e {
27950 let mut args = f.args;
27951 let this = args.remove(0);
27952 let fmt = match args.remove(0) {
27953 Expression::Literal(lit)
27954 if matches!(lit.as_ref(), Literal::String(_)) =>
27955 {
27956 let Literal::String(s) = lit.as_ref() else {
27957 unreachable!()
27958 };
27959 s.clone()
27960 }
27961 other => {
27962 return Ok(Expression::Function(Box::new(Function::new(
27963 "STR_TO_UNIX".to_string(),
27964 vec![this, other],
27965 ))));
27966 }
27967 };
27968 Ok(Expression::StrToUnix(Box::new(
27969 crate::expressions::StrToUnix {
27970 this: Some(Box::new(this)),
27971 format: Some(fmt),
27972 },
27973 )))
27974 } else {
27975 Ok(e)
27976 }
27977 }
27978
            Action::TimeStrToUnixConvert => {
                // TIME_STR_TO_UNIX(x) -> dialect-specific
                //
                // Converts a timestamp string into a unix epoch value using the
                // target dialect's native mechanism.
                if let Expression::Function(f) = e {
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::DuckDB => {
                            // EPOCH(CAST(x AS TIMESTAMP))
                            let cast_ts = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Timestamp {
                                    timezone: false,
                                    precision: None,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "EPOCH".to_string(),
                                vec![cast_ts],
                            ))))
                        }
                        DialectType::Hive
                        | DialectType::Doris
                        | DialectType::StarRocks
                        | DialectType::MySQL => {
                            // UNIX_TIMESTAMP(x)
                            Ok(Expression::Function(Box::new(Function::new(
                                "UNIX_TIMESTAMP".to_string(),
                                vec![arg],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
                            let date_parse = Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![arg, Expression::string("%Y-%m-%d %T")],
                            )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_UNIXTIME".to_string(),
                                vec![date_parse],
                            ))))
                        }
                        // No rewrite known: keep the marker function.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME_STR_TO_UNIX".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    Ok(e)
                }
            }
28033
28034 Action::TimeToTimeStrConvert => {
28035 // TIME_TO_TIME_STR(x) -> CAST(x AS str_type) per dialect
28036 if let Expression::Function(f) = e {
28037 let arg = f.args.into_iter().next().unwrap();
28038 let str_type = match target {
28039 DialectType::DuckDB => DataType::Text,
28040 DialectType::Hive
28041 | DialectType::Spark
28042 | DialectType::Databricks
28043 | DialectType::Doris
28044 | DialectType::StarRocks => DataType::Custom {
28045 name: "STRING".to_string(),
28046 },
28047 DialectType::Redshift => DataType::Custom {
28048 name: "VARCHAR(MAX)".to_string(),
28049 },
28050 _ => DataType::VarChar {
28051 length: None,
28052 parenthesized_length: false,
28053 },
28054 };
28055 Ok(Expression::Cast(Box::new(Cast {
28056 this: arg,
28057 to: str_type,
28058 double_colon_syntax: false,
28059 trailing_comments: Vec::new(),
28060 format: None,
28061 default: None,
28062 inferred_type: None,
28063 })))
28064 } else {
28065 Ok(e)
28066 }
28067 }
28068
            Action::DateTruncSwapArgs => {
                // DATE_TRUNC('unit', x) from Generic -> target-specific
                //
                // Some targets take the arguments in (expr, unit) order, some
                // need an unquoted unit identifier, and MySQL has no DATE_TRUNC
                // at all and needs a full expansion.
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        let unit_arg = f.args[0].clone();
                        let expr_arg = f.args[1].clone();
                        // Extract unit string from the first arg
                        let unit_str = match &unit_arg {
                            Expression::Literal(lit)
                                if matches!(lit.as_ref(), Literal::String(_)) =>
                            {
                                let Literal::String(s) = lit.as_ref() else {
                                    unreachable!()
                                };
                                s.to_ascii_uppercase()
                            }
                            // Non-literal unit: leave the call unchanged.
                            _ => return Ok(Expression::Function(f)),
                        };
                        match target {
                            DialectType::BigQuery => {
                                // BigQuery: DATE_TRUNC(x, UNIT) - unquoted unit
                                // (rendered as a bare column identifier)
                                let unit_ident =
                                    Expression::Column(Box::new(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                        span: None,
                                        inferred_type: None,
                                    }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, unit_ident],
                                ))))
                            }
                            DialectType::Doris => {
                                // Doris: DATE_TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::StarRocks => {
                                // StarRocks: DATE_TRUNC('UNIT', x) - keep standard order
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::MySQL => {
                                // MySQL: complex expansion based on unit
                                Self::date_trunc_to_mysql(&unit_str, &expr_arg)
                            }
                            // Dialects that accept the generic form: no-op.
                            _ => Ok(Expression::Function(f)),
                        }
                    } else {
                        // Unexpected arity: leave the call untouched.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
28138
            Action::TimestampTruncConvert => {
                // TIMESTAMP_TRUNC(x, UNIT[, tz]) from Generic -> target-specific
                //
                // Rewrites the generic three-argument truncation to each target's
                // DATE_TRUNC/TIMESTAMP_TRUNC shape; timezone handling varies per
                // dialect (kept, dropped, or expanded to AT TIME ZONE wrapping).
                if let Expression::Function(f) = e {
                    if f.args.len() >= 2 {
                        let expr_arg = f.args[0].clone();
                        let unit_arg = f.args[1].clone();
                        // Optional third argument: the timezone.
                        let tz_arg = if f.args.len() >= 3 {
                            Some(f.args[2].clone())
                        } else {
                            None
                        };
                        // Extract unit string
                        // (accepted as a string literal or a bare identifier).
                        let unit_str = match &unit_arg {
                            Expression::Literal(lit)
                                if matches!(lit.as_ref(), Literal::String(_)) =>
                            {
                                let Literal::String(s) = lit.as_ref() else {
                                    unreachable!()
                                };
                                s.to_ascii_uppercase()
                            }
                            Expression::Column(c) => c.name.name.to_ascii_uppercase(),
                            _ => {
                                // Unit shape not recognized: leave the call as-is.
                                return Ok(Expression::Function(f));
                            }
                        };
                        match target {
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: DATE_TRUNC('UNIT', x)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Doris | DialectType::StarRocks => {
                                // Doris: DATE_TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::BigQuery => {
                                // BigQuery: TIMESTAMP_TRUNC(x, UNIT) - keep but with unquoted unit
                                let unit_ident =
                                    Expression::Column(Box::new(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                        span: None,
                                        inferred_type: None,
                                    }));
                                // Timezone (when present) stays as a trailing arg.
                                let mut args = vec![expr_arg, unit_ident];
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TIMESTAMP_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                            DialectType::DuckDB => {
                                // DuckDB with timezone: DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                if let Some(tz) = tz_arg {
                                    // Non-literal timezones fall back to 'UTC'.
                                    let tz_str = match &tz {
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::String(_)) =>
                                        {
                                            let Literal::String(s) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            s.clone()
                                        }
                                        _ => "UTC".to_string(),
                                    };
                                    // x AT TIME ZONE 'tz'
                                    let at_tz = Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: expr_arg,
                                            zone: Expression::string(&tz_str),
                                        },
                                    ));
                                    // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
                                    let trunc = Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), at_tz],
                                    )));
                                    // DATE_TRUNC(...) AT TIME ZONE 'tz'
                                    Ok(Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: trunc,
                                            zone: Expression::string(&tz_str),
                                        },
                                    )))
                                } else {
                                    // No timezone: plain DATE_TRUNC('UNIT', x).
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), expr_arg],
                                    ))))
                                }
                            }
                            DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Snowflake => {
                                // Presto/Snowflake: DATE_TRUNC('UNIT', x) - drop timezone
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            _ => {
                                // For most dialects: DATE_TRUNC('UNIT', x) + tz handling
                                let mut args = vec![Expression::string(&unit_str), expr_arg];
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                        }
                    } else {
                        // Fewer than two args: not the expected shape, keep as-is.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
28269
28270 Action::StrToDateConvert => {
28271 // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date parsing
28272 if let Expression::Function(f) = e {
28273 if f.args.len() == 2 {
28274 let mut args = f.args;
28275 let this = args.remove(0);
28276 let fmt_expr = args.remove(0);
28277 let fmt_str = match &fmt_expr {
28278 Expression::Literal(lit)
28279 if matches!(lit.as_ref(), Literal::String(_)) =>
28280 {
28281 let Literal::String(s) = lit.as_ref() else {
28282 unreachable!()
28283 };
28284 Some(s.clone())
28285 }
28286 _ => None,
28287 };
28288 let default_date = "%Y-%m-%d";
28289 let default_time = "%Y-%m-%d %H:%M:%S";
28290 let is_default = fmt_str
28291 .as_ref()
28292 .map_or(false, |f| f == default_date || f == default_time);
28293
28294 if is_default {
28295 // Default format: handle per-dialect
28296 match target {
28297 DialectType::MySQL
28298 | DialectType::Doris
28299 | DialectType::StarRocks => {
28300 // Keep STR_TO_DATE(x, fmt) as-is
28301 Ok(Expression::Function(Box::new(Function::new(
28302 "STR_TO_DATE".to_string(),
28303 vec![this, fmt_expr],
28304 ))))
28305 }
28306 DialectType::Hive => {
28307 // Hive: CAST(x AS DATE)
28308 Ok(Expression::Cast(Box::new(Cast {
28309 this,
28310 to: DataType::Date,
28311 double_colon_syntax: false,
28312 trailing_comments: Vec::new(),
28313 format: None,
28314 default: None,
28315 inferred_type: None,
28316 })))
28317 }
28318 DialectType::Presto
28319 | DialectType::Trino
28320 | DialectType::Athena => {
28321 // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
28322 let date_parse =
28323 Expression::Function(Box::new(Function::new(
28324 "DATE_PARSE".to_string(),
28325 vec![this, fmt_expr],
28326 )));
28327 Ok(Expression::Cast(Box::new(Cast {
28328 this: date_parse,
28329 to: DataType::Date,
28330 double_colon_syntax: false,
28331 trailing_comments: Vec::new(),
28332 format: None,
28333 default: None,
28334 inferred_type: None,
28335 })))
28336 }
28337 _ => {
28338 // Others: TsOrDsToDate (delegates to generator)
28339 Ok(Expression::TsOrDsToDate(Box::new(
28340 crate::expressions::TsOrDsToDate {
28341 this: Box::new(this),
28342 format: None,
28343 safe: None,
28344 },
28345 )))
28346 }
28347 }
28348 } else if let Some(fmt) = fmt_str {
28349 match target {
28350 DialectType::Doris
28351 | DialectType::StarRocks
28352 | DialectType::MySQL => {
28353 // Keep STR_TO_DATE but with normalized format (%H:%M:%S -> %T, %-d -> %e)
28354 let mut normalized = fmt.clone();
28355 normalized = normalized.replace("%-d", "%e");
28356 normalized = normalized.replace("%-m", "%c");
28357 normalized = normalized.replace("%H:%M:%S", "%T");
28358 Ok(Expression::Function(Box::new(Function::new(
28359 "STR_TO_DATE".to_string(),
28360 vec![this, Expression::string(&normalized)],
28361 ))))
28362 }
28363 DialectType::Hive => {
28364 // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
28365 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
28366 let unix_ts =
28367 Expression::Function(Box::new(Function::new(
28368 "UNIX_TIMESTAMP".to_string(),
28369 vec![this, Expression::string(&java_fmt)],
28370 )));
28371 let from_unix =
28372 Expression::Function(Box::new(Function::new(
28373 "FROM_UNIXTIME".to_string(),
28374 vec![unix_ts],
28375 )));
28376 Ok(Expression::Cast(Box::new(Cast {
28377 this: from_unix,
28378 to: DataType::Date,
28379 double_colon_syntax: false,
28380 trailing_comments: Vec::new(),
28381 format: None,
28382 default: None,
28383 inferred_type: None,
28384 })))
28385 }
28386 DialectType::Spark | DialectType::Databricks => {
28387 // Spark: TO_DATE(x, java_fmt)
28388 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
28389 Ok(Expression::Function(Box::new(Function::new(
28390 "TO_DATE".to_string(),
28391 vec![this, Expression::string(&java_fmt)],
28392 ))))
28393 }
28394 DialectType::Drill => {
28395 // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
28396 // The generator's string literal escaping will double the quotes: 'T' -> ''T''
28397 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
28398 let java_fmt = java_fmt.replace('T', "'T'");
28399 Ok(Expression::Function(Box::new(Function::new(
28400 "TO_DATE".to_string(),
28401 vec![this, Expression::string(&java_fmt)],
28402 ))))
28403 }
28404 _ => {
28405 // For other dialects: use TsOrDsToDate which delegates to generator
28406 Ok(Expression::TsOrDsToDate(Box::new(
28407 crate::expressions::TsOrDsToDate {
28408 this: Box::new(this),
28409 format: Some(fmt),
28410 safe: None,
28411 },
28412 )))
28413 }
28414 }
28415 } else {
28416 // Non-string format - keep as-is
28417 let mut new_args = Vec::new();
28418 new_args.push(this);
28419 new_args.push(fmt_expr);
28420 Ok(Expression::Function(Box::new(Function::new(
28421 "STR_TO_DATE".to_string(),
28422 new_args,
28423 ))))
28424 }
28425 } else {
28426 Ok(Expression::Function(f))
28427 }
28428 } else {
28429 Ok(e)
28430 }
28431 }
28432
28433 Action::TsOrDsAddConvert => {
28434 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
28435 if let Expression::Function(f) = e {
28436 if f.args.len() == 3 {
28437 let mut args = f.args;
28438 let x = args.remove(0);
28439 let n = args.remove(0);
28440 let unit_expr = args.remove(0);
28441 let unit_str = match &unit_expr {
28442 Expression::Literal(lit)
28443 if matches!(lit.as_ref(), Literal::String(_)) =>
28444 {
28445 let Literal::String(s) = lit.as_ref() else {
28446 unreachable!()
28447 };
28448 s.to_ascii_uppercase()
28449 }
28450 _ => "DAY".to_string(),
28451 };
28452
28453 match target {
28454 DialectType::Hive
28455 | DialectType::Spark
28456 | DialectType::Databricks => {
28457 // DATE_ADD(x, n) - only supports DAY unit
28458 Ok(Expression::Function(Box::new(Function::new(
28459 "DATE_ADD".to_string(),
28460 vec![x, n],
28461 ))))
28462 }
28463 DialectType::MySQL => {
28464 // DATE_ADD(x, INTERVAL n UNIT)
28465 let iu = match unit_str.as_str() {
28466 "YEAR" => crate::expressions::IntervalUnit::Year,
28467 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
28468 "MONTH" => crate::expressions::IntervalUnit::Month,
28469 "WEEK" => crate::expressions::IntervalUnit::Week,
28470 "HOUR" => crate::expressions::IntervalUnit::Hour,
28471 "MINUTE" => crate::expressions::IntervalUnit::Minute,
28472 "SECOND" => crate::expressions::IntervalUnit::Second,
28473 _ => crate::expressions::IntervalUnit::Day,
28474 };
28475 let interval = Expression::Interval(Box::new(
28476 crate::expressions::Interval {
28477 this: Some(n),
28478 unit: Some(
28479 crate::expressions::IntervalUnitSpec::Simple {
28480 unit: iu,
28481 use_plural: false,
28482 },
28483 ),
28484 },
28485 ));
28486 Ok(Expression::Function(Box::new(Function::new(
28487 "DATE_ADD".to_string(),
28488 vec![x, interval],
28489 ))))
28490 }
28491 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28492 // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
28493 let cast_ts = Expression::Cast(Box::new(Cast {
28494 this: x,
28495 to: DataType::Timestamp {
28496 precision: None,
28497 timezone: false,
28498 },
28499 double_colon_syntax: false,
28500 trailing_comments: Vec::new(),
28501 format: None,
28502 default: None,
28503 inferred_type: None,
28504 }));
28505 let cast_date = Expression::Cast(Box::new(Cast {
28506 this: cast_ts,
28507 to: DataType::Date,
28508 double_colon_syntax: false,
28509 trailing_comments: Vec::new(),
28510 format: None,
28511 default: None,
28512 inferred_type: None,
28513 }));
28514 Ok(Expression::Function(Box::new(Function::new(
28515 "DATE_ADD".to_string(),
28516 vec![Expression::string(&unit_str), n, cast_date],
28517 ))))
28518 }
28519 DialectType::DuckDB => {
28520 // CAST(x AS DATE) + INTERVAL n UNIT
28521 let cast_date = Expression::Cast(Box::new(Cast {
28522 this: x,
28523 to: DataType::Date,
28524 double_colon_syntax: false,
28525 trailing_comments: Vec::new(),
28526 format: None,
28527 default: None,
28528 inferred_type: None,
28529 }));
28530 let iu = match unit_str.as_str() {
28531 "YEAR" => crate::expressions::IntervalUnit::Year,
28532 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
28533 "MONTH" => crate::expressions::IntervalUnit::Month,
28534 "WEEK" => crate::expressions::IntervalUnit::Week,
28535 "HOUR" => crate::expressions::IntervalUnit::Hour,
28536 "MINUTE" => crate::expressions::IntervalUnit::Minute,
28537 "SECOND" => crate::expressions::IntervalUnit::Second,
28538 _ => crate::expressions::IntervalUnit::Day,
28539 };
28540 let interval = Expression::Interval(Box::new(
28541 crate::expressions::Interval {
28542 this: Some(n),
28543 unit: Some(
28544 crate::expressions::IntervalUnitSpec::Simple {
28545 unit: iu,
28546 use_plural: false,
28547 },
28548 ),
28549 },
28550 ));
28551 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
28552 left: cast_date,
28553 right: interval,
28554 left_comments: Vec::new(),
28555 operator_comments: Vec::new(),
28556 trailing_comments: Vec::new(),
28557 inferred_type: None,
28558 })))
28559 }
28560 DialectType::Drill => {
28561 // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
28562 let cast_date = Expression::Cast(Box::new(Cast {
28563 this: x,
28564 to: DataType::Date,
28565 double_colon_syntax: false,
28566 trailing_comments: Vec::new(),
28567 format: None,
28568 default: None,
28569 inferred_type: None,
28570 }));
28571 let iu = match unit_str.as_str() {
28572 "YEAR" => crate::expressions::IntervalUnit::Year,
28573 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
28574 "MONTH" => crate::expressions::IntervalUnit::Month,
28575 "WEEK" => crate::expressions::IntervalUnit::Week,
28576 "HOUR" => crate::expressions::IntervalUnit::Hour,
28577 "MINUTE" => crate::expressions::IntervalUnit::Minute,
28578 "SECOND" => crate::expressions::IntervalUnit::Second,
28579 _ => crate::expressions::IntervalUnit::Day,
28580 };
28581 let interval = Expression::Interval(Box::new(
28582 crate::expressions::Interval {
28583 this: Some(n),
28584 unit: Some(
28585 crate::expressions::IntervalUnitSpec::Simple {
28586 unit: iu,
28587 use_plural: false,
28588 },
28589 ),
28590 },
28591 ));
28592 Ok(Expression::Function(Box::new(Function::new(
28593 "DATE_ADD".to_string(),
28594 vec![cast_date, interval],
28595 ))))
28596 }
28597 _ => {
28598 // Default: keep as TS_OR_DS_ADD
28599 Ok(Expression::Function(Box::new(Function::new(
28600 "TS_OR_DS_ADD".to_string(),
28601 vec![x, n, unit_expr],
28602 ))))
28603 }
28604 }
28605 } else {
28606 Ok(Expression::Function(f))
28607 }
28608 } else {
28609 Ok(e)
28610 }
28611 }
28612
28613 Action::DateFromUnixDateConvert => {
28614 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
28615 if let Expression::Function(f) = e {
28616 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
28617 if matches!(
28618 target,
28619 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
28620 ) {
28621 return Ok(Expression::Function(Box::new(Function::new(
28622 "DATE_FROM_UNIX_DATE".to_string(),
28623 f.args,
28624 ))));
28625 }
28626 let n = f.args.into_iter().next().unwrap();
28627 let epoch_date = Expression::Cast(Box::new(Cast {
28628 this: Expression::string("1970-01-01"),
28629 to: DataType::Date,
28630 double_colon_syntax: false,
28631 trailing_comments: Vec::new(),
28632 format: None,
28633 default: None,
28634 inferred_type: None,
28635 }));
28636 match target {
28637 DialectType::DuckDB => {
28638 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
28639 let interval =
28640 Expression::Interval(Box::new(crate::expressions::Interval {
28641 this: Some(n),
28642 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28643 unit: crate::expressions::IntervalUnit::Day,
28644 use_plural: false,
28645 }),
28646 }));
28647 Ok(Expression::Add(Box::new(
28648 crate::expressions::BinaryOp::new(epoch_date, interval),
28649 )))
28650 }
28651 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28652 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
28653 Ok(Expression::Function(Box::new(Function::new(
28654 "DATE_ADD".to_string(),
28655 vec![Expression::string("DAY"), n, epoch_date],
28656 ))))
28657 }
28658 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
28659 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
28660 Ok(Expression::Function(Box::new(Function::new(
28661 "DATEADD".to_string(),
28662 vec![
28663 Expression::Identifier(Identifier::new("DAY")),
28664 n,
28665 epoch_date,
28666 ],
28667 ))))
28668 }
28669 DialectType::BigQuery => {
28670 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
28671 let interval =
28672 Expression::Interval(Box::new(crate::expressions::Interval {
28673 this: Some(n),
28674 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28675 unit: crate::expressions::IntervalUnit::Day,
28676 use_plural: false,
28677 }),
28678 }));
28679 Ok(Expression::Function(Box::new(Function::new(
28680 "DATE_ADD".to_string(),
28681 vec![epoch_date, interval],
28682 ))))
28683 }
28684 DialectType::MySQL
28685 | DialectType::Doris
28686 | DialectType::StarRocks
28687 | DialectType::Drill => {
28688 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
28689 let interval =
28690 Expression::Interval(Box::new(crate::expressions::Interval {
28691 this: Some(n),
28692 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28693 unit: crate::expressions::IntervalUnit::Day,
28694 use_plural: false,
28695 }),
28696 }));
28697 Ok(Expression::Function(Box::new(Function::new(
28698 "DATE_ADD".to_string(),
28699 vec![epoch_date, interval],
28700 ))))
28701 }
28702 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
28703 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
28704 Ok(Expression::Function(Box::new(Function::new(
28705 "DATE_ADD".to_string(),
28706 vec![epoch_date, n],
28707 ))))
28708 }
28709 DialectType::PostgreSQL
28710 | DialectType::Materialize
28711 | DialectType::RisingWave => {
28712 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
28713 let n_str = match &n {
28714 Expression::Literal(lit)
28715 if matches!(lit.as_ref(), Literal::Number(_)) =>
28716 {
28717 let Literal::Number(s) = lit.as_ref() else {
28718 unreachable!()
28719 };
28720 s.clone()
28721 }
28722 _ => Self::expr_to_string_static(&n),
28723 };
28724 let interval =
28725 Expression::Interval(Box::new(crate::expressions::Interval {
28726 this: Some(Expression::string(&format!("{} DAY", n_str))),
28727 unit: None,
28728 }));
28729 Ok(Expression::Add(Box::new(
28730 crate::expressions::BinaryOp::new(epoch_date, interval),
28731 )))
28732 }
28733 _ => {
28734 // Default: keep as-is
28735 Ok(Expression::Function(Box::new(Function::new(
28736 "DATE_FROM_UNIX_DATE".to_string(),
28737 vec![n],
28738 ))))
28739 }
28740 }
28741 } else {
28742 Ok(e)
28743 }
28744 }
28745
28746 Action::ArrayRemoveConvert => {
28747 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter
28748 if let Expression::ArrayRemove(bf) = e {
28749 let arr = bf.this;
28750 let target_val = bf.expression;
28751 match target {
28752 DialectType::DuckDB => {
28753 let u_id = crate::expressions::Identifier::new("_u");
28754 let lambda =
28755 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
28756 parameters: vec![u_id.clone()],
28757 body: Expression::Neq(Box::new(BinaryOp {
28758 left: Expression::Identifier(u_id),
28759 right: target_val,
28760 left_comments: Vec::new(),
28761 operator_comments: Vec::new(),
28762 trailing_comments: Vec::new(),
28763 inferred_type: None,
28764 })),
28765 colon: false,
28766 parameter_types: Vec::new(),
28767 }));
28768 Ok(Expression::Function(Box::new(Function::new(
28769 "LIST_FILTER".to_string(),
28770 vec![arr, lambda],
28771 ))))
28772 }
28773 DialectType::ClickHouse => {
28774 let u_id = crate::expressions::Identifier::new("_u");
28775 let lambda =
28776 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
28777 parameters: vec![u_id.clone()],
28778 body: Expression::Neq(Box::new(BinaryOp {
28779 left: Expression::Identifier(u_id),
28780 right: target_val,
28781 left_comments: Vec::new(),
28782 operator_comments: Vec::new(),
28783 trailing_comments: Vec::new(),
28784 inferred_type: None,
28785 })),
28786 colon: false,
28787 parameter_types: Vec::new(),
28788 }));
28789 Ok(Expression::Function(Box::new(Function::new(
28790 "arrayFilter".to_string(),
28791 vec![lambda, arr],
28792 ))))
28793 }
28794 DialectType::BigQuery => {
28795 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
28796 let u_id = crate::expressions::Identifier::new("_u");
28797 let u_col =
28798 Expression::Column(Box::new(crate::expressions::Column {
28799 name: u_id.clone(),
28800 table: None,
28801 join_mark: false,
28802 trailing_comments: Vec::new(),
28803 span: None,
28804 inferred_type: None,
28805 }));
28806 let unnest_expr =
28807 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
28808 this: arr,
28809 expressions: Vec::new(),
28810 with_ordinality: false,
28811 alias: None,
28812 offset_alias: None,
28813 }));
28814 let aliased_unnest =
28815 Expression::Alias(Box::new(crate::expressions::Alias {
28816 this: unnest_expr,
28817 alias: u_id.clone(),
28818 column_aliases: Vec::new(),
28819 pre_alias_comments: Vec::new(),
28820 trailing_comments: Vec::new(),
28821 inferred_type: None,
28822 }));
28823 let where_cond = Expression::Neq(Box::new(BinaryOp {
28824 left: u_col.clone(),
28825 right: target_val,
28826 left_comments: Vec::new(),
28827 operator_comments: Vec::new(),
28828 trailing_comments: Vec::new(),
28829 inferred_type: None,
28830 }));
28831 let subquery = Expression::Select(Box::new(
28832 crate::expressions::Select::new()
28833 .column(u_col)
28834 .from(aliased_unnest)
28835 .where_(where_cond),
28836 ));
28837 Ok(Expression::ArrayFunc(Box::new(
28838 crate::expressions::ArrayConstructor {
28839 expressions: vec![subquery],
28840 bracket_notation: false,
28841 use_list_keyword: false,
28842 },
28843 )))
28844 }
28845 _ => Ok(Expression::ArrayRemove(Box::new(
28846 crate::expressions::BinaryFunc {
28847 original_name: None,
28848 this: arr,
28849 expression: target_val,
28850 inferred_type: None,
28851 },
28852 ))),
28853 }
28854 } else {
28855 Ok(e)
28856 }
28857 }
28858
28859 Action::ArrayReverseConvert => {
28860 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
28861 if let Expression::ArrayReverse(af) = e {
28862 Ok(Expression::Function(Box::new(Function::new(
28863 "arrayReverse".to_string(),
28864 vec![af.this],
28865 ))))
28866 } else {
28867 Ok(e)
28868 }
28869 }
28870
28871 Action::JsonKeysConvert => {
28872 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
28873 if let Expression::JsonKeys(uf) = e {
28874 match target {
28875 DialectType::Spark | DialectType::Databricks => {
28876 Ok(Expression::Function(Box::new(Function::new(
28877 "JSON_OBJECT_KEYS".to_string(),
28878 vec![uf.this],
28879 ))))
28880 }
28881 DialectType::Snowflake => Ok(Expression::Function(Box::new(
28882 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
28883 ))),
28884 _ => Ok(Expression::JsonKeys(uf)),
28885 }
28886 } else {
28887 Ok(e)
28888 }
28889 }
28890
28891 Action::ParseJsonStrip => {
28892 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
28893 if let Expression::ParseJson(uf) = e {
28894 Ok(uf.this)
28895 } else {
28896 Ok(e)
28897 }
28898 }
28899
28900 Action::ArraySizeDrill => {
28901 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
28902 if let Expression::ArraySize(uf) = e {
28903 Ok(Expression::Function(Box::new(Function::new(
28904 "REPEATED_COUNT".to_string(),
28905 vec![uf.this],
28906 ))))
28907 } else {
28908 Ok(e)
28909 }
28910 }
28911
28912 Action::WeekOfYearToWeekIso => {
28913 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
28914 if let Expression::WeekOfYear(uf) = e {
28915 Ok(Expression::Function(Box::new(Function::new(
28916 "WEEKISO".to_string(),
28917 vec![uf.this],
28918 ))))
28919 } else {
28920 Ok(e)
28921 }
28922 }
28923 }
28924 })
28925 }
28926
28927 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
28928 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
28929 use crate::expressions::Function;
28930 match unit {
28931 "DAY" => {
28932 // DATE(x)
28933 Ok(Expression::Function(Box::new(Function::new(
28934 "DATE".to_string(),
28935 vec![expr.clone()],
28936 ))))
28937 }
28938 "WEEK" => {
28939 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
28940 let year_x = Expression::Function(Box::new(Function::new(
28941 "YEAR".to_string(),
28942 vec![expr.clone()],
28943 )));
28944 let week_x = Expression::Function(Box::new(Function::new(
28945 "WEEK".to_string(),
28946 vec![expr.clone(), Expression::number(1)],
28947 )));
28948 let concat_args = vec![
28949 year_x,
28950 Expression::string(" "),
28951 week_x,
28952 Expression::string(" 1"),
28953 ];
28954 let concat = Expression::Function(Box::new(Function::new(
28955 "CONCAT".to_string(),
28956 concat_args,
28957 )));
28958 Ok(Expression::Function(Box::new(Function::new(
28959 "STR_TO_DATE".to_string(),
28960 vec![concat, Expression::string("%Y %u %w")],
28961 ))))
28962 }
28963 "MONTH" => {
28964 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
28965 let year_x = Expression::Function(Box::new(Function::new(
28966 "YEAR".to_string(),
28967 vec![expr.clone()],
28968 )));
28969 let month_x = Expression::Function(Box::new(Function::new(
28970 "MONTH".to_string(),
28971 vec![expr.clone()],
28972 )));
28973 let concat_args = vec![
28974 year_x,
28975 Expression::string(" "),
28976 month_x,
28977 Expression::string(" 1"),
28978 ];
28979 let concat = Expression::Function(Box::new(Function::new(
28980 "CONCAT".to_string(),
28981 concat_args,
28982 )));
28983 Ok(Expression::Function(Box::new(Function::new(
28984 "STR_TO_DATE".to_string(),
28985 vec![concat, Expression::string("%Y %c %e")],
28986 ))))
28987 }
28988 "QUARTER" => {
28989 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
28990 let year_x = Expression::Function(Box::new(Function::new(
28991 "YEAR".to_string(),
28992 vec![expr.clone()],
28993 )));
28994 let quarter_x = Expression::Function(Box::new(Function::new(
28995 "QUARTER".to_string(),
28996 vec![expr.clone()],
28997 )));
28998 // QUARTER(x) * 3 - 2
28999 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
29000 left: quarter_x,
29001 right: Expression::number(3),
29002 left_comments: Vec::new(),
29003 operator_comments: Vec::new(),
29004 trailing_comments: Vec::new(),
29005 inferred_type: None,
29006 }));
29007 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
29008 left: mul,
29009 right: Expression::number(2),
29010 left_comments: Vec::new(),
29011 operator_comments: Vec::new(),
29012 trailing_comments: Vec::new(),
29013 inferred_type: None,
29014 }));
29015 let concat_args = vec![
29016 year_x,
29017 Expression::string(" "),
29018 sub,
29019 Expression::string(" 1"),
29020 ];
29021 let concat = Expression::Function(Box::new(Function::new(
29022 "CONCAT".to_string(),
29023 concat_args,
29024 )));
29025 Ok(Expression::Function(Box::new(Function::new(
29026 "STR_TO_DATE".to_string(),
29027 vec![concat, Expression::string("%Y %c %e")],
29028 ))))
29029 }
29030 "YEAR" => {
29031 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
29032 let year_x = Expression::Function(Box::new(Function::new(
29033 "YEAR".to_string(),
29034 vec![expr.clone()],
29035 )));
29036 let concat_args = vec![year_x, Expression::string(" 1 1")];
29037 let concat = Expression::Function(Box::new(Function::new(
29038 "CONCAT".to_string(),
29039 concat_args,
29040 )));
29041 Ok(Expression::Function(Box::new(Function::new(
29042 "STR_TO_DATE".to_string(),
29043 vec![concat, Expression::string("%Y %c %e")],
29044 ))))
29045 }
29046 _ => {
29047 // Unsupported unit -> keep as DATE_TRUNC
29048 Ok(Expression::Function(Box::new(Function::new(
29049 "DATE_TRUNC".to_string(),
29050 vec![Expression::string(unit), expr.clone()],
29051 ))))
29052 }
29053 }
29054 }
29055
29056 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
29057 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
29058 use crate::expressions::DataType;
29059 match dt {
29060 DataType::VarChar { .. } | DataType::Char { .. } => true,
29061 DataType::Struct { fields, .. } => fields
29062 .iter()
29063 .any(|f| Self::has_varchar_char_type(&f.data_type)),
29064 _ => false,
29065 }
29066 }
29067
29068 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
29069 fn normalize_varchar_to_string(
29070 dt: crate::expressions::DataType,
29071 ) -> crate::expressions::DataType {
29072 use crate::expressions::DataType;
29073 match dt {
29074 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
29075 name: "STRING".to_string(),
29076 },
29077 DataType::Struct { fields, nested } => {
29078 let fields = fields
29079 .into_iter()
29080 .map(|mut f| {
29081 f.data_type = Self::normalize_varchar_to_string(f.data_type);
29082 f
29083 })
29084 .collect();
29085 DataType::Struct { fields, nested }
29086 }
29087 other => other,
29088 }
29089 }
29090
29091 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
29092 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
29093 if let Expression::Literal(ref lit) = expr {
29094 if let crate::expressions::Literal::String(ref s) = lit.as_ref() {
29095 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
29096 let trimmed = s.trim();
29097
29098 // Find where digits end and unit text begins
29099 let digit_end = trimmed
29100 .find(|c: char| !c.is_ascii_digit())
29101 .unwrap_or(trimmed.len());
29102 if digit_end == 0 || digit_end == trimmed.len() {
29103 return expr;
29104 }
29105 let num = &trimmed[..digit_end];
29106 let unit_text = trimmed[digit_end..].trim().to_ascii_uppercase();
29107 if unit_text.is_empty() {
29108 return expr;
29109 }
29110
29111 let known_units = [
29112 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS",
29113 "WEEK", "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
29114 ];
29115 if !known_units.contains(&unit_text.as_str()) {
29116 return expr;
29117 }
29118
29119 let unit_str = unit_text.clone();
29120 // Singularize
29121 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
29122 &unit_str[..unit_str.len() - 1]
29123 } else {
29124 &unit_str
29125 };
29126 let unit = unit_singular;
29127
29128 match target {
29129 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29130 // INTERVAL '2' DAY
29131 let iu = match unit {
29132 "DAY" => crate::expressions::IntervalUnit::Day,
29133 "HOUR" => crate::expressions::IntervalUnit::Hour,
29134 "MINUTE" => crate::expressions::IntervalUnit::Minute,
29135 "SECOND" => crate::expressions::IntervalUnit::Second,
29136 "WEEK" => crate::expressions::IntervalUnit::Week,
29137 "MONTH" => crate::expressions::IntervalUnit::Month,
29138 "YEAR" => crate::expressions::IntervalUnit::Year,
29139 _ => return expr,
29140 };
29141 return Expression::Interval(Box::new(crate::expressions::Interval {
29142 this: Some(Expression::string(num)),
29143 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29144 unit: iu,
29145 use_plural: false,
29146 }),
29147 }));
29148 }
29149 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
29150 // INTERVAL '2 DAYS'
29151 let plural = if num != "1" && !unit_str.ends_with('S') {
29152 format!("{} {}S", num, unit)
29153 } else if unit_str.ends_with('S') {
29154 format!("{} {}", num, unit_str)
29155 } else {
29156 format!("{} {}", num, unit)
29157 };
29158 return Expression::Interval(Box::new(crate::expressions::Interval {
29159 this: Some(Expression::string(&plural)),
29160 unit: None,
29161 }));
29162 }
29163 _ => {
29164 // Spark/Databricks/Hive: INTERVAL '1' DAY
29165 let iu = match unit {
29166 "DAY" => crate::expressions::IntervalUnit::Day,
29167 "HOUR" => crate::expressions::IntervalUnit::Hour,
29168 "MINUTE" => crate::expressions::IntervalUnit::Minute,
29169 "SECOND" => crate::expressions::IntervalUnit::Second,
29170 "WEEK" => crate::expressions::IntervalUnit::Week,
29171 "MONTH" => crate::expressions::IntervalUnit::Month,
29172 "YEAR" => crate::expressions::IntervalUnit::Year,
29173 _ => return expr,
29174 };
29175 return Expression::Interval(Box::new(crate::expressions::Interval {
29176 this: Some(Expression::string(num)),
29177 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29178 unit: iu,
29179 use_plural: false,
29180 }),
29181 }));
29182 }
29183 }
29184 }
29185 }
29186 // If it's already an INTERVAL expression, pass through
29187 expr
29188 }
29189
29190 /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
29191 /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
29192 /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
29193 /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
29194 fn rewrite_unnest_expansion(
29195 select: &crate::expressions::Select,
29196 target: DialectType,
29197 ) -> Option<crate::expressions::Select> {
29198 use crate::expressions::{
29199 Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
29200 UnnestFunc,
29201 };
29202
29203 let index_offset: i64 = match target {
29204 DialectType::Presto | DialectType::Trino => 1,
29205 _ => 0, // BigQuery, Snowflake
29206 };
29207
29208 let if_func_name = match target {
29209 DialectType::Snowflake => "IFF",
29210 _ => "IF",
29211 };
29212
29213 let array_length_func = match target {
29214 DialectType::BigQuery => "ARRAY_LENGTH",
29215 DialectType::Presto | DialectType::Trino => "CARDINALITY",
29216 DialectType::Snowflake => "ARRAY_SIZE",
29217 _ => "ARRAY_LENGTH",
29218 };
29219
29220 let use_table_aliases = matches!(
29221 target,
29222 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
29223 );
29224 let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);
29225
29226 fn make_col(name: &str, table: Option<&str>) -> Expression {
29227 if let Some(tbl) = table {
29228 Expression::boxed_column(Column {
29229 name: Identifier::new(name.to_string()),
29230 table: Some(Identifier::new(tbl.to_string())),
29231 join_mark: false,
29232 trailing_comments: Vec::new(),
29233 span: None,
29234 inferred_type: None,
29235 })
29236 } else {
29237 Expression::Identifier(Identifier::new(name.to_string()))
29238 }
29239 }
29240
29241 fn make_join(this: Expression) -> Join {
29242 Join {
29243 this,
29244 on: None,
29245 using: Vec::new(),
29246 kind: JoinKind::Cross,
29247 use_inner_keyword: false,
29248 use_outer_keyword: false,
29249 deferred_condition: false,
29250 join_hint: None,
29251 match_condition: None,
29252 pivots: Vec::new(),
29253 comments: Vec::new(),
29254 nesting_group: 0,
29255 directed: false,
29256 }
29257 }
29258
29259 // Collect UNNEST info from SELECT expressions
29260 struct UnnestInfo {
29261 arr_expr: Expression,
29262 col_alias: String,
29263 pos_alias: String,
29264 source_alias: String,
29265 original_expr: Expression,
29266 has_outer_alias: Option<String>,
29267 }
29268
29269 let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
29270 let mut col_counter = 0usize;
29271 let mut pos_counter = 1usize;
29272 let mut source_counter = 1usize;
29273
29274 fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
29275 match expr {
29276 Expression::Unnest(u) => Some(u.this.clone()),
29277 Expression::Function(f)
29278 if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
29279 {
29280 Some(f.args[0].clone())
29281 }
29282 Expression::Alias(a) => extract_unnest_arg(&a.this),
29283 Expression::Add(op)
29284 | Expression::Sub(op)
29285 | Expression::Mul(op)
29286 | Expression::Div(op) => {
29287 extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
29288 }
29289 _ => None,
29290 }
29291 }
29292
29293 fn get_alias_name(expr: &Expression) -> Option<String> {
29294 if let Expression::Alias(a) = expr {
29295 Some(a.alias.name.clone())
29296 } else {
29297 None
29298 }
29299 }
29300
29301 for sel_expr in &select.expressions {
29302 if let Some(arr) = extract_unnest_arg(sel_expr) {
29303 col_counter += 1;
29304 pos_counter += 1;
29305 source_counter += 1;
29306
29307 let col_alias = if col_counter == 1 {
29308 "col".to_string()
29309 } else {
29310 format!("col_{}", col_counter)
29311 };
29312 let pos_alias = format!("pos_{}", pos_counter);
29313 let source_alias = format!("_u_{}", source_counter);
29314 let has_outer_alias = get_alias_name(sel_expr);
29315
29316 unnest_infos.push(UnnestInfo {
29317 arr_expr: arr,
29318 col_alias,
29319 pos_alias,
29320 source_alias,
29321 original_expr: sel_expr.clone(),
29322 has_outer_alias,
29323 });
29324 }
29325 }
29326
29327 if unnest_infos.is_empty() {
29328 return None;
29329 }
29330
29331 let series_alias = "pos".to_string();
29332 let series_source_alias = "_u".to_string();
29333 let tbl_ref = if use_table_aliases {
29334 Some(series_source_alias.as_str())
29335 } else {
29336 None
29337 };
29338
29339 // Build new SELECT expressions
29340 let mut new_select_exprs = Vec::new();
29341 for info in &unnest_infos {
29342 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
29343 let src_ref = if use_table_aliases {
29344 Some(info.source_alias.as_str())
29345 } else {
29346 None
29347 };
29348
29349 let pos_col = make_col(&series_alias, tbl_ref);
29350 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
29351 let col_ref = make_col(actual_col_name, src_ref);
29352
29353 let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
29354 pos_col.clone(),
29355 unnest_pos_col.clone(),
29356 )));
29357 let mut if_args = vec![eq_cond, col_ref];
29358 if null_third_arg {
29359 if_args.push(Expression::Null(crate::expressions::Null));
29360 }
29361
29362 let if_expr =
29363 Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
29364 let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);
29365
29366 new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
29367 final_expr,
29368 Identifier::new(actual_col_name.clone()),
29369 ))));
29370 }
29371
29372 // Build array size expressions for GREATEST
29373 let size_exprs: Vec<Expression> = unnest_infos
29374 .iter()
29375 .map(|info| {
29376 Expression::Function(Box::new(Function::new(
29377 array_length_func.to_string(),
29378 vec![info.arr_expr.clone()],
29379 )))
29380 })
29381 .collect();
29382
29383 let greatest =
29384 Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));
29385
29386 let series_end = if index_offset == 0 {
29387 Expression::Sub(Box::new(BinaryOp::new(
29388 greatest,
29389 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
29390 )))
29391 } else {
29392 greatest
29393 };
29394
29395 // Build the position array source
29396 let series_unnest_expr = match target {
29397 DialectType::BigQuery => {
29398 let gen_array = Expression::Function(Box::new(Function::new(
29399 "GENERATE_ARRAY".to_string(),
29400 vec![
29401 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
29402 series_end,
29403 ],
29404 )));
29405 Expression::Unnest(Box::new(UnnestFunc {
29406 this: gen_array,
29407 expressions: Vec::new(),
29408 with_ordinality: false,
29409 alias: None,
29410 offset_alias: None,
29411 }))
29412 }
29413 DialectType::Presto | DialectType::Trino => {
29414 let sequence = Expression::Function(Box::new(Function::new(
29415 "SEQUENCE".to_string(),
29416 vec![
29417 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
29418 series_end,
29419 ],
29420 )));
29421 Expression::Unnest(Box::new(UnnestFunc {
29422 this: sequence,
29423 expressions: Vec::new(),
29424 with_ordinality: false,
29425 alias: None,
29426 offset_alias: None,
29427 }))
29428 }
29429 DialectType::Snowflake => {
29430 let range_end = Expression::Add(Box::new(BinaryOp::new(
29431 Expression::Paren(Box::new(crate::expressions::Paren {
29432 this: series_end,
29433 trailing_comments: Vec::new(),
29434 })),
29435 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
29436 )));
29437 let gen_range = Expression::Function(Box::new(Function::new(
29438 "ARRAY_GENERATE_RANGE".to_string(),
29439 vec![
29440 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
29441 range_end,
29442 ],
29443 )));
29444 let flatten_arg =
29445 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
29446 name: Identifier::new("INPUT".to_string()),
29447 value: gen_range,
29448 separator: crate::expressions::NamedArgSeparator::DArrow,
29449 }));
29450 let flatten = Expression::Function(Box::new(Function::new(
29451 "FLATTEN".to_string(),
29452 vec![flatten_arg],
29453 )));
29454 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
29455 }
29456 _ => return None,
29457 };
29458
29459 // Build series alias expression
29460 let series_alias_expr = if use_table_aliases {
29461 let col_aliases = if matches!(target, DialectType::Snowflake) {
29462 vec![
29463 Identifier::new("seq".to_string()),
29464 Identifier::new("key".to_string()),
29465 Identifier::new("path".to_string()),
29466 Identifier::new("index".to_string()),
29467 Identifier::new(series_alias.clone()),
29468 Identifier::new("this".to_string()),
29469 ]
29470 } else {
29471 vec![Identifier::new(series_alias.clone())]
29472 };
29473 Expression::Alias(Box::new(Alias {
29474 this: series_unnest_expr,
29475 alias: Identifier::new(series_source_alias.clone()),
29476 column_aliases: col_aliases,
29477 pre_alias_comments: Vec::new(),
29478 trailing_comments: Vec::new(),
29479 inferred_type: None,
29480 }))
29481 } else {
29482 Expression::Alias(Box::new(Alias::new(
29483 series_unnest_expr,
29484 Identifier::new(series_alias.clone()),
29485 )))
29486 };
29487
29488 // Build CROSS JOINs for each UNNEST
29489 let mut joins = Vec::new();
29490 for info in &unnest_infos {
29491 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
29492
29493 let unnest_join_expr = match target {
29494 DialectType::BigQuery => {
29495 // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
29496 let unnest = UnnestFunc {
29497 this: info.arr_expr.clone(),
29498 expressions: Vec::new(),
29499 with_ordinality: true,
29500 alias: Some(Identifier::new(actual_col_name.clone())),
29501 offset_alias: Some(Identifier::new(info.pos_alias.clone())),
29502 };
29503 Expression::Unnest(Box::new(unnest))
29504 }
29505 DialectType::Presto | DialectType::Trino => {
29506 let unnest = UnnestFunc {
29507 this: info.arr_expr.clone(),
29508 expressions: Vec::new(),
29509 with_ordinality: true,
29510 alias: None,
29511 offset_alias: None,
29512 };
29513 Expression::Alias(Box::new(Alias {
29514 this: Expression::Unnest(Box::new(unnest)),
29515 alias: Identifier::new(info.source_alias.clone()),
29516 column_aliases: vec![
29517 Identifier::new(actual_col_name.clone()),
29518 Identifier::new(info.pos_alias.clone()),
29519 ],
29520 pre_alias_comments: Vec::new(),
29521 trailing_comments: Vec::new(),
29522 inferred_type: None,
29523 }))
29524 }
29525 DialectType::Snowflake => {
29526 let flatten_arg =
29527 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
29528 name: Identifier::new("INPUT".to_string()),
29529 value: info.arr_expr.clone(),
29530 separator: crate::expressions::NamedArgSeparator::DArrow,
29531 }));
29532 let flatten = Expression::Function(Box::new(Function::new(
29533 "FLATTEN".to_string(),
29534 vec![flatten_arg],
29535 )));
29536 let table_fn = Expression::Function(Box::new(Function::new(
29537 "TABLE".to_string(),
29538 vec![flatten],
29539 )));
29540 Expression::Alias(Box::new(Alias {
29541 this: table_fn,
29542 alias: Identifier::new(info.source_alias.clone()),
29543 column_aliases: vec![
29544 Identifier::new("seq".to_string()),
29545 Identifier::new("key".to_string()),
29546 Identifier::new("path".to_string()),
29547 Identifier::new(info.pos_alias.clone()),
29548 Identifier::new(actual_col_name.clone()),
29549 Identifier::new("this".to_string()),
29550 ],
29551 pre_alias_comments: Vec::new(),
29552 trailing_comments: Vec::new(),
29553 inferred_type: None,
29554 }))
29555 }
29556 _ => return None,
29557 };
29558
29559 joins.push(make_join(unnest_join_expr));
29560 }
29561
29562 // Build WHERE clause
29563 let mut where_conditions: Vec<Expression> = Vec::new();
29564 for info in &unnest_infos {
29565 let src_ref = if use_table_aliases {
29566 Some(info.source_alias.as_str())
29567 } else {
29568 None
29569 };
29570 let pos_col = make_col(&series_alias, tbl_ref);
29571 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
29572
29573 let arr_size = Expression::Function(Box::new(Function::new(
29574 array_length_func.to_string(),
29575 vec![info.arr_expr.clone()],
29576 )));
29577
29578 let size_ref = if index_offset == 0 {
29579 Expression::Paren(Box::new(crate::expressions::Paren {
29580 this: Expression::Sub(Box::new(BinaryOp::new(
29581 arr_size,
29582 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
29583 ))),
29584 trailing_comments: Vec::new(),
29585 }))
29586 } else {
29587 arr_size
29588 };
29589
29590 let eq = Expression::Eq(Box::new(BinaryOp::new(
29591 pos_col.clone(),
29592 unnest_pos_col.clone(),
29593 )));
29594 let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
29595 let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
29596 let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
29597 let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
29598 this: and_cond,
29599 trailing_comments: Vec::new(),
29600 }));
29601 let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));
29602
29603 where_conditions.push(or_cond);
29604 }
29605
29606 let where_expr = if where_conditions.len() == 1 {
29607 // Single condition: no parens needed
29608 where_conditions.into_iter().next().unwrap()
29609 } else {
29610 // Multiple conditions: wrap each OR in parens, then combine with AND
29611 let wrap = |e: Expression| {
29612 Expression::Paren(Box::new(crate::expressions::Paren {
29613 this: e,
29614 trailing_comments: Vec::new(),
29615 }))
29616 };
29617 let mut iter = where_conditions.into_iter();
29618 let first = wrap(iter.next().unwrap());
29619 let second = wrap(iter.next().unwrap());
29620 let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
29621 this: Expression::And(Box::new(BinaryOp::new(first, second))),
29622 trailing_comments: Vec::new(),
29623 }));
29624 for cond in iter {
29625 combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
29626 }
29627 combined
29628 };
29629
29630 // Build the new SELECT
29631 let mut new_select = select.clone();
29632 new_select.expressions = new_select_exprs;
29633
29634 if new_select.from.is_some() {
29635 let mut all_joins = vec![make_join(series_alias_expr)];
29636 all_joins.extend(joins);
29637 new_select.joins.extend(all_joins);
29638 } else {
29639 new_select.from = Some(From {
29640 expressions: vec![series_alias_expr],
29641 });
29642 new_select.joins.extend(joins);
29643 }
29644
29645 if let Some(ref existing_where) = new_select.where_clause {
29646 let combined = Expression::And(Box::new(BinaryOp::new(
29647 existing_where.this.clone(),
29648 where_expr,
29649 )));
29650 new_select.where_clause = Some(crate::expressions::Where { this: combined });
29651 } else {
29652 new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
29653 }
29654
29655 Some(new_select)
29656 }
29657
29658 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
29659 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
29660 match original {
29661 Expression::Unnest(_) => replacement.clone(),
29662 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
29663 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
29664 Expression::Add(op) => {
29665 let left = Self::replace_unnest_with_if(&op.left, replacement);
29666 let right = Self::replace_unnest_with_if(&op.right, replacement);
29667 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
29668 }
29669 Expression::Sub(op) => {
29670 let left = Self::replace_unnest_with_if(&op.left, replacement);
29671 let right = Self::replace_unnest_with_if(&op.right, replacement);
29672 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
29673 }
29674 Expression::Mul(op) => {
29675 let left = Self::replace_unnest_with_if(&op.left, replacement);
29676 let right = Self::replace_unnest_with_if(&op.right, replacement);
29677 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
29678 }
29679 Expression::Div(op) => {
29680 let left = Self::replace_unnest_with_if(&op.left, replacement);
29681 let right = Self::replace_unnest_with_if(&op.right, replacement);
29682 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
29683 }
29684 _ => original.clone(),
29685 }
29686 }
29687
29688 /// Decompose a JSON path like `$.y[0].z` into individual parts: `["y", "0", "z"]`.
29689 /// Strips `$` prefix, handles bracket notation, quoted strings, and removes `[*]` wildcards.
29690 fn decompose_json_path(path: &str) -> Vec<String> {
29691 let mut parts = Vec::new();
29692 let path = if path.starts_with("$.") {
29693 &path[2..]
29694 } else if path.starts_with('$') {
29695 &path[1..]
29696 } else {
29697 path
29698 };
29699 if path.is_empty() {
29700 return parts;
29701 }
29702 let mut current = String::new();
29703 let chars: Vec<char> = path.chars().collect();
29704 let mut i = 0;
29705 while i < chars.len() {
29706 match chars[i] {
29707 '.' => {
29708 if !current.is_empty() {
29709 parts.push(current.clone());
29710 current.clear();
29711 }
29712 i += 1;
29713 }
29714 '[' => {
29715 if !current.is_empty() {
29716 parts.push(current.clone());
29717 current.clear();
29718 }
29719 i += 1;
29720 let mut bracket_content = String::new();
29721 while i < chars.len() && chars[i] != ']' {
29722 if chars[i] == '"' || chars[i] == '\'' {
29723 let quote = chars[i];
29724 i += 1;
29725 while i < chars.len() && chars[i] != quote {
29726 bracket_content.push(chars[i]);
29727 i += 1;
29728 }
29729 if i < chars.len() {
29730 i += 1;
29731 }
29732 } else {
29733 bracket_content.push(chars[i]);
29734 i += 1;
29735 }
29736 }
29737 if i < chars.len() {
29738 i += 1;
29739 }
29740 if bracket_content != "*" {
29741 parts.push(bracket_content);
29742 }
29743 }
29744 _ => {
29745 current.push(chars[i]);
29746 i += 1;
29747 }
29748 }
29749 }
29750 if !current.is_empty() {
29751 parts.push(current);
29752 }
29753 parts
29754 }
29755
29756 /// Strip `$` prefix from a JSON path, keeping the rest.
29757 /// `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`
29758 fn strip_json_dollar_prefix(path: &str) -> String {
29759 if path.starts_with("$.") {
29760 path[2..].to_string()
29761 } else if path.starts_with('$') {
29762 path[1..].to_string()
29763 } else {
29764 path.to_string()
29765 }
29766 }
29767
29768 /// Strip `[*]` wildcards from a JSON path.
29769 /// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`
29770 fn strip_json_wildcards(path: &str) -> String {
29771 path.replace("[*]", "")
29772 .replace("..", ".") // Clean double dots from `$.y[*].z` -> `$.y..z`
29773 .trim_end_matches('.')
29774 .to_string()
29775 }
29776
29777 /// Convert bracket notation to dot notation for JSON paths.
29778 /// `$["a b"]` -> `$."a b"`, `$["key"]` -> `$.key`
29779 fn bracket_to_dot_notation(path: &str) -> String {
29780 let mut result = String::new();
29781 let chars: Vec<char> = path.chars().collect();
29782 let mut i = 0;
29783 while i < chars.len() {
29784 if chars[i] == '[' {
29785 // Read bracket content
29786 i += 1;
29787 let mut bracket_content = String::new();
29788 let mut is_quoted = false;
29789 let mut _quote_char = '"';
29790 while i < chars.len() && chars[i] != ']' {
29791 if chars[i] == '"' || chars[i] == '\'' {
29792 is_quoted = true;
29793 _quote_char = chars[i];
29794 i += 1;
29795 while i < chars.len() && chars[i] != _quote_char {
29796 bracket_content.push(chars[i]);
29797 i += 1;
29798 }
29799 if i < chars.len() {
29800 i += 1;
29801 }
29802 } else {
29803 bracket_content.push(chars[i]);
29804 i += 1;
29805 }
29806 }
29807 if i < chars.len() {
29808 i += 1;
29809 } // skip ]
29810 if bracket_content == "*" {
29811 // Keep wildcard as-is
29812 result.push_str("[*]");
29813 } else if is_quoted {
29814 // Quoted bracket -> dot notation with quotes
29815 result.push('.');
29816 result.push('"');
29817 result.push_str(&bracket_content);
29818 result.push('"');
29819 } else {
29820 // Numeric index -> keep as bracket
29821 result.push('[');
29822 result.push_str(&bracket_content);
29823 result.push(']');
29824 }
29825 } else {
29826 result.push(chars[i]);
29827 i += 1;
29828 }
29829 }
29830 result
29831 }
29832
29833 /// Convert JSON path bracket quoted strings to use single quotes instead of double quotes.
29834 /// `$["a b"]` -> `$['a b']`
29835 fn bracket_to_single_quotes(path: &str) -> String {
29836 let mut result = String::new();
29837 let chars: Vec<char> = path.chars().collect();
29838 let mut i = 0;
29839 while i < chars.len() {
29840 if chars[i] == '[' && i + 1 < chars.len() && chars[i + 1] == '"' {
29841 result.push('[');
29842 result.push('\'');
29843 i += 2; // skip [ and "
29844 while i < chars.len() && chars[i] != '"' {
29845 result.push(chars[i]);
29846 i += 1;
29847 }
29848 if i < chars.len() {
29849 i += 1;
29850 } // skip closing "
29851 result.push('\'');
29852 } else {
29853 result.push(chars[i]);
29854 i += 1;
29855 }
29856 }
29857 result
29858 }
29859
    /// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
    /// or PostgreSQL #temp -> TEMPORARY.
    /// Also strips # from INSERT INTO #table for non-TSQL targets.
    ///
    /// Any expression that does not match one of the handled shapes/targets
    /// is returned unchanged.
    fn transform_select_into(
        expr: Expression,
        _source: DialectType,
        target: DialectType,
    ) -> Expression {
        use crate::expressions::{CreateTable, Expression, TableRef};

        // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets.
        // A leading '#' marks a TSQL local temp table; engines other than
        // TSQL/Fabric would treat it as part of the name, so strip it.
        if let Expression::Insert(ref insert) = expr {
            if insert.table.name.name.starts_with('#')
                && !matches!(target, DialectType::TSQL | DialectType::Fabric)
            {
                let mut new_insert = insert.clone();
                new_insert.table.name.name =
                    insert.table.name.name.trim_start_matches('#').to_string();
                return Expression::Insert(new_insert);
            }
            // Inserts with no '#' prefix (or TSQL-family targets) pass through.
            return expr;
        }

        if let Expression::Select(ref select) = expr {
            if let Some(ref into) = select.into {
                // Pull the raw target table name out of the INTO clause.
                let table_name_raw = match &into.this {
                    Expression::Table(tr) => tr.name.name.clone(),
                    Expression::Identifier(id) => id.name.clone(),
                    _ => String::new(),
                };
                // Temp-ness comes from either the TSQL '#' prefix or an
                // explicit TEMPORARY on the INTO clause itself.
                let is_temp = table_name_raw.starts_with('#') || into.temporary;
                let clean_name = table_name_raw.trim_start_matches('#').to_string();

                match target {
                    DialectType::DuckDB | DialectType::Snowflake => {
                        // SELECT INTO -> CREATE TABLE AS SELECT
                        // The SELECT (minus its INTO clause) becomes the
                        // AS-SELECT body of a fresh CREATE TABLE node.
                        let mut new_select = select.clone();
                        new_select.into = None;
                        let ct = CreateTable {
                            name: TableRef::new(clean_name),
                            on_cluster: None,
                            columns: Vec::new(),
                            constraints: Vec::new(),
                            if_not_exists: false,
                            temporary: is_temp,
                            or_replace: false,
                            table_modifier: None,
                            as_select: Some(Expression::Select(new_select)),
                            as_select_parenthesized: false,
                            on_commit: None,
                            clone_source: None,
                            clone_at_clause: None,
                            shallow_clone: false,
                            is_copy: false,
                            leading_comments: Vec::new(),
                            with_properties: Vec::new(),
                            teradata_post_name_options: Vec::new(),
                            with_data: None,
                            with_statistics: None,
                            teradata_indexes: Vec::new(),
                            with_cte: None,
                            properties: Vec::new(),
                            partition_of: None,
                            post_table_properties: Vec::new(),
                            mysql_table_options: Vec::new(),
                            inherits: Vec::new(),
                            on_property: None,
                            copy_grants: false,
                            using_template: None,
                            rollup: None,
                            uuid: None,
                            with_partition_columns: Vec::new(),
                            with_connection: None,
                        };
                        return Expression::CreateTable(Box::new(ct));
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // PostgreSQL: #foo -> INTO TEMPORARY foo
                        // Only rewrite when the temp-ness came from the '#'
                        // prefix; an already-TEMPORARY INTO is left alone.
                        if is_temp && !into.temporary {
                            let mut new_select = select.clone();
                            let mut new_into = into.clone();
                            new_into.temporary = true;
                            new_into.unlogged = false;
                            new_into.this = Expression::Table(Box::new(TableRef::new(clean_name)));
                            new_select.into = Some(new_into);
                            Expression::Select(new_select)
                        } else {
                            expr
                        }
                    }
                    _ => expr,
                }
            } else {
                expr
            }
        } else {
            expr
        }
    }
29959
    /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
    /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
    ///
    /// Mutates `ct` in place. Two passes:
    /// 1. Presto-style `WITH (...)` pairs in `ct.with_properties` are drained
    ///    and re-emitted per target (Presto family keeps them normalized, Hive
    ///    maps FORMAT to STORED AS, Spark/Databricks to USING, DuckDB strips
    ///    them, everything else keeps them as-is).
    /// 2. Structured entries in `ct.properties` (STORED AS / TBLPROPERTIES /
    ///    PARTITIONED BY) are folded back into `WITH (...)` pairs for
    ///    Presto-family targets, stripped for DuckDB, or (for other targets)
    ///    have quoted STORED AS format names unquoted.
    fn transform_create_table_properties(
        ct: &mut crate::expressions::CreateTable,
        _source: DialectType,
        target: DialectType,
    ) {
        use crate::expressions::{
            BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
            Properties,
        };

        // Helper to convert a raw property value string to the correct Expression
        let value_to_expr = |v: &str| -> Expression {
            let trimmed = v.trim();
            // Check if it's a quoted string (starts and ends with ')
            if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
                Expression::Literal(Box::new(Literal::String(
                    trimmed[1..trimmed.len() - 1].to_string(),
                )))
            }
            // Check if it's a number
            else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
                Expression::Literal(Box::new(Literal::Number(trimmed.to_string())))
            }
            // Check if it's ARRAY[...] or ARRAY(...)
            else if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
                // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
                let inner = trimmed
                    .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
                    .trim_start_matches('[')
                    .trim_start_matches('(')
                    .trim_end_matches(']')
                    .trim_end_matches(')');
                let elements: Vec<Expression> = inner
                    .split(',')
                    .map(|e| {
                        let elem = e.trim().trim_matches('\'');
                        Expression::Literal(Box::new(Literal::String(elem.to_string())))
                    })
                    .collect();
                Expression::Function(Box::new(crate::expressions::Function::new(
                    "ARRAY".to_string(),
                    elements,
                )))
            }
            // Otherwise, just output as identifier (unquoted)
            else {
                Expression::Identifier(Identifier::new(trimmed.to_string()))
            }
        };

        // Nothing to do when neither property list has entries.
        if ct.with_properties.is_empty() && ct.properties.is_empty() {
            return;
        }

        // Handle Presto-style WITH properties
        if !ct.with_properties.is_empty() {
            // Extract FORMAT property and remaining properties
            let mut format_value: Option<String> = None;
            let mut partitioned_by: Option<String> = None;
            let mut other_props: Vec<(String, String)> = Vec::new();

            for (key, value) in ct.with_properties.drain(..) {
                if key.eq_ignore_ascii_case("FORMAT") {
                    // Strip surrounding quotes from value if present
                    format_value = Some(value.trim_matches('\'').to_string());
                } else if key.eq_ignore_ascii_case("PARTITIONED_BY") {
                    partitioned_by = Some(value);
                } else {
                    other_props.push((key, value));
                }
            }

            match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Presto: keep WITH properties but lowercase 'format' key
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("format".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        // Convert (col1, col2) to ARRAY['col1', 'col2'] format
                        let trimmed = part.trim();
                        let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
                        // Also handle ARRAY['...'] format - keep as-is
                        if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), part));
                        } else {
                            // Parse column names from the parenthesized list
                            let cols: Vec<&str> = inner
                                .split(',')
                                .map(|c| c.trim().trim_matches('"').trim_matches('\''))
                                .collect();
                            let array_val = format!(
                                "ARRAY[{}]",
                                cols.iter()
                                    .map(|c| format!("'{}'", c))
                                    .collect::<Vec<_>>()
                                    .join(", ")
                            );
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), array_val));
                        }
                    }
                    ct.with_properties.extend(other_props);
                }
                DialectType::Hive => {
                    // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        // hive_format = true selects STORED AS rendering.
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
                                    value: true,
                                }))),
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        // PARTITIONED_BY handling is complex - move columns to partitioned by
                        // For now, the partition columns are extracted from the column list
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        // Remaining key/value pairs become 'key' = value entries
                        // inside a single TBLPROPERTIES node.
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Box::new(Literal::String(k))),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::Spark | DialectType::Databricks => {
                    // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                hive_format: None, // None means USING syntax
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Box::new(Literal::String(k))),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::DuckDB => {
                    // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
                    // Keep nothing
                }
                _ => {
                    // For other dialects, keep WITH properties as-is
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("FORMAT".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        ct.with_properties
                            .push(("PARTITIONED_BY".to_string(), part));
                    }
                    ct.with_properties.extend(other_props);
                }
            }
        }

        // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
        // and Hive STORED AS -> Presto WITH (format=...) conversion
        if !ct.properties.is_empty() {
            let is_presto_target = matches!(
                target,
                DialectType::Presto | DialectType::Trino | DialectType::Athena
            );
            let is_duckdb_target = matches!(target, DialectType::DuckDB);

            if is_presto_target || is_duckdb_target {
                // Drain ct.properties, rebuilding the kept subset in
                // new_properties; drained entries may instead be re-emitted as
                // WITH pairs (Presto) or dropped entirely (DuckDB).
                let mut new_properties = Vec::new();
                for prop in ct.properties.drain(..) {
                    match &prop {
                        Expression::FileFormatProperty(ffp) => {
                            if is_presto_target {
                                // Convert STORED AS/USING to WITH (format=...)
                                if let Some(ref fmt_expr) = ffp.this {
                                    let fmt_str = match fmt_expr.as_ref() {
                                        Expression::Identifier(id) => id.name.clone(),
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::String(_)) =>
                                        {
                                            let Literal::String(s) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            s.clone()
                                        }
                                        _ => {
                                            // Unrecognized format expression:
                                            // keep the property untouched.
                                            new_properties.push(prop);
                                            continue;
                                        }
                                    };
                                    ct.with_properties
                                        .push(("format".to_string(), format!("'{}'", fmt_str)));
                                }
                            }
                            // DuckDB: just strip file format properties
                        }
                        // Convert TBLPROPERTIES to WITH properties for Presto target
                        Expression::Properties(props) if is_presto_target => {
                            for expr in &props.expressions {
                                if let Expression::Eq(eq) = expr {
                                    // Extract key and value from the Eq expression
                                    let key = match &eq.left {
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::String(_)) =>
                                        {
                                            let Literal::String(s) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            s.clone()
                                        }
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    let value = match &eq.right {
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::String(_)) =>
                                        {
                                            let Literal::String(s) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            format!("'{}'", s)
                                        }
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::Number(_)) =>
                                        {
                                            let Literal::Number(n) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            n.clone()
                                        }
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    ct.with_properties.push((key, value));
                                }
                            }
                        }
                        // Convert PartitionedByProperty for Presto target
                        Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
                            // Check if it contains ColumnDef expressions (Hive-style with types)
                            if let Expression::Tuple(ref tuple) = *pbp.this {
                                let mut col_names: Vec<String> = Vec::new();
                                let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                                let mut has_col_defs = false;
                                for expr in &tuple.expressions {
                                    if let Expression::ColumnDef(ref cd) = expr {
                                        has_col_defs = true;
                                        col_names.push(cd.name.name.clone());
                                        col_defs.push(*cd.clone());
                                    } else if let Expression::Column(ref col) = expr {
                                        col_names.push(col.name.name.clone());
                                    } else if let Expression::Identifier(ref id) = expr {
                                        col_names.push(id.name.clone());
                                    } else {
                                        // For function expressions like MONTHS(y), serialize to SQL
                                        let generic = Dialect::get(DialectType::Generic);
                                        if let Ok(sql) = generic.generate(expr) {
                                            col_names.push(sql);
                                        }
                                    }
                                }
                                if has_col_defs {
                                    // Merge partition column defs into the main column list
                                    for cd in col_defs {
                                        ct.columns.push(cd);
                                    }
                                }
                                if !col_names.is_empty() {
                                    // Add PARTITIONED_BY property
                                    let array_val = format!(
                                        "ARRAY[{}]",
                                        col_names
                                            .iter()
                                            .map(|n| format!("'{}'", n))
                                            .collect::<Vec<_>>()
                                            .join(", ")
                                    );
                                    ct.with_properties
                                        .push(("PARTITIONED_BY".to_string(), array_val));
                                }
                            }
                            // Skip - don't keep in properties
                        }
                        _ => {
                            if !is_duckdb_target {
                                new_properties.push(prop);
                            }
                        }
                    }
                }
                ct.properties = new_properties;
            } else {
                // For Hive/Spark targets, unquote format names in STORED AS
                for prop in &mut ct.properties {
                    if let Expression::FileFormatProperty(ref mut ffp) = prop {
                        if let Some(ref mut fmt_expr) = ffp.this {
                            if let Expression::Literal(lit) = fmt_expr.as_ref() {
                                if let Literal::String(s) = lit.as_ref() {
                                    // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
                                    let unquoted = s.clone();
                                    *fmt_expr =
                                        Box::new(Expression::Identifier(Identifier::new(unquoted)));
                                }
                            }
                        }
                    }
                }
            }
        }
    }
30303
30304 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
30305 fn apply_partitioned_by(
30306 ct: &mut crate::expressions::CreateTable,
30307 partitioned_by_value: &str,
30308 target: DialectType,
30309 ) {
30310 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
30311
30312 // Parse the ARRAY['col1', 'col2'] value to extract column names
30313 let mut col_names: Vec<String> = Vec::new();
30314 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
30315 let inner = partitioned_by_value
30316 .trim()
30317 .trim_start_matches("ARRAY")
30318 .trim_start_matches('[')
30319 .trim_start_matches('(')
30320 .trim_end_matches(']')
30321 .trim_end_matches(')');
30322 for part in inner.split(',') {
30323 let col = part.trim().trim_matches('\'').trim_matches('"');
30324 if !col.is_empty() {
30325 col_names.push(col.to_string());
30326 }
30327 }
30328
30329 if col_names.is_empty() {
30330 return;
30331 }
30332
30333 if matches!(target, DialectType::Hive) {
30334 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
30335 let mut partition_col_defs = Vec::new();
30336 for col_name in &col_names {
30337 // Find and remove from columns
30338 if let Some(pos) = ct
30339 .columns
30340 .iter()
30341 .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
30342 {
30343 let col_def = ct.columns.remove(pos);
30344 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
30345 }
30346 }
30347 if !partition_col_defs.is_empty() {
30348 ct.properties
30349 .push(Expression::PartitionedByProperty(Box::new(
30350 PartitionedByProperty {
30351 this: Box::new(Expression::Tuple(Box::new(Tuple {
30352 expressions: partition_col_defs,
30353 }))),
30354 },
30355 )));
30356 }
30357 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
30358 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
30359 // Use quoted identifiers to match the quoting style of the original column definitions
30360 let partition_exprs: Vec<Expression> = col_names
30361 .iter()
30362 .map(|name| {
30363 // Check if the column exists in the column list and use its quoting
30364 let is_quoted = ct
30365 .columns
30366 .iter()
30367 .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
30368 let ident = if is_quoted {
30369 Identifier::quoted(name.clone())
30370 } else {
30371 Identifier::new(name.clone())
30372 };
30373 Expression::boxed_column(Column {
30374 name: ident,
30375 table: None,
30376 join_mark: false,
30377 trailing_comments: Vec::new(),
30378 span: None,
30379 inferred_type: None,
30380 })
30381 })
30382 .collect();
30383 ct.properties
30384 .push(Expression::PartitionedByProperty(Box::new(
30385 PartitionedByProperty {
30386 this: Box::new(Expression::Tuple(Box::new(Tuple {
30387 expressions: partition_exprs,
30388 }))),
30389 },
30390 )));
30391 }
30392 // DuckDB: strip partitioned_by entirely (already handled)
30393 }
30394
30395 /// Convert a DataType to Spark's type string format (using angle brackets)
30396 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
30397 use crate::expressions::DataType;
30398 match dt {
30399 DataType::Int { .. } => "INT".to_string(),
30400 DataType::BigInt { .. } => "BIGINT".to_string(),
30401 DataType::SmallInt { .. } => "SMALLINT".to_string(),
30402 DataType::TinyInt { .. } => "TINYINT".to_string(),
30403 DataType::Float { .. } => "FLOAT".to_string(),
30404 DataType::Double { .. } => "DOUBLE".to_string(),
30405 DataType::Decimal {
30406 precision: Some(p),
30407 scale: Some(s),
30408 } => format!("DECIMAL({}, {})", p, s),
30409 DataType::Decimal {
30410 precision: Some(p), ..
30411 } => format!("DECIMAL({})", p),
30412 DataType::Decimal { .. } => "DECIMAL".to_string(),
30413 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
30414 "STRING".to_string()
30415 }
30416 DataType::Char { .. } => "STRING".to_string(),
30417 DataType::Boolean => "BOOLEAN".to_string(),
30418 DataType::Date => "DATE".to_string(),
30419 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
30420 DataType::Json | DataType::JsonB => "STRING".to_string(),
30421 DataType::Binary { .. } => "BINARY".to_string(),
30422 DataType::Array { element_type, .. } => {
30423 format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
30424 }
30425 DataType::Map {
30426 key_type,
30427 value_type,
30428 } => format!(
30429 "MAP<{}, {}>",
30430 Self::data_type_to_spark_string(key_type),
30431 Self::data_type_to_spark_string(value_type)
30432 ),
30433 DataType::Struct { fields, .. } => {
30434 let field_strs: Vec<String> = fields
30435 .iter()
30436 .map(|f| {
30437 if f.name.is_empty() {
30438 Self::data_type_to_spark_string(&f.data_type)
30439 } else {
30440 format!(
30441 "{}: {}",
30442 f.name,
30443 Self::data_type_to_spark_string(&f.data_type)
30444 )
30445 }
30446 })
30447 .collect();
30448 format!("STRUCT<{}>", field_strs.join(", "))
30449 }
30450 DataType::Custom { name } => name.clone(),
30451 _ => format!("{:?}", dt),
30452 }
30453 }
30454
30455 /// Extract value and unit from an Interval expression
30456 /// Returns (value_expression, IntervalUnit)
30457 fn extract_interval_parts(
30458 interval_expr: &Expression,
30459 ) -> (Expression, crate::expressions::IntervalUnit) {
30460 use crate::expressions::{IntervalUnit, IntervalUnitSpec};
30461
30462 if let Expression::Interval(iv) = interval_expr {
30463 let val = iv.this.clone().unwrap_or(Expression::number(0));
30464 let unit = match &iv.unit {
30465 Some(IntervalUnitSpec::Simple { unit, .. }) => *unit,
30466 None => {
30467 // Unit might be embedded in the string value (Snowflake format: '5 DAY')
30468 if let Expression::Literal(lit) = &val {
30469 if let crate::expressions::Literal::String(s) = lit.as_ref() {
30470 let parts: Vec<&str> = s.trim().splitn(2, ' ').collect();
30471 if parts.len() == 2 {
30472 let unit_str = parts[1].trim().to_ascii_uppercase();
30473 let parsed_unit = match unit_str.as_str() {
30474 "YEAR" | "YEARS" => IntervalUnit::Year,
30475 "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
30476 "MONTH" | "MONTHS" => IntervalUnit::Month,
30477 "WEEK" | "WEEKS" | "ISOWEEK" => IntervalUnit::Week,
30478 "DAY" | "DAYS" => IntervalUnit::Day,
30479 "HOUR" | "HOURS" => IntervalUnit::Hour,
30480 "MINUTE" | "MINUTES" => IntervalUnit::Minute,
30481 "SECOND" | "SECONDS" => IntervalUnit::Second,
30482 "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
30483 "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
30484 _ => IntervalUnit::Day,
30485 };
30486 // Return just the numeric part as value and parsed unit
30487 return (
30488 Expression::Literal(Box::new(
30489 crate::expressions::Literal::String(parts[0].to_string()),
30490 )),
30491 parsed_unit,
30492 );
30493 }
30494 IntervalUnit::Day
30495 } else {
30496 IntervalUnit::Day
30497 }
30498 } else {
30499 IntervalUnit::Day
30500 }
30501 }
30502 _ => IntervalUnit::Day,
30503 };
30504 (val, unit)
30505 } else {
30506 // Not an interval - pass through
30507 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
30508 }
30509 }
30510
30511 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
30512 fn normalize_bigquery_function(
30513 e: Expression,
30514 source: DialectType,
30515 target: DialectType,
30516 ) -> Result<Expression> {
30517 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
30518
30519 let f = if let Expression::Function(f) = e {
30520 *f
30521 } else {
30522 return Ok(e);
30523 };
30524 let name = f.name.to_ascii_uppercase();
30525 let mut args = f.args;
30526
30527 /// Helper to extract unit string from an identifier, column, or literal expression
30528 fn get_unit_str(expr: &Expression) -> String {
30529 match expr {
30530 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
30531 Expression::Var(v) => v.this.to_ascii_uppercase(),
30532 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
30533 let Literal::String(s) = lit.as_ref() else {
30534 unreachable!()
30535 };
30536 s.to_ascii_uppercase()
30537 }
30538 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
30539 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
30540 Expression::Function(f) => {
30541 let base = f.name.to_ascii_uppercase();
30542 if !f.args.is_empty() {
30543 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
30544 let inner = get_unit_str(&f.args[0]);
30545 format!("{}({})", base, inner)
30546 } else {
30547 base
30548 }
30549 }
30550 _ => "DAY".to_string(),
30551 }
30552 }
30553
30554 /// Parse unit string to IntervalUnit
30555 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
30556 match s {
30557 "YEAR" => crate::expressions::IntervalUnit::Year,
30558 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
30559 "MONTH" => crate::expressions::IntervalUnit::Month,
30560 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
30561 "DAY" => crate::expressions::IntervalUnit::Day,
30562 "HOUR" => crate::expressions::IntervalUnit::Hour,
30563 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30564 "SECOND" => crate::expressions::IntervalUnit::Second,
30565 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
30566 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
30567 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
30568 _ => crate::expressions::IntervalUnit::Day,
30569 }
30570 }
30571
30572 match name.as_str() {
30573 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
30574 // (BigQuery: result = date1 - date2, Standard: result = end - start)
30575 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
30576 let date1 = args.remove(0);
30577 let date2 = args.remove(0);
30578 let unit_expr = args.remove(0);
30579 let unit_str = get_unit_str(&unit_expr);
30580
30581 if matches!(target, DialectType::BigQuery) {
30582 // BigQuery -> BigQuery: just uppercase the unit
30583 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
30584 return Ok(Expression::Function(Box::new(Function::new(
30585 f.name,
30586 vec![date1, date2, unit],
30587 ))));
30588 }
30589
30590 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
30591 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
30592 if matches!(target, DialectType::Snowflake) {
30593 return Ok(Expression::TimestampDiff(Box::new(
30594 crate::expressions::TimestampDiff {
30595 this: Box::new(date2),
30596 expression: Box::new(date1),
30597 unit: Some(unit_str),
30598 },
30599 )));
30600 }
30601
30602 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
30603 if matches!(target, DialectType::DuckDB) {
30604 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
30605 // CAST to TIME
30606 let cast_fn = |e: Expression| -> Expression {
30607 match e {
30608 Expression::Literal(lit)
30609 if matches!(lit.as_ref(), Literal::String(_)) =>
30610 {
30611 let Literal::String(s) = lit.as_ref() else {
30612 unreachable!()
30613 };
30614 Expression::Cast(Box::new(Cast {
30615 this: Expression::Literal(Box::new(Literal::String(
30616 s.clone(),
30617 ))),
30618 to: DataType::Custom {
30619 name: "TIME".to_string(),
30620 },
30621 trailing_comments: vec![],
30622 double_colon_syntax: false,
30623 format: None,
30624 default: None,
30625 inferred_type: None,
30626 }))
30627 }
30628 other => other,
30629 }
30630 };
30631 (cast_fn(date1), cast_fn(date2))
30632 } else if name == "DATETIME_DIFF" {
30633 // CAST to TIMESTAMP
30634 (
30635 Self::ensure_cast_timestamp(date1),
30636 Self::ensure_cast_timestamp(date2),
30637 )
30638 } else {
30639 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
30640 (
30641 Self::ensure_cast_timestamptz(date1),
30642 Self::ensure_cast_timestamptz(date2),
30643 )
30644 };
30645 return Ok(Expression::Function(Box::new(Function::new(
30646 "DATE_DIFF".to_string(),
30647 vec![
30648 Expression::Literal(Box::new(Literal::String(unit_str))),
30649 cast_d2,
30650 cast_d1,
30651 ],
30652 ))));
30653 }
30654
30655 // Convert to standard TIMESTAMPDIFF(unit, start, end)
30656 let unit = Expression::Identifier(Identifier::new(unit_str));
30657 Ok(Expression::Function(Box::new(Function::new(
30658 "TIMESTAMPDIFF".to_string(),
30659 vec![unit, date2, date1],
30660 ))))
30661 }
30662
30663 // DATEDIFF(unit, start, end) -> target-specific form
30664 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
30665 "DATEDIFF" if args.len() == 3 => {
30666 let arg0 = args.remove(0);
30667 let arg1 = args.remove(0);
30668 let arg2 = args.remove(0);
30669 let unit_str = get_unit_str(&arg0);
30670
30671 // Redshift DATEDIFF(unit, start, end) order: result = end - start
30672 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
30673 // TSQL DATEDIFF(unit, start, end) order: result = end - start
30674
30675 if matches!(target, DialectType::Snowflake) {
30676 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
30677 let unit = Expression::Identifier(Identifier::new(unit_str));
30678 return Ok(Expression::Function(Box::new(Function::new(
30679 "DATEDIFF".to_string(),
30680 vec![unit, arg1, arg2],
30681 ))));
30682 }
30683
30684 if matches!(target, DialectType::DuckDB) {
30685 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
30686 let cast_d1 = Self::ensure_cast_timestamp(arg1);
30687 let cast_d2 = Self::ensure_cast_timestamp(arg2);
30688 return Ok(Expression::Function(Box::new(Function::new(
30689 "DATE_DIFF".to_string(),
30690 vec![
30691 Expression::Literal(Box::new(Literal::String(unit_str))),
30692 cast_d1,
30693 cast_d2,
30694 ],
30695 ))));
30696 }
30697
30698 if matches!(target, DialectType::BigQuery) {
30699 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
30700 let cast_d1 = Self::ensure_cast_datetime(arg1);
30701 let cast_d2 = Self::ensure_cast_datetime(arg2);
30702 let unit = Expression::Identifier(Identifier::new(unit_str));
30703 return Ok(Expression::Function(Box::new(Function::new(
30704 "DATE_DIFF".to_string(),
30705 vec![cast_d2, cast_d1, unit],
30706 ))));
30707 }
30708
30709 if matches!(target, DialectType::Spark | DialectType::Databricks) {
30710 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
30711 let unit = Expression::Identifier(Identifier::new(unit_str));
30712 return Ok(Expression::Function(Box::new(Function::new(
30713 "DATEDIFF".to_string(),
30714 vec![unit, arg1, arg2],
30715 ))));
30716 }
30717
30718 if matches!(target, DialectType::Hive) {
30719 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
30720 match unit_str.as_str() {
30721 "MONTH" => {
30722 return Ok(Expression::Function(Box::new(Function::new(
30723 "CAST".to_string(),
30724 vec![Expression::Function(Box::new(Function::new(
30725 "MONTHS_BETWEEN".to_string(),
30726 vec![arg2, arg1],
30727 )))],
30728 ))));
30729 }
30730 "WEEK" => {
30731 return Ok(Expression::Cast(Box::new(Cast {
30732 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
30733 Expression::Function(Box::new(Function::new(
30734 "DATEDIFF".to_string(),
30735 vec![arg2, arg1],
30736 ))),
30737 Expression::Literal(Box::new(Literal::Number("7".to_string()))),
30738 ))),
30739 to: DataType::Int {
30740 length: None,
30741 integer_spelling: false,
30742 },
30743 trailing_comments: vec![],
30744 double_colon_syntax: false,
30745 format: None,
30746 default: None,
30747 inferred_type: None,
30748 })));
30749 }
30750 _ => {
30751 // Default: DATEDIFF(end, start) for DAY
30752 return Ok(Expression::Function(Box::new(Function::new(
30753 "DATEDIFF".to_string(),
30754 vec![arg2, arg1],
30755 ))));
30756 }
30757 }
30758 }
30759
30760 if matches!(
30761 target,
30762 DialectType::Presto | DialectType::Trino | DialectType::Athena
30763 ) {
30764 // Presto/Trino: DATE_DIFF('UNIT', start, end)
30765 return Ok(Expression::Function(Box::new(Function::new(
30766 "DATE_DIFF".to_string(),
30767 vec![
30768 Expression::Literal(Box::new(Literal::String(unit_str))),
30769 arg1,
30770 arg2,
30771 ],
30772 ))));
30773 }
30774
30775 if matches!(target, DialectType::TSQL) {
30776 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
30777 let cast_d2 = Self::ensure_cast_datetime2(arg2);
30778 let unit = Expression::Identifier(Identifier::new(unit_str));
30779 return Ok(Expression::Function(Box::new(Function::new(
30780 "DATEDIFF".to_string(),
30781 vec![unit, arg1, cast_d2],
30782 ))));
30783 }
30784
30785 if matches!(target, DialectType::PostgreSQL) {
30786 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
30787 // For now, use DATEDIFF (passthrough) with uppercased unit
30788 let unit = Expression::Identifier(Identifier::new(unit_str));
30789 return Ok(Expression::Function(Box::new(Function::new(
30790 "DATEDIFF".to_string(),
30791 vec![unit, arg1, arg2],
30792 ))));
30793 }
30794
30795 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
30796 let unit = Expression::Identifier(Identifier::new(unit_str));
30797 Ok(Expression::Function(Box::new(Function::new(
30798 "DATEDIFF".to_string(),
30799 vec![unit, arg1, arg2],
30800 ))))
30801 }
30802
30803 // DATE_DIFF(date1, date2, unit) -> standard form
30804 "DATE_DIFF" if args.len() == 3 => {
30805 let date1 = args.remove(0);
30806 let date2 = args.remove(0);
30807 let unit_expr = args.remove(0);
30808 let unit_str = get_unit_str(&unit_expr);
30809
30810 if matches!(target, DialectType::BigQuery) {
30811 // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
30812 let norm_unit = if unit_str == "WEEK(SUNDAY)" {
30813 "WEEK".to_string()
30814 } else {
30815 unit_str
30816 };
30817 let norm_d1 = Self::date_literal_to_cast(date1);
30818 let norm_d2 = Self::date_literal_to_cast(date2);
30819 let unit = Expression::Identifier(Identifier::new(norm_unit));
30820 return Ok(Expression::Function(Box::new(Function::new(
30821 f.name,
30822 vec![norm_d1, norm_d2, unit],
30823 ))));
30824 }
30825
30826 if matches!(target, DialectType::MySQL) {
30827 // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
30828 let norm_d1 = Self::date_literal_to_cast(date1);
30829 let norm_d2 = Self::date_literal_to_cast(date2);
30830 return Ok(Expression::Function(Box::new(Function::new(
30831 "DATEDIFF".to_string(),
30832 vec![norm_d1, norm_d2],
30833 ))));
30834 }
30835
30836 if matches!(target, DialectType::StarRocks) {
30837 // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
30838 let norm_d1 = Self::date_literal_to_cast(date1);
30839 let norm_d2 = Self::date_literal_to_cast(date2);
30840 return Ok(Expression::Function(Box::new(Function::new(
30841 "DATE_DIFF".to_string(),
30842 vec![
30843 Expression::Literal(Box::new(Literal::String(unit_str))),
30844 norm_d1,
30845 norm_d2,
30846 ],
30847 ))));
30848 }
30849
30850 if matches!(target, DialectType::DuckDB) {
30851 // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
30852 let norm_d1 = Self::ensure_cast_date(date1);
30853 let norm_d2 = Self::ensure_cast_date(date2);
30854
30855 // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
30856 let is_week_variant = unit_str == "WEEK"
30857 || unit_str.starts_with("WEEK(")
30858 || unit_str == "ISOWEEK";
30859 if is_week_variant {
30860 // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
30861 // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
30862 // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
30863 // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
30864 let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
30865 None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
30866 } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
30867 Some("1") // Shift Sunday to Monday alignment
30868 } else if unit_str == "WEEK(SATURDAY)" {
30869 Some("-5")
30870 } else if unit_str == "WEEK(TUESDAY)" {
30871 Some("-1")
30872 } else if unit_str == "WEEK(WEDNESDAY)" {
30873 Some("-2")
30874 } else if unit_str == "WEEK(THURSDAY)" {
30875 Some("-3")
30876 } else if unit_str == "WEEK(FRIDAY)" {
30877 Some("-4")
30878 } else {
30879 Some("1") // default to Sunday
30880 };
30881
30882 let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
30883 let shifted = if let Some(off) = offset {
30884 let interval =
30885 Expression::Interval(Box::new(crate::expressions::Interval {
30886 this: Some(Expression::Literal(Box::new(Literal::String(
30887 off.to_string(),
30888 )))),
30889 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30890 unit: crate::expressions::IntervalUnit::Day,
30891 use_plural: false,
30892 }),
30893 }));
30894 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
30895 date, interval,
30896 )))
30897 } else {
30898 date
30899 };
30900 Expression::Function(Box::new(Function::new(
30901 "DATE_TRUNC".to_string(),
30902 vec![
30903 Expression::Literal(Box::new(Literal::String(
30904 "WEEK".to_string(),
30905 ))),
30906 shifted,
30907 ],
30908 )))
30909 };
30910
30911 let trunc_d2 = make_trunc(norm_d2, day_offset);
30912 let trunc_d1 = make_trunc(norm_d1, day_offset);
30913 return Ok(Expression::Function(Box::new(Function::new(
30914 "DATE_DIFF".to_string(),
30915 vec![
30916 Expression::Literal(Box::new(Literal::String("WEEK".to_string()))),
30917 trunc_d2,
30918 trunc_d1,
30919 ],
30920 ))));
30921 }
30922
30923 return Ok(Expression::Function(Box::new(Function::new(
30924 "DATE_DIFF".to_string(),
30925 vec![
30926 Expression::Literal(Box::new(Literal::String(unit_str))),
30927 norm_d2,
30928 norm_d1,
30929 ],
30930 ))));
30931 }
30932
30933 // Default: DATEDIFF(unit, date2, date1)
30934 let unit = Expression::Identifier(Identifier::new(unit_str));
30935 Ok(Expression::Function(Box::new(Function::new(
30936 "DATEDIFF".to_string(),
30937 vec![unit, date2, date1],
30938 ))))
30939 }
30940
30941 // TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
30942 "TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
30943 let ts = args.remove(0);
30944 let interval_expr = args.remove(0);
30945 let (val, unit) = Self::extract_interval_parts(&interval_expr);
30946
30947 match target {
30948 DialectType::Snowflake => {
30949 // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
30950 // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
30951 // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
30952 let unit_str = Self::interval_unit_to_string(&unit);
30953 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
30954 Ok(Expression::TimestampAdd(Box::new(
30955 crate::expressions::TimestampAdd {
30956 this: Box::new(val),
30957 expression: Box::new(cast_ts),
30958 unit: Some(unit_str.to_string()),
30959 },
30960 )))
30961 }
30962 DialectType::Spark | DialectType::Databricks => {
30963 if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
30964 // Spark DATETIME_ADD: ts + INTERVAL val UNIT
30965 let interval =
30966 Expression::Interval(Box::new(crate::expressions::Interval {
30967 this: Some(val),
30968 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30969 unit,
30970 use_plural: false,
30971 }),
30972 }));
30973 Ok(Expression::Add(Box::new(
30974 crate::expressions::BinaryOp::new(ts, interval),
30975 )))
30976 } else if name == "DATETIME_ADD"
30977 && matches!(target, DialectType::Databricks)
30978 {
30979 // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
30980 let unit_str = Self::interval_unit_to_string(&unit);
30981 Ok(Expression::Function(Box::new(Function::new(
30982 "TIMESTAMPADD".to_string(),
30983 vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
30984 ))))
30985 } else {
30986 // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
30987 let unit_str = Self::interval_unit_to_string(&unit);
30988 let cast_ts =
30989 if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
30990 Self::maybe_cast_ts(ts)
30991 } else {
30992 ts
30993 };
30994 Ok(Expression::Function(Box::new(Function::new(
30995 "DATE_ADD".to_string(),
30996 vec![
30997 Expression::Identifier(Identifier::new(unit_str)),
30998 val,
30999 cast_ts,
31000 ],
31001 ))))
31002 }
31003 }
31004 DialectType::MySQL => {
31005 // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
31006 let mysql_ts = if name.starts_with("TIMESTAMP") {
31007 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
31008 match &ts {
31009 Expression::Function(ref inner_f)
31010 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
31011 {
31012 // Already wrapped, keep as-is
31013 ts
31014 }
31015 _ => {
31016 // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
31017 let unwrapped = match ts {
31018 Expression::Literal(lit)
31019 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
31020 {
31021 let Literal::Timestamp(s) = lit.as_ref() else {
31022 unreachable!()
31023 };
31024 Expression::Literal(Box::new(Literal::String(
31025 s.clone(),
31026 )))
31027 }
31028 other => other,
31029 };
31030 Expression::Function(Box::new(Function::new(
31031 "TIMESTAMP".to_string(),
31032 vec![unwrapped],
31033 )))
31034 }
31035 }
31036 } else {
31037 ts
31038 };
31039 Ok(Expression::DateAdd(Box::new(
31040 crate::expressions::DateAddFunc {
31041 this: mysql_ts,
31042 interval: val,
31043 unit,
31044 },
31045 )))
31046 }
31047 _ => {
31048 // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
31049 let cast_ts = if matches!(target, DialectType::DuckDB) {
31050 if name == "DATETIME_ADD" {
31051 Self::ensure_cast_timestamp(ts)
31052 } else if name.starts_with("TIMESTAMP") {
31053 Self::maybe_cast_ts_to_tz(ts, &name)
31054 } else {
31055 ts
31056 }
31057 } else {
31058 ts
31059 };
31060 Ok(Expression::DateAdd(Box::new(
31061 crate::expressions::DateAddFunc {
31062 this: cast_ts,
31063 interval: val,
31064 unit,
31065 },
31066 )))
31067 }
31068 }
31069 }
31070
31071 // TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
31072 "TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
31073 let ts = args.remove(0);
31074 let interval_expr = args.remove(0);
31075 let (val, unit) = Self::extract_interval_parts(&interval_expr);
31076
31077 match target {
31078 DialectType::Snowflake => {
31079 // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
31080 let unit_str = Self::interval_unit_to_string(&unit);
31081 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
31082 let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
31083 val,
31084 Expression::Neg(Box::new(crate::expressions::UnaryOp {
31085 this: Expression::number(1),
31086 inferred_type: None,
31087 })),
31088 )));
31089 Ok(Expression::TimestampAdd(Box::new(
31090 crate::expressions::TimestampAdd {
31091 this: Box::new(neg_val),
31092 expression: Box::new(cast_ts),
31093 unit: Some(unit_str.to_string()),
31094 },
31095 )))
31096 }
31097 DialectType::Spark | DialectType::Databricks => {
31098 if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
31099 || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
31100 {
31101 // Spark: ts - INTERVAL val UNIT
31102 let cast_ts = if name.starts_with("TIMESTAMP") {
31103 Self::maybe_cast_ts(ts)
31104 } else {
31105 ts
31106 };
31107 let interval =
31108 Expression::Interval(Box::new(crate::expressions::Interval {
31109 this: Some(val),
31110 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31111 unit,
31112 use_plural: false,
31113 }),
31114 }));
31115 Ok(Expression::Sub(Box::new(
31116 crate::expressions::BinaryOp::new(cast_ts, interval),
31117 )))
31118 } else {
31119 // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
31120 let unit_str = Self::interval_unit_to_string(&unit);
31121 let neg_val =
31122 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
31123 val,
31124 Expression::Neg(Box::new(crate::expressions::UnaryOp {
31125 this: Expression::number(1),
31126 inferred_type: None,
31127 })),
31128 )));
31129 Ok(Expression::Function(Box::new(Function::new(
31130 "TIMESTAMPADD".to_string(),
31131 vec![
31132 Expression::Identifier(Identifier::new(unit_str)),
31133 neg_val,
31134 ts,
31135 ],
31136 ))))
31137 }
31138 }
31139 DialectType::MySQL => {
31140 let mysql_ts = if name.starts_with("TIMESTAMP") {
31141 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
31142 match &ts {
31143 Expression::Function(ref inner_f)
31144 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
31145 {
31146 // Already wrapped, keep as-is
31147 ts
31148 }
31149 _ => {
31150 let unwrapped = match ts {
31151 Expression::Literal(lit)
31152 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
31153 {
31154 let Literal::Timestamp(s) = lit.as_ref() else {
31155 unreachable!()
31156 };
31157 Expression::Literal(Box::new(Literal::String(
31158 s.clone(),
31159 )))
31160 }
31161 other => other,
31162 };
31163 Expression::Function(Box::new(Function::new(
31164 "TIMESTAMP".to_string(),
31165 vec![unwrapped],
31166 )))
31167 }
31168 }
31169 } else {
31170 ts
31171 };
31172 Ok(Expression::DateSub(Box::new(
31173 crate::expressions::DateAddFunc {
31174 this: mysql_ts,
31175 interval: val,
31176 unit,
31177 },
31178 )))
31179 }
31180 _ => {
31181 let cast_ts = if matches!(target, DialectType::DuckDB) {
31182 if name == "DATETIME_SUB" {
31183 Self::ensure_cast_timestamp(ts)
31184 } else if name.starts_with("TIMESTAMP") {
31185 Self::maybe_cast_ts_to_tz(ts, &name)
31186 } else {
31187 ts
31188 }
31189 } else {
31190 ts
31191 };
31192 Ok(Expression::DateSub(Box::new(
31193 crate::expressions::DateAddFunc {
31194 this: cast_ts,
31195 interval: val,
31196 unit,
31197 },
31198 )))
31199 }
31200 }
31201 }
31202
31203 // DATE_SUB(date, INTERVAL n UNIT) -> target-specific
31204 "DATE_SUB" if args.len() == 2 => {
31205 let date = args.remove(0);
31206 let interval_expr = args.remove(0);
31207 let (val, unit) = Self::extract_interval_parts(&interval_expr);
31208
31209 match target {
31210 DialectType::Databricks | DialectType::Spark => {
31211 // Databricks/Spark: DATE_ADD(date, -val)
31212 // Use DateAdd expression with negative val so it generates correctly
31213 // The generator will output DATE_ADD(date, INTERVAL -val DAY)
31214 // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
31215 // Instead, we directly output as a simple negated DateSub
31216 Ok(Expression::DateSub(Box::new(
31217 crate::expressions::DateAddFunc {
31218 this: date,
31219 interval: val,
31220 unit,
31221 },
31222 )))
31223 }
31224 DialectType::DuckDB => {
31225 // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
31226 let cast_date = Self::ensure_cast_date(date);
31227 let interval =
31228 Expression::Interval(Box::new(crate::expressions::Interval {
31229 this: Some(val),
31230 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31231 unit,
31232 use_plural: false,
31233 }),
31234 }));
31235 Ok(Expression::Sub(Box::new(
31236 crate::expressions::BinaryOp::new(cast_date, interval),
31237 )))
31238 }
31239 DialectType::Snowflake => {
31240 // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
31241 // Just ensure the date is cast properly
31242 let cast_date = Self::ensure_cast_date(date);
31243 Ok(Expression::DateSub(Box::new(
31244 crate::expressions::DateAddFunc {
31245 this: cast_date,
31246 interval: val,
31247 unit,
31248 },
31249 )))
31250 }
31251 DialectType::PostgreSQL => {
31252 // PostgreSQL: date - INTERVAL 'val UNIT'
31253 let unit_str = Self::interval_unit_to_string(&unit);
31254 let interval =
31255 Expression::Interval(Box::new(crate::expressions::Interval {
31256 this: Some(Expression::Literal(Box::new(Literal::String(
31257 format!("{} {}", Self::expr_to_string(&val), unit_str),
31258 )))),
31259 unit: None,
31260 }));
31261 Ok(Expression::Sub(Box::new(
31262 crate::expressions::BinaryOp::new(date, interval),
31263 )))
31264 }
31265 _ => Ok(Expression::DateSub(Box::new(
31266 crate::expressions::DateAddFunc {
31267 this: date,
31268 interval: val,
31269 unit,
31270 },
31271 ))),
31272 }
31273 }
31274
31275 // DATEADD(unit, val, date) -> target-specific form
31276 // Used by: Redshift, Snowflake, TSQL, ClickHouse
31277 "DATEADD" if args.len() == 3 => {
31278 let arg0 = args.remove(0);
31279 let arg1 = args.remove(0);
31280 let arg2 = args.remove(0);
31281 let unit_str = get_unit_str(&arg0);
31282
31283 if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
31284 // Keep DATEADD(UNIT, val, date) with uppercased unit
31285 let unit = Expression::Identifier(Identifier::new(unit_str));
31286 // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
31287 let date = if matches!(target, DialectType::TSQL)
31288 && !matches!(
31289 source,
31290 DialectType::Spark | DialectType::Databricks | DialectType::Hive
31291 ) {
31292 Self::ensure_cast_datetime2(arg2)
31293 } else {
31294 arg2
31295 };
31296 return Ok(Expression::Function(Box::new(Function::new(
31297 "DATEADD".to_string(),
31298 vec![unit, arg1, date],
31299 ))));
31300 }
31301
31302 if matches!(target, DialectType::DuckDB) {
31303 // DuckDB: date + INTERVAL 'val' UNIT
31304 let iu = parse_interval_unit(&unit_str);
31305 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
31306 this: Some(arg1),
31307 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31308 unit: iu,
31309 use_plural: false,
31310 }),
31311 }));
31312 let cast_date = Self::ensure_cast_timestamp(arg2);
31313 return Ok(Expression::Add(Box::new(
31314 crate::expressions::BinaryOp::new(cast_date, interval),
31315 )));
31316 }
31317
31318 if matches!(target, DialectType::BigQuery) {
31319 // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
31320 let iu = parse_interval_unit(&unit_str);
31321 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
31322 this: Some(arg1),
31323 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31324 unit: iu,
31325 use_plural: false,
31326 }),
31327 }));
31328 return Ok(Expression::Function(Box::new(Function::new(
31329 "DATE_ADD".to_string(),
31330 vec![arg2, interval],
31331 ))));
31332 }
31333
31334 if matches!(target, DialectType::Databricks) {
31335 // Databricks: keep DATEADD(UNIT, val, date) format
31336 let unit = Expression::Identifier(Identifier::new(unit_str));
31337 return Ok(Expression::Function(Box::new(Function::new(
31338 "DATEADD".to_string(),
31339 vec![unit, arg1, arg2],
31340 ))));
31341 }
31342
31343 if matches!(target, DialectType::Spark) {
31344 // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
31345 fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
31346 if let Expression::Literal(lit) = &expr {
31347 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
31348 if let Ok(val) = n.parse::<i64>() {
31349 return Expression::Literal(Box::new(
31350 crate::expressions::Literal::Number(
31351 (val * factor).to_string(),
31352 ),
31353 ));
31354 }
31355 }
31356 }
31357 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
31358 expr,
31359 Expression::Literal(Box::new(crate::expressions::Literal::Number(
31360 factor.to_string(),
31361 ))),
31362 )))
31363 }
31364 match unit_str.as_str() {
31365 "YEAR" => {
31366 let months = multiply_expr_dateadd(arg1, 12);
31367 return Ok(Expression::Function(Box::new(Function::new(
31368 "ADD_MONTHS".to_string(),
31369 vec![arg2, months],
31370 ))));
31371 }
31372 "QUARTER" => {
31373 let months = multiply_expr_dateadd(arg1, 3);
31374 return Ok(Expression::Function(Box::new(Function::new(
31375 "ADD_MONTHS".to_string(),
31376 vec![arg2, months],
31377 ))));
31378 }
31379 "MONTH" => {
31380 return Ok(Expression::Function(Box::new(Function::new(
31381 "ADD_MONTHS".to_string(),
31382 vec![arg2, arg1],
31383 ))));
31384 }
31385 "WEEK" => {
31386 let days = multiply_expr_dateadd(arg1, 7);
31387 return Ok(Expression::Function(Box::new(Function::new(
31388 "DATE_ADD".to_string(),
31389 vec![arg2, days],
31390 ))));
31391 }
31392 "DAY" => {
31393 return Ok(Expression::Function(Box::new(Function::new(
31394 "DATE_ADD".to_string(),
31395 vec![arg2, arg1],
31396 ))));
31397 }
31398 _ => {
31399 let unit = Expression::Identifier(Identifier::new(unit_str));
31400 return Ok(Expression::Function(Box::new(Function::new(
31401 "DATE_ADD".to_string(),
31402 vec![unit, arg1, arg2],
31403 ))));
31404 }
31405 }
31406 }
31407
31408 if matches!(target, DialectType::Hive) {
31409 // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
31410 match unit_str.as_str() {
31411 "DAY" => {
31412 return Ok(Expression::Function(Box::new(Function::new(
31413 "DATE_ADD".to_string(),
31414 vec![arg2, arg1],
31415 ))));
31416 }
31417 "MONTH" => {
31418 return Ok(Expression::Function(Box::new(Function::new(
31419 "ADD_MONTHS".to_string(),
31420 vec![arg2, arg1],
31421 ))));
31422 }
31423 _ => {
31424 let iu = parse_interval_unit(&unit_str);
31425 let interval =
31426 Expression::Interval(Box::new(crate::expressions::Interval {
31427 this: Some(arg1),
31428 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31429 unit: iu,
31430 use_plural: false,
31431 }),
31432 }));
31433 return Ok(Expression::Add(Box::new(
31434 crate::expressions::BinaryOp::new(arg2, interval),
31435 )));
31436 }
31437 }
31438 }
31439
31440 if matches!(target, DialectType::PostgreSQL) {
31441 // PostgreSQL: date + INTERVAL 'val UNIT'
31442 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
31443 this: Some(Expression::Literal(Box::new(Literal::String(format!(
31444 "{} {}",
31445 Self::expr_to_string(&arg1),
31446 unit_str
31447 ))))),
31448 unit: None,
31449 }));
31450 return Ok(Expression::Add(Box::new(
31451 crate::expressions::BinaryOp::new(arg2, interval),
31452 )));
31453 }
31454
31455 if matches!(
31456 target,
31457 DialectType::Presto | DialectType::Trino | DialectType::Athena
31458 ) {
31459 // Presto/Trino: DATE_ADD('UNIT', val, date)
31460 return Ok(Expression::Function(Box::new(Function::new(
31461 "DATE_ADD".to_string(),
31462 vec![
31463 Expression::Literal(Box::new(Literal::String(unit_str))),
31464 arg1,
31465 arg2,
31466 ],
31467 ))));
31468 }
31469
31470 if matches!(target, DialectType::ClickHouse) {
31471 // ClickHouse: DATE_ADD(UNIT, val, date)
31472 let unit = Expression::Identifier(Identifier::new(unit_str));
31473 return Ok(Expression::Function(Box::new(Function::new(
31474 "DATE_ADD".to_string(),
31475 vec![unit, arg1, arg2],
31476 ))));
31477 }
31478
31479 // Default: keep DATEADD with uppercased unit
31480 let unit = Expression::Identifier(Identifier::new(unit_str));
31481 Ok(Expression::Function(Box::new(Function::new(
31482 "DATEADD".to_string(),
31483 vec![unit, arg1, arg2],
31484 ))))
31485 }
31486
31487 // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
31488 "DATE_ADD" if args.len() == 3 => {
31489 let arg0 = args.remove(0);
31490 let arg1 = args.remove(0);
31491 let arg2 = args.remove(0);
31492 let unit_str = get_unit_str(&arg0);
31493
31494 if matches!(
31495 target,
31496 DialectType::Presto | DialectType::Trino | DialectType::Athena
31497 ) {
31498 // Presto/Trino: DATE_ADD('UNIT', val, date)
31499 return Ok(Expression::Function(Box::new(Function::new(
31500 "DATE_ADD".to_string(),
31501 vec![
31502 Expression::Literal(Box::new(Literal::String(unit_str))),
31503 arg1,
31504 arg2,
31505 ],
31506 ))));
31507 }
31508
31509 if matches!(
31510 target,
31511 DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
31512 ) {
31513 // DATEADD(UNIT, val, date)
31514 let unit = Expression::Identifier(Identifier::new(unit_str));
31515 let date = if matches!(target, DialectType::TSQL) {
31516 Self::ensure_cast_datetime2(arg2)
31517 } else {
31518 arg2
31519 };
31520 return Ok(Expression::Function(Box::new(Function::new(
31521 "DATEADD".to_string(),
31522 vec![unit, arg1, date],
31523 ))));
31524 }
31525
31526 if matches!(target, DialectType::DuckDB) {
31527 // DuckDB: date + INTERVAL val UNIT
31528 let iu = parse_interval_unit(&unit_str);
31529 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
31530 this: Some(arg1),
31531 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31532 unit: iu,
31533 use_plural: false,
31534 }),
31535 }));
31536 return Ok(Expression::Add(Box::new(
31537 crate::expressions::BinaryOp::new(arg2, interval),
31538 )));
31539 }
31540
31541 if matches!(target, DialectType::Spark | DialectType::Databricks) {
31542 // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
31543 let unit = Expression::Identifier(Identifier::new(unit_str));
31544 return Ok(Expression::Function(Box::new(Function::new(
31545 "DATE_ADD".to_string(),
31546 vec![unit, arg1, arg2],
31547 ))));
31548 }
31549
31550 // Default: DATE_ADD(UNIT, val, date)
31551 let unit = Expression::Identifier(Identifier::new(unit_str));
31552 Ok(Expression::Function(Box::new(Function::new(
31553 "DATE_ADD".to_string(),
31554 vec![unit, arg1, arg2],
31555 ))))
31556 }
31557
            // DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
            // Splits the INTERVAL operand into its numeric value and unit, then
            // emits the target dialect's preferred date-addition syntax.
            "DATE_ADD" if args.len() == 2 => {
                let date = args.remove(0);
                let interval_expr = args.remove(0);
                // (val, unit) are the decomposed parts of `INTERVAL val UNIT`;
                // unit_str is the uppercase textual unit used by name-based targets.
                let (val, unit) = Self::extract_interval_parts(&interval_expr);
                let unit_str = Self::interval_unit_to_string(&unit);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
                        let cast_date = Self::ensure_cast_date(date);
                        let quoted_val = Self::quote_interval_val(&val);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(cast_date, interval),
                        )))
                    }
                    DialectType::PostgreSQL => {
                        // PostgreSQL: date + INTERVAL 'val UNIT'
                        // Value and unit are folded into one quoted string literal.
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(Literal::String(
                                    format!("{} {}", Self::expr_to_string(&val), unit_str),
                                )))),
                                unit: None,
                            }));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(date, interval),
                        )))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
                        let val_str = Self::expr_to_string(&val);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![
                                Expression::Literal(Box::new(Literal::String(
                                    unit_str.to_string(),
                                ))),
                                Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(Box::new(Literal::String(val_str))),
                                    to: DataType::BigInt { length: None },
                                    trailing_comments: vec![],
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })),
                                date,
                            ],
                        ))))
                    }
                    DialectType::Spark | DialectType::Hive => {
                        // Spark/Hive: DATE_ADD(date, val) for DAY
                        // MONTH uses the dedicated ADD_MONTHS builtin; any other
                        // unit falls back to DATE_ADD(date, INTERVAL val UNIT).
                        match unit_str {
                            "DAY" => Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![date, val],
                            )))),
                            "MONTH" => Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![date, val],
                            )))),
                            _ => {
                                let iu = parse_interval_unit(&unit_str);
                                let interval =
                                    Expression::Interval(Box::new(crate::expressions::Interval {
                                        this: Some(val),
                                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                            unit: iu,
                                            use_plural: false,
                                        }),
                                    }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(),
                                    vec![date, interval],
                                ))))
                            }
                        }
                    }
                    DialectType::Snowflake => {
                        // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
                        let cast_date = Self::ensure_cast_date(date);
                        let val_str = Self::expr_to_string(&val);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new(unit_str)),
                                Expression::Literal(Box::new(Literal::String(val_str))),
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::TSQL | DialectType::Fabric => {
                        // TSQL/Fabric: DATEADD(UNIT, val, CAST(date AS DATETIME2))
                        let cast_date = Self::ensure_cast_datetime2(date);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new(unit_str)),
                                val,
                                cast_date,
                            ],
                        ))))
                    }
                    // Redshift: DATEADD(UNIT, val, date) with no extra cast.
                    DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
                    )))),
                    DialectType::MySQL => {
                        // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
                        let quoted_val = Self::quote_interval_val(&val);
                        let iu = parse_interval_unit(&unit_str);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: iu,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT)
                        let quoted_val = Self::quote_interval_val(&val);
                        let iu = parse_interval_unit(&unit_str);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: iu,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval],
                        ))))
                    }
                    // Databricks accepts the DATEADD(UNIT, val, date) spelling.
                    DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
                    )))),
                    _ => {
                        // Default: keep as DATE_ADD with decomposed interval
                        Ok(Expression::DateAdd(Box::new(
                            crate::expressions::DateAddFunc {
                                this: date,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                }
            }
31723
31724 // ADD_MONTHS(date, val) -> target-specific form
31725 "ADD_MONTHS" if args.len() == 2 => {
31726 let date = args.remove(0);
31727 let val = args.remove(0);
31728
31729 if matches!(target, DialectType::TSQL) {
31730 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
31731 let cast_date = Self::ensure_cast_datetime2(date);
31732 return Ok(Expression::Function(Box::new(Function::new(
31733 "DATEADD".to_string(),
31734 vec![
31735 Expression::Identifier(Identifier::new("MONTH")),
31736 val,
31737 cast_date,
31738 ],
31739 ))));
31740 }
31741
31742 if matches!(target, DialectType::DuckDB) {
31743 // DuckDB: date + INTERVAL val MONTH
31744 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
31745 this: Some(val),
31746 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31747 unit: crate::expressions::IntervalUnit::Month,
31748 use_plural: false,
31749 }),
31750 }));
31751 return Ok(Expression::Add(Box::new(
31752 crate::expressions::BinaryOp::new(date, interval),
31753 )));
31754 }
31755
31756 if matches!(target, DialectType::Snowflake) {
31757 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
31758 if matches!(source, DialectType::Snowflake) {
31759 return Ok(Expression::Function(Box::new(Function::new(
31760 "ADD_MONTHS".to_string(),
31761 vec![date, val],
31762 ))));
31763 }
31764 return Ok(Expression::Function(Box::new(Function::new(
31765 "DATEADD".to_string(),
31766 vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
31767 ))));
31768 }
31769
31770 if matches!(target, DialectType::Spark | DialectType::Databricks) {
31771 // Spark: ADD_MONTHS(date, val) - keep as is
31772 return Ok(Expression::Function(Box::new(Function::new(
31773 "ADD_MONTHS".to_string(),
31774 vec![date, val],
31775 ))));
31776 }
31777
31778 if matches!(target, DialectType::Hive) {
31779 return Ok(Expression::Function(Box::new(Function::new(
31780 "ADD_MONTHS".to_string(),
31781 vec![date, val],
31782 ))));
31783 }
31784
31785 if matches!(
31786 target,
31787 DialectType::Presto | DialectType::Trino | DialectType::Athena
31788 ) {
31789 // Presto: DATE_ADD('MONTH', val, date)
31790 return Ok(Expression::Function(Box::new(Function::new(
31791 "DATE_ADD".to_string(),
31792 vec![
31793 Expression::Literal(Box::new(Literal::String("MONTH".to_string()))),
31794 val,
31795 date,
31796 ],
31797 ))));
31798 }
31799
31800 // Default: keep ADD_MONTHS
31801 Ok(Expression::Function(Box::new(Function::new(
31802 "ADD_MONTHS".to_string(),
31803 vec![date, val],
31804 ))))
31805 }
31806
31807 // SAFE_DIVIDE(x, y) -> target-specific form directly
31808 "SAFE_DIVIDE" if args.len() == 2 => {
31809 let x = args.remove(0);
31810 let y = args.remove(0);
31811 // Wrap x and y in parens if they're complex expressions
31812 let y_ref = match &y {
31813 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
31814 y.clone()
31815 }
31816 _ => Expression::Paren(Box::new(Paren {
31817 this: y.clone(),
31818 trailing_comments: vec![],
31819 })),
31820 };
31821 let x_ref = match &x {
31822 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
31823 x.clone()
31824 }
31825 _ => Expression::Paren(Box::new(Paren {
31826 this: x.clone(),
31827 trailing_comments: vec![],
31828 })),
31829 };
31830 let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
31831 y_ref.clone(),
31832 Expression::number(0),
31833 )));
31834 let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
31835 x_ref.clone(),
31836 y_ref.clone(),
31837 )));
31838
31839 match target {
31840 DialectType::DuckDB | DialectType::PostgreSQL => {
31841 // CASE WHEN y <> 0 THEN x / y ELSE NULL END
31842 let result_div = if matches!(target, DialectType::PostgreSQL) {
31843 let cast_x = Expression::Cast(Box::new(Cast {
31844 this: x_ref,
31845 to: DataType::Custom {
31846 name: "DOUBLE PRECISION".to_string(),
31847 },
31848 trailing_comments: vec![],
31849 double_colon_syntax: false,
31850 format: None,
31851 default: None,
31852 inferred_type: None,
31853 }));
31854 Expression::Div(Box::new(crate::expressions::BinaryOp::new(
31855 cast_x, y_ref,
31856 )))
31857 } else {
31858 div_expr
31859 };
31860 Ok(Expression::Case(Box::new(crate::expressions::Case {
31861 operand: None,
31862 whens: vec![(condition, result_div)],
31863 else_: Some(Expression::Null(crate::expressions::Null)),
31864 comments: Vec::new(),
31865 inferred_type: None,
31866 })))
31867 }
31868 DialectType::Snowflake => {
31869 // IFF(y <> 0, x / y, NULL)
31870 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
31871 condition,
31872 true_value: div_expr,
31873 false_value: Some(Expression::Null(crate::expressions::Null)),
31874 original_name: Some("IFF".to_string()),
31875 inferred_type: None,
31876 })))
31877 }
31878 DialectType::Presto | DialectType::Trino => {
31879 // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
31880 let cast_x = Expression::Cast(Box::new(Cast {
31881 this: x_ref,
31882 to: DataType::Double {
31883 precision: None,
31884 scale: None,
31885 },
31886 trailing_comments: vec![],
31887 double_colon_syntax: false,
31888 format: None,
31889 default: None,
31890 inferred_type: None,
31891 }));
31892 let cast_div = Expression::Div(Box::new(
31893 crate::expressions::BinaryOp::new(cast_x, y_ref),
31894 ));
31895 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
31896 condition,
31897 true_value: cast_div,
31898 false_value: Some(Expression::Null(crate::expressions::Null)),
31899 original_name: None,
31900 inferred_type: None,
31901 })))
31902 }
31903 _ => {
31904 // IF(y <> 0, x / y, NULL)
31905 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
31906 condition,
31907 true_value: div_expr,
31908 false_value: Some(Expression::Null(crate::expressions::Null)),
31909 original_name: None,
31910 inferred_type: None,
31911 })))
31912 }
31913 }
31914 }
31915
31916 // GENERATE_UUID() -> UUID() with CAST to string
31917 "GENERATE_UUID" => {
31918 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
31919 this: None,
31920 name: None,
31921 is_string: None,
31922 }));
31923 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
31924 let cast_type = match target {
31925 DialectType::DuckDB => Some(DataType::Text),
31926 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
31927 length: None,
31928 parenthesized_length: false,
31929 }),
31930 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
31931 Some(DataType::String { length: None })
31932 }
31933 _ => None,
31934 };
31935 if let Some(dt) = cast_type {
31936 Ok(Expression::Cast(Box::new(Cast {
31937 this: uuid_expr,
31938 to: dt,
31939 trailing_comments: vec![],
31940 double_colon_syntax: false,
31941 format: None,
31942 default: None,
31943 inferred_type: None,
31944 })))
31945 } else {
31946 Ok(uuid_expr)
31947 }
31948 }
31949
31950 // COUNTIF(x) -> CountIf expression
31951 "COUNTIF" if args.len() == 1 => {
31952 let arg = args.remove(0);
31953 Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
31954 this: arg,
31955 distinct: false,
31956 filter: None,
31957 order_by: vec![],
31958 name: None,
31959 ignore_nulls: None,
31960 having_max: None,
31961 limit: None,
31962 inferred_type: None,
31963 })))
31964 }
31965
31966 // EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression
31967 "EDIT_DISTANCE" => {
31968 // Strip named arguments (max_distance => N) and pass as positional
31969 let mut positional_args: Vec<Expression> = vec![];
31970 for arg in args {
31971 match arg {
31972 Expression::NamedArgument(na) => {
31973 positional_args.push(na.value);
31974 }
31975 other => positional_args.push(other),
31976 }
31977 }
31978 if positional_args.len() >= 2 {
31979 let col1 = positional_args.remove(0);
31980 let col2 = positional_args.remove(0);
31981 let levenshtein = crate::expressions::BinaryFunc {
31982 this: col1,
31983 expression: col2,
31984 original_name: None,
31985 inferred_type: None,
31986 };
31987 // Pass extra args through a function wrapper with all args
31988 if !positional_args.is_empty() {
31989 let max_dist = positional_args.remove(0);
31990 // DuckDB: CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(a, b), max) END
31991 if matches!(target, DialectType::DuckDB) {
31992 let lev = Expression::Function(Box::new(Function::new(
31993 "LEVENSHTEIN".to_string(),
31994 vec![levenshtein.this, levenshtein.expression],
31995 )));
31996 let lev_is_null =
31997 Expression::IsNull(Box::new(crate::expressions::IsNull {
31998 this: lev.clone(),
31999 not: false,
32000 postfix_form: false,
32001 }));
32002 let max_is_null =
32003 Expression::IsNull(Box::new(crate::expressions::IsNull {
32004 this: max_dist.clone(),
32005 not: false,
32006 postfix_form: false,
32007 }));
32008 let null_check =
32009 Expression::Or(Box::new(crate::expressions::BinaryOp {
32010 left: lev_is_null,
32011 right: max_is_null,
32012 left_comments: Vec::new(),
32013 operator_comments: Vec::new(),
32014 trailing_comments: Vec::new(),
32015 inferred_type: None,
32016 }));
32017 let least =
32018 Expression::Least(Box::new(crate::expressions::VarArgFunc {
32019 expressions: vec![lev, max_dist],
32020 original_name: None,
32021 inferred_type: None,
32022 }));
32023 return Ok(Expression::Case(Box::new(crate::expressions::Case {
32024 operand: None,
32025 whens: vec![(
32026 null_check,
32027 Expression::Null(crate::expressions::Null),
32028 )],
32029 else_: Some(least),
32030 comments: Vec::new(),
32031 inferred_type: None,
32032 })));
32033 }
32034 let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
32035 all_args.extend(positional_args);
32036 // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
32037 let func_name = if matches!(target, DialectType::PostgreSQL) {
32038 "LEVENSHTEIN_LESS_EQUAL"
32039 } else {
32040 "LEVENSHTEIN"
32041 };
32042 return Ok(Expression::Function(Box::new(Function::new(
32043 func_name.to_string(),
32044 all_args,
32045 ))));
32046 }
32047 Ok(Expression::Levenshtein(Box::new(levenshtein)))
32048 } else {
32049 Ok(Expression::Function(Box::new(Function::new(
32050 "EDIT_DISTANCE".to_string(),
32051 positional_args,
32052 ))))
32053 }
32054 }
32055
32056 // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
32057 "TIMESTAMP_SECONDS" if args.len() == 1 => {
32058 let arg = args.remove(0);
32059 Ok(Expression::UnixToTime(Box::new(
32060 crate::expressions::UnixToTime {
32061 this: Box::new(arg),
32062 scale: Some(0),
32063 zone: None,
32064 hours: None,
32065 minutes: None,
32066 format: None,
32067 target_type: None,
32068 },
32069 )))
32070 }
32071
32072 // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
32073 "TIMESTAMP_MILLIS" if args.len() == 1 => {
32074 let arg = args.remove(0);
32075 Ok(Expression::UnixToTime(Box::new(
32076 crate::expressions::UnixToTime {
32077 this: Box::new(arg),
32078 scale: Some(3),
32079 zone: None,
32080 hours: None,
32081 minutes: None,
32082 format: None,
32083 target_type: None,
32084 },
32085 )))
32086 }
32087
32088 // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
32089 "TIMESTAMP_MICROS" if args.len() == 1 => {
32090 let arg = args.remove(0);
32091 Ok(Expression::UnixToTime(Box::new(
32092 crate::expressions::UnixToTime {
32093 this: Box::new(arg),
32094 scale: Some(6),
32095 zone: None,
32096 hours: None,
32097 minutes: None,
32098 format: None,
32099 target_type: None,
32100 },
32101 )))
32102 }
32103
32104 // DIV(x, y) -> IntDiv expression
32105 "DIV" if args.len() == 2 => {
32106 let x = args.remove(0);
32107 let y = args.remove(0);
32108 Ok(Expression::IntDiv(Box::new(
32109 crate::expressions::BinaryFunc {
32110 this: x,
32111 expression: y,
32112 original_name: None,
32113 inferred_type: None,
32114 },
32115 )))
32116 }
32117
32118 // TO_HEX(x) -> target-specific form
32119 "TO_HEX" if args.len() == 1 => {
32120 let arg = args.remove(0);
32121 // Check if inner function already returns hex string in certain targets
32122 let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
32123 if matches!(target, DialectType::BigQuery) {
32124 // BQ->BQ: keep as TO_HEX
32125 Ok(Expression::Function(Box::new(Function::new(
32126 "TO_HEX".to_string(),
32127 vec![arg],
32128 ))))
32129 } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
32130 // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
32131 Ok(arg)
32132 } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
32133 // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
32134 // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
32135 // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
32136 // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
32137 if let Expression::Function(ref inner_f) = arg {
32138 let inner_args = inner_f.args.clone();
32139 let binary_func = match inner_f.name.to_ascii_uppercase().as_str() {
32140 "SHA1" => Expression::Function(Box::new(Function::new(
32141 "SHA1_BINARY".to_string(),
32142 inner_args,
32143 ))),
32144 "MD5" => Expression::Function(Box::new(Function::new(
32145 "MD5_BINARY".to_string(),
32146 inner_args,
32147 ))),
32148 "SHA256" => {
32149 let mut a = inner_args;
32150 a.push(Expression::number(256));
32151 Expression::Function(Box::new(Function::new(
32152 "SHA2_BINARY".to_string(),
32153 a,
32154 )))
32155 }
32156 "SHA512" => {
32157 let mut a = inner_args;
32158 a.push(Expression::number(512));
32159 Expression::Function(Box::new(Function::new(
32160 "SHA2_BINARY".to_string(),
32161 a,
32162 )))
32163 }
32164 _ => arg.clone(),
32165 };
32166 Ok(Expression::Function(Box::new(Function::new(
32167 "TO_CHAR".to_string(),
32168 vec![binary_func],
32169 ))))
32170 } else {
32171 let inner = Expression::Function(Box::new(Function::new(
32172 "HEX".to_string(),
32173 vec![arg],
32174 )));
32175 Ok(Expression::Lower(Box::new(
32176 crate::expressions::UnaryFunc::new(inner),
32177 )))
32178 }
32179 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
32180 let inner = Expression::Function(Box::new(Function::new(
32181 "TO_HEX".to_string(),
32182 vec![arg],
32183 )));
32184 Ok(Expression::Lower(Box::new(
32185 crate::expressions::UnaryFunc::new(inner),
32186 )))
32187 } else {
32188 let inner =
32189 Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
32190 Ok(Expression::Lower(Box::new(
32191 crate::expressions::UnaryFunc::new(inner),
32192 )))
32193 }
32194 }
32195
            // LAST_DAY(date, unit) -> LAST_DAY(date)
            // The optional second argument is dropped and only the one-argument
            // form is emitted for every target.
            // NOTE(review): units other than MONTH (e.g. WEEK) are silently
            // discarded here, and no PostgreSQL-specific rewrite is performed
            // despite PostgreSQL lacking LAST_DAY — confirm whether callers can
            // pass a non-MONTH unit or a PostgreSQL target through this path.
            "LAST_DAY" if args.len() == 2 => {
                let date = args.remove(0);
                let _unit = args.remove(0); // Strip the unit (MONTH is default)
                Ok(Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![date],
                ))))
            }
32205
32206 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
32207 "GENERATE_ARRAY" => {
32208 let start = args.get(0).cloned();
32209 let end = args.get(1).cloned();
32210 let step = args.get(2).cloned();
32211 Ok(Expression::GenerateSeries(Box::new(
32212 crate::expressions::GenerateSeries {
32213 start: start.map(Box::new),
32214 end: end.map(Box::new),
32215 step: step.map(Box::new),
32216 is_end_exclusive: None,
32217 },
32218 )))
32219 }
32220
32221 // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
32222 "GENERATE_TIMESTAMP_ARRAY" => {
32223 let start = args.get(0).cloned();
32224 let end = args.get(1).cloned();
32225 let step = args.get(2).cloned();
32226
32227 if matches!(target, DialectType::DuckDB) {
32228 // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
32229 // Only cast string literals - leave columns/expressions as-is
32230 let maybe_cast_ts = |expr: Expression| -> Expression {
32231 if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
32232 {
32233 Expression::Cast(Box::new(Cast {
32234 this: expr,
32235 to: DataType::Timestamp {
32236 precision: None,
32237 timezone: false,
32238 },
32239 trailing_comments: vec![],
32240 double_colon_syntax: false,
32241 format: None,
32242 default: None,
32243 inferred_type: None,
32244 }))
32245 } else {
32246 expr
32247 }
32248 };
32249 let cast_start = start.map(maybe_cast_ts);
32250 let cast_end = end.map(maybe_cast_ts);
32251 Ok(Expression::GenerateSeries(Box::new(
32252 crate::expressions::GenerateSeries {
32253 start: cast_start.map(Box::new),
32254 end: cast_end.map(Box::new),
32255 step: step.map(Box::new),
32256 is_end_exclusive: None,
32257 },
32258 )))
32259 } else {
32260 Ok(Expression::GenerateSeries(Box::new(
32261 crate::expressions::GenerateSeries {
32262 start: start.map(Box::new),
32263 end: end.map(Box::new),
32264 step: step.map(Box::new),
32265 is_end_exclusive: None,
32266 },
32267 )))
32268 }
32269 }
32270
32271 // TO_JSON(x) -> target-specific (from Spark/Hive)
32272 "TO_JSON" => {
32273 match target {
32274 DialectType::Presto | DialectType::Trino => {
32275 // JSON_FORMAT(CAST(x AS JSON))
32276 let arg = args
32277 .into_iter()
32278 .next()
32279 .unwrap_or(Expression::Null(crate::expressions::Null));
32280 let cast_json = Expression::Cast(Box::new(Cast {
32281 this: arg,
32282 to: DataType::Custom {
32283 name: "JSON".to_string(),
32284 },
32285 trailing_comments: vec![],
32286 double_colon_syntax: false,
32287 format: None,
32288 default: None,
32289 inferred_type: None,
32290 }));
32291 Ok(Expression::Function(Box::new(Function::new(
32292 "JSON_FORMAT".to_string(),
32293 vec![cast_json],
32294 ))))
32295 }
32296 DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
32297 "TO_JSON_STRING".to_string(),
32298 args,
32299 )))),
32300 DialectType::DuckDB => {
32301 // CAST(TO_JSON(x) AS TEXT)
32302 let arg = args
32303 .into_iter()
32304 .next()
32305 .unwrap_or(Expression::Null(crate::expressions::Null));
32306 let to_json = Expression::Function(Box::new(Function::new(
32307 "TO_JSON".to_string(),
32308 vec![arg],
32309 )));
32310 Ok(Expression::Cast(Box::new(Cast {
32311 this: to_json,
32312 to: DataType::Text,
32313 trailing_comments: vec![],
32314 double_colon_syntax: false,
32315 format: None,
32316 default: None,
32317 inferred_type: None,
32318 })))
32319 }
32320 _ => Ok(Expression::Function(Box::new(Function::new(
32321 "TO_JSON".to_string(),
32322 args,
32323 )))),
32324 }
32325 }
32326
32327 // TO_JSON_STRING(x) -> target-specific
32328 "TO_JSON_STRING" => {
32329 match target {
32330 DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
32331 Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
32332 ),
32333 DialectType::Presto | DialectType::Trino => {
32334 // JSON_FORMAT(CAST(x AS JSON))
32335 let arg = args
32336 .into_iter()
32337 .next()
32338 .unwrap_or(Expression::Null(crate::expressions::Null));
32339 let cast_json = Expression::Cast(Box::new(Cast {
32340 this: arg,
32341 to: DataType::Custom {
32342 name: "JSON".to_string(),
32343 },
32344 trailing_comments: vec![],
32345 double_colon_syntax: false,
32346 format: None,
32347 default: None,
32348 inferred_type: None,
32349 }));
32350 Ok(Expression::Function(Box::new(Function::new(
32351 "JSON_FORMAT".to_string(),
32352 vec![cast_json],
32353 ))))
32354 }
32355 DialectType::DuckDB => {
32356 // CAST(TO_JSON(x) AS TEXT)
32357 let arg = args
32358 .into_iter()
32359 .next()
32360 .unwrap_or(Expression::Null(crate::expressions::Null));
32361 let to_json = Expression::Function(Box::new(Function::new(
32362 "TO_JSON".to_string(),
32363 vec![arg],
32364 )));
32365 Ok(Expression::Cast(Box::new(Cast {
32366 this: to_json,
32367 to: DataType::Text,
32368 trailing_comments: vec![],
32369 double_colon_syntax: false,
32370 format: None,
32371 default: None,
32372 inferred_type: None,
32373 })))
32374 }
32375 DialectType::Snowflake => {
32376 // TO_JSON(x)
32377 Ok(Expression::Function(Box::new(Function::new(
32378 "TO_JSON".to_string(),
32379 args,
32380 ))))
32381 }
32382 _ => Ok(Expression::Function(Box::new(Function::new(
32383 "TO_JSON_STRING".to_string(),
32384 args,
32385 )))),
32386 }
32387 }
32388
32389 // SAFE_ADD(x, y) -> SafeAdd expression
32390 "SAFE_ADD" if args.len() == 2 => {
32391 let x = args.remove(0);
32392 let y = args.remove(0);
32393 Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
32394 this: Box::new(x),
32395 expression: Box::new(y),
32396 })))
32397 }
32398
32399 // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
32400 "SAFE_SUBTRACT" if args.len() == 2 => {
32401 let x = args.remove(0);
32402 let y = args.remove(0);
32403 Ok(Expression::SafeSubtract(Box::new(
32404 crate::expressions::SafeSubtract {
32405 this: Box::new(x),
32406 expression: Box::new(y),
32407 },
32408 )))
32409 }
32410
32411 // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
32412 "SAFE_MULTIPLY" if args.len() == 2 => {
32413 let x = args.remove(0);
32414 let y = args.remove(0);
32415 Ok(Expression::SafeMultiply(Box::new(
32416 crate::expressions::SafeMultiply {
32417 this: Box::new(x),
32418 expression: Box::new(y),
32419 },
32420 )))
32421 }
32422
32423 // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
32424 "REGEXP_CONTAINS" if args.len() == 2 => {
32425 let str_expr = args.remove(0);
32426 let pattern = args.remove(0);
32427 Ok(Expression::RegexpLike(Box::new(
32428 crate::expressions::RegexpFunc {
32429 this: str_expr,
32430 pattern,
32431 flags: None,
32432 },
32433 )))
32434 }
32435
32436 // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
32437 "CONTAINS_SUBSTR" if args.len() == 2 => {
32438 let a = args.remove(0);
32439 let b = args.remove(0);
32440 let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
32441 let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
32442 Ok(Expression::Function(Box::new(Function::new(
32443 "CONTAINS".to_string(),
32444 vec![lower_a, lower_b],
32445 ))))
32446 }
32447
32448 // INT64(x) -> CAST(x AS BIGINT)
32449 "INT64" if args.len() == 1 => {
32450 let arg = args.remove(0);
32451 Ok(Expression::Cast(Box::new(Cast {
32452 this: arg,
32453 to: DataType::BigInt { length: None },
32454 trailing_comments: vec![],
32455 double_colon_syntax: false,
32456 format: None,
32457 default: None,
32458 inferred_type: None,
32459 })))
32460 }
32461
32462 // INSTR(str, substr) -> target-specific
32463 "INSTR" if args.len() >= 2 => {
32464 let str_expr = args.remove(0);
32465 let substr = args.remove(0);
32466 if matches!(target, DialectType::Snowflake) {
32467 // CHARINDEX(substr, str)
32468 Ok(Expression::Function(Box::new(Function::new(
32469 "CHARINDEX".to_string(),
32470 vec![substr, str_expr],
32471 ))))
32472 } else if matches!(target, DialectType::BigQuery) {
32473 // Keep as INSTR
32474 Ok(Expression::Function(Box::new(Function::new(
32475 "INSTR".to_string(),
32476 vec![str_expr, substr],
32477 ))))
32478 } else {
32479 // Default: keep as INSTR
32480 Ok(Expression::Function(Box::new(Function::new(
32481 "INSTR".to_string(),
32482 vec![str_expr, substr],
32483 ))))
32484 }
32485 }
32486
            // BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL
            //
            // BigQuery puts the value first and a bare unit keyword second; most other
            // engines take a quoted unit string first. `get_unit_str` extracts the
            // unit name from the unit expression (identifier or literal).
            "DATE_TRUNC" if args.len() == 2 => {
                let expr = args.remove(0);
                let unit_expr = args.remove(0);
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    // Targets that use the standard quoted-unit-first argument order.
                    DialectType::DuckDB
                    | DialectType::Snowflake
                    | DialectType::PostgreSQL
                    | DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Databricks
                    | DialectType::Spark
                    | DialectType::Redshift
                    | DialectType::ClickHouse
                    | DialectType::TSQL => {
                        // Standard: DATE_TRUNC('UNIT', expr)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![
                                Expression::Literal(Box::new(Literal::String(unit_str))),
                                expr,
                            ],
                        ))))
                    }
                    _ => {
                        // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
                        // (the original unit expression is reused unquoted)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![expr, unit_expr],
                        ))))
                    }
                }
            }
32522
            // TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific
            "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
                // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
                let ts = args.remove(0);
                let unit_expr = args.remove(0);
                // Optional third argument is a timezone.
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                        // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
                        // Without timezone for MINUTE+ granularity: just DATE_TRUNC
                        //
                        // Day-or-coarser boundaries shift with the timezone, so only
                        // those units get the AT TIME ZONE round-trip below.
                        let is_coarse = matches!(
                            unit_str.as_str(),
                            "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
                        );
                        // For DATETIME_TRUNC, cast string args to TIMESTAMP
                        // (DATETIME is timezone-less, so string literals get a plain
                        // TIMESTAMP cast; everything else goes through the shared
                        // maybe_cast_ts_to_tz helper).
                        let cast_ts = if name == "DATETIME_TRUNC" {
                            match ts {
                                Expression::Literal(ref lit)
                                    if matches!(lit.as_ref(), Literal::String(ref _s)) =>
                                {
                                    Expression::Cast(Box::new(Cast {
                                        this: ts,
                                        to: DataType::Timestamp {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                }
                                _ => Self::maybe_cast_ts_to_tz(ts, &name),
                            }
                        } else {
                            Self::maybe_cast_ts_to_tz(ts, &name)
                        };

                        if let Some(tz_arg) = tz {
                            if is_coarse {
                                // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                // Shift into the zone, truncate at the local boundary,
                                // then shift back so the result is an instant again.
                                let at_tz = Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: cast_ts,
                                        zone: tz_arg.clone(),
                                    },
                                ));
                                let date_trunc = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![
                                        Expression::Literal(Box::new(Literal::String(unit_str))),
                                        at_tz,
                                    ],
                                )));
                                Ok(Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: date_trunc,
                                        zone: tz_arg,
                                    },
                                )))
                            } else {
                                // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
                                // NOTE(review): the timezone argument is dropped here on
                                // the premise that sub-day truncation is zone-invariant
                                // — confirm for zones with non-hour offsets.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![
                                        Expression::Literal(Box::new(Literal::String(unit_str))),
                                        cast_ts,
                                    ],
                                ))))
                            }
                        } else {
                            // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![
                                    Expression::Literal(Box::new(Literal::String(unit_str))),
                                    cast_ts,
                                ],
                            ))))
                        }
                    }
                    DialectType::Databricks | DialectType::Spark => {
                        // Databricks/Spark: DATE_TRUNC('UNIT', ts)
                        // NOTE(review): any timezone argument is silently dropped for
                        // this target — confirm that is intended.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::Literal(Box::new(Literal::String(unit_str))), ts],
                        ))))
                    }
                    _ => {
                        // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
                        // NOTE(review): this normalizes the unit into first position as
                        // a string literal and renames DATETIME_TRUNC to
                        // TIMESTAMP_TRUNC — verify BigQuery round-trips both.
                        let unit = Expression::Literal(Box::new(Literal::String(unit_str)));
                        let mut date_trunc_args = vec![unit, ts];
                        if let Some(tz_arg) = tz {
                            date_trunc_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_TRUNC".to_string(),
                            date_trunc_args,
                        ))))
                    }
                }
            }
32633
            // TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME)
            //
            // Dispatches on arity:
            //   3 args: component constructor, mapped to the target's maker function
            //   1 arg:  conversion, lowered to a CAST
            //   2 args: value + timezone, lowered to a CAST/AT TIME ZONE chain
            //   other:  passed through unchanged
            "TIME" => {
                if args.len() == 3 {
                    // TIME(h, m, s) constructor
                    match target {
                        DialectType::TSQL => {
                            // TIMEFROMPARTS(h, m, s, 0, 0)
                            // TSQL requires fractional-seconds and precision arguments;
                            // pad both with zero.
                            args.push(Expression::number(0));
                            args.push(Expression::number(0));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMEFROMPARTS".to_string(),
                                args,
                            ))))
                        }
                        DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
                            "MAKETIME".to_string(),
                            args,
                        )))),
                        DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                            Function::new("MAKE_TIME".to_string(), args),
                        ))),
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME".to_string(),
                            args,
                        )))),
                    }
                } else if args.len() == 1 {
                    let arg = args.remove(0);
                    if matches!(target, DialectType::Spark) {
                        // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
                        // Spark has no TIME type, so TIMESTAMP is the closest target.
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    } else {
                        // Most targets: CAST(x AS TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                } else if args.len() == 2 {
                    // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
                    let expr = args.remove(0);
                    let tz = args.remove(0);
                    // Inner cast interprets the value as an instant (TIMESTAMPTZ)...
                    let cast_tstz = Expression::Cast(Box::new(Cast {
                        this: expr,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    // ...which is shifted into the requested zone...
                    let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                        this: cast_tstz,
                        zone: tz,
                    }));
                    // ...and finally truncated to its time-of-day component.
                    Ok(Expression::Cast(Box::new(Cast {
                        this: at_tz,
                        to: DataType::Time {
                            precision: None,
                            timezone: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Unexpected arity: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIME".to_string(),
                        args,
                    ))))
                }
            }
32730
            // DATETIME('string') -> CAST('string' AS TIMESTAMP)
            // DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
            // DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
            // DATETIME(y, m, d, h, min, s) -> target-specific
            "DATETIME" => {
                // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
                // (BigQuery round-trip: only the TIME literal form needs rewriting).
                if matches!(target, DialectType::BigQuery) {
                    if args.len() == 2 {
                        let has_time_literal = matches!(&args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
                        if has_time_literal {
                            let first = args.remove(0);
                            let second = args.remove(0);
                            // Rewrite TIME 'x' into CAST('x' AS TIME); leave any other
                            // expression untouched.
                            let time_as_cast = match second {
                                Expression::Literal(lit)
                                    if matches!(lit.as_ref(), Literal::Time(_)) =>
                                {
                                    // The outer matches! guard makes this irrefutable.
                                    let Literal::Time(s) = lit.as_ref() else {
                                        unreachable!()
                                    };
                                    Expression::Cast(Box::new(Cast {
                                        this: Expression::Literal(Box::new(Literal::String(
                                            s.clone(),
                                        ))),
                                        to: DataType::Time {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                }
                                other => other,
                            };
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATETIME".to_string(),
                                vec![first, time_as_cast],
                            ))));
                        }
                    }
                    // Every other BigQuery shape passes through unchanged.
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))));
                }

                if args.len() == 1 {
                    // DATETIME('string') -> CAST('string' AS TIMESTAMP)
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else if args.len() == 2 {
                    let first = args.remove(0);
                    let second = args.remove(0);
                    // Check if second arg is a TIME literal
                    // (distinguishes date+time composition from the timezone form).
                    let is_time_literal = matches!(&second, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
                    if is_time_literal {
                        // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
                        let cast_date = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
                        let time_as_string = match second {
                            Expression::Literal(lit)
                                if matches!(lit.as_ref(), Literal::Time(_)) =>
                            {
                                // The is_time_literal guard makes this irrefutable.
                                let Literal::Time(s) = lit.as_ref() else {
                                    unreachable!()
                                };
                                Expression::Literal(Box::new(Literal::String(s.clone())))
                            }
                            other => other,
                        };
                        let cast_time = Expression::Cast(Box::new(Cast {
                            this: time_as_string,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        // DATE + TIME addition, then cast the sum up to TIMESTAMP.
                        let add_expr =
                            Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: add_expr,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    } else {
                        // DATETIME('string', 'timezone')
                        // Interpret as an instant, shift into the zone, drop the zone.
                        let cast_tstz = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let at_tz =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cast_tstz,
                                zone: second,
                            }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                } else if args.len() >= 3 {
                    // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
                    // For other targets, use MAKE_TIMESTAMP or similar
                    // NOTE(review): only Snowflake is actually mapped; other targets
                    // keep DATETIME as-is — confirm that is the intended fallback.
                    if matches!(target, DialectType::Snowflake) {
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_FROM_PARTS".to_string(),
                            args,
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATETIME".to_string(),
                            args,
                        ))))
                    }
                } else {
                    // Zero arguments: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))))
                }
            }
32900
            // TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
            // TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz for DuckDB
            "TIMESTAMP" => {
                if args.len() == 1 {
                    // Single argument: a plain cast to TIMESTAMP WITH TIME ZONE.
                    // NOTE(review): this cast is emitted for every target, not only
                    // Presto as the header comment suggests — confirm.
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else if args.len() == 2 {
                    // Two arguments: value + timezone.
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    // Shared inner cast to timezone-less TIMESTAMP.
                    let cast_ts = Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    if matches!(target, DialectType::Snowflake) {
                        // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![tz, cast_ts],
                        ))))
                    } else {
                        // CAST(x AS TIMESTAMP) AT TIME ZONE tz
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: cast_ts,
                                zone: tz,
                            },
                        )))
                    }
                } else {
                    // Unexpected arity: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIMESTAMP".to_string(),
                        args,
                    ))))
                }
            }
32954
            // STRING(x) -> CAST(x AS VARCHAR/TEXT)
            // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
            "STRING" => {
                if args.len() == 1 {
                    // Simple stringify: plain cast to the target's string type.
                    let arg = args.remove(0);
                    // DuckDB prefers TEXT; everything else gets unsized VARCHAR.
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: cast_type,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else if args.len() == 2 {
                    // Timestamp stringify in a given timezone.
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    // Same target string type as the one-argument form.
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    if matches!(target, DialectType::Snowflake) {
                        // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
                        let convert_tz = Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![
                                Expression::Literal(Box::new(Literal::String("UTC".to_string()))),
                                tz,
                                arg,
                            ],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: convert_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    } else {
                        // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
                        // First pin the naive timestamp to UTC, then shift to tz.
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let at_utc =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cast_ts,
                                zone: Expression::Literal(Box::new(Literal::String(
                                    "UTC".to_string(),
                                ))),
                            }));
                        let at_tz =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: at_utc,
                                zone: tz,
                            }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                } else {
                    // Unexpected arity: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRING".to_string(),
                        args,
                    ))))
                }
            }
33048
            // UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
            //
            // UNIX_SECONDS(ts): seconds since the Unix epoch, per target dialect.
            "UNIX_SECONDS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
                        // EPOCH returns a DOUBLE in DuckDB; the outer cast restores
                        // BigQuery's integer result type.
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        let epoch = Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![cast_ts],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
                        // The epoch literal anchors the difference at Unix time zero.
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Box::new(Literal::String(
                                "1970-01-01 00:00:00+00".to_string(),
                            ))),
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::TimestampDiff(Box::new(
                            crate::expressions::TimestampDiff {
                                this: Box::new(epoch),
                                expression: Box::new(ts),
                                unit: Some("SECONDS".to_string()),
                            },
                        )))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_SECONDS".to_string(),
                        vec![ts],
                    )))),
                }
            }
33100
33101 "UNIX_MILLIS" if args.len() == 1 => {
33102 let ts = args.remove(0);
33103 match target {
33104 DialectType::DuckDB => {
33105 // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
33106 let cast_ts = Self::ensure_cast_timestamptz(ts);
33107 Ok(Expression::Function(Box::new(Function::new(
33108 "EPOCH_MS".to_string(),
33109 vec![cast_ts],
33110 ))))
33111 }
33112 _ => Ok(Expression::Function(Box::new(Function::new(
33113 "UNIX_MILLIS".to_string(),
33114 vec![ts],
33115 )))),
33116 }
33117 }
33118
33119 "UNIX_MICROS" if args.len() == 1 => {
33120 let ts = args.remove(0);
33121 match target {
33122 DialectType::DuckDB => {
33123 // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
33124 let cast_ts = Self::ensure_cast_timestamptz(ts);
33125 Ok(Expression::Function(Box::new(Function::new(
33126 "EPOCH_US".to_string(),
33127 vec![cast_ts],
33128 ))))
33129 }
33130 _ => Ok(Expression::Function(Box::new(Function::new(
33131 "UNIX_MICROS".to_string(),
33132 vec![ts],
33133 )))),
33134 }
33135 }
33136
33137 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
33138 "ARRAY_CONCAT" | "LIST_CONCAT" => {
33139 match target {
33140 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
33141 // CONCAT(arr1, arr2, ...)
33142 Ok(Expression::Function(Box::new(Function::new(
33143 "CONCAT".to_string(),
33144 args,
33145 ))))
33146 }
33147 DialectType::Presto | DialectType::Trino => {
33148 // CONCAT(arr1, arr2, ...)
33149 Ok(Expression::Function(Box::new(Function::new(
33150 "CONCAT".to_string(),
33151 args,
33152 ))))
33153 }
33154 DialectType::Snowflake => {
33155 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
33156 if args.len() == 1 {
33157 // ARRAY_CAT requires 2 args, add empty array as []
33158 let empty_arr = Expression::ArrayFunc(Box::new(
33159 crate::expressions::ArrayConstructor {
33160 expressions: vec![],
33161 bracket_notation: true,
33162 use_list_keyword: false,
33163 },
33164 ));
33165 let mut new_args = args;
33166 new_args.push(empty_arr);
33167 Ok(Expression::Function(Box::new(Function::new(
33168 "ARRAY_CAT".to_string(),
33169 new_args,
33170 ))))
33171 } else if args.is_empty() {
33172 Ok(Expression::Function(Box::new(Function::new(
33173 "ARRAY_CAT".to_string(),
33174 args,
33175 ))))
33176 } else {
33177 let mut it = args.into_iter().rev();
33178 let mut result = it.next().unwrap();
33179 for arr in it {
33180 result = Expression::Function(Box::new(Function::new(
33181 "ARRAY_CAT".to_string(),
33182 vec![arr, result],
33183 )));
33184 }
33185 Ok(result)
33186 }
33187 }
33188 DialectType::PostgreSQL => {
33189 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
33190 if args.len() <= 1 {
33191 Ok(Expression::Function(Box::new(Function::new(
33192 "ARRAY_CAT".to_string(),
33193 args,
33194 ))))
33195 } else {
33196 let mut it = args.into_iter().rev();
33197 let mut result = it.next().unwrap();
33198 for arr in it {
33199 result = Expression::Function(Box::new(Function::new(
33200 "ARRAY_CAT".to_string(),
33201 vec![arr, result],
33202 )));
33203 }
33204 Ok(result)
33205 }
33206 }
33207 DialectType::Redshift => {
33208 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
33209 if args.len() <= 2 {
33210 Ok(Expression::Function(Box::new(Function::new(
33211 "ARRAY_CONCAT".to_string(),
33212 args,
33213 ))))
33214 } else {
33215 let mut it = args.into_iter().rev();
33216 let mut result = it.next().unwrap();
33217 for arr in it {
33218 result = Expression::Function(Box::new(Function::new(
33219 "ARRAY_CONCAT".to_string(),
33220 vec![arr, result],
33221 )));
33222 }
33223 Ok(result)
33224 }
33225 }
33226 DialectType::DuckDB => {
33227 // LIST_CONCAT supports multiple args natively in DuckDB
33228 Ok(Expression::Function(Box::new(Function::new(
33229 "LIST_CONCAT".to_string(),
33230 args,
33231 ))))
33232 }
33233 _ => Ok(Expression::Function(Box::new(Function::new(
33234 "ARRAY_CONCAT".to_string(),
33235 args,
33236 )))),
33237 }
33238 }
33239
33240 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
33241 "ARRAY_CONCAT_AGG" if args.len() == 1 => {
33242 let arg = args.remove(0);
33243 match target {
33244 DialectType::Snowflake => {
33245 let array_agg =
33246 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
33247 this: arg,
33248 distinct: false,
33249 filter: None,
33250 order_by: vec![],
33251 name: None,
33252 ignore_nulls: None,
33253 having_max: None,
33254 limit: None,
33255 inferred_type: None,
33256 }));
33257 Ok(Expression::Function(Box::new(Function::new(
33258 "ARRAY_FLATTEN".to_string(),
33259 vec![array_agg],
33260 ))))
33261 }
33262 _ => Ok(Expression::Function(Box::new(Function::new(
33263 "ARRAY_CONCAT_AGG".to_string(),
33264 vec![arg],
33265 )))),
33266 }
33267 }
33268
33269 // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
33270 "MD5" if args.len() == 1 => {
33271 let arg = args.remove(0);
33272 match target {
33273 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
33274 // UNHEX(MD5(x))
33275 let md5 = Expression::Function(Box::new(Function::new(
33276 "MD5".to_string(),
33277 vec![arg],
33278 )));
33279 Ok(Expression::Function(Box::new(Function::new(
33280 "UNHEX".to_string(),
33281 vec![md5],
33282 ))))
33283 }
33284 DialectType::Snowflake => {
33285 // MD5_BINARY(x)
33286 Ok(Expression::Function(Box::new(Function::new(
33287 "MD5_BINARY".to_string(),
33288 vec![arg],
33289 ))))
33290 }
33291 _ => Ok(Expression::Function(Box::new(Function::new(
33292 "MD5".to_string(),
33293 vec![arg],
33294 )))),
33295 }
33296 }
33297
33298 "SHA1" if args.len() == 1 => {
33299 let arg = args.remove(0);
33300 match target {
33301 DialectType::DuckDB => {
33302 // UNHEX(SHA1(x))
33303 let sha1 = Expression::Function(Box::new(Function::new(
33304 "SHA1".to_string(),
33305 vec![arg],
33306 )));
33307 Ok(Expression::Function(Box::new(Function::new(
33308 "UNHEX".to_string(),
33309 vec![sha1],
33310 ))))
33311 }
33312 _ => Ok(Expression::Function(Box::new(Function::new(
33313 "SHA1".to_string(),
33314 vec![arg],
33315 )))),
33316 }
33317 }
33318
33319 "SHA256" if args.len() == 1 => {
33320 let arg = args.remove(0);
33321 match target {
33322 DialectType::DuckDB => {
33323 // UNHEX(SHA256(x))
33324 let sha = Expression::Function(Box::new(Function::new(
33325 "SHA256".to_string(),
33326 vec![arg],
33327 )));
33328 Ok(Expression::Function(Box::new(Function::new(
33329 "UNHEX".to_string(),
33330 vec![sha],
33331 ))))
33332 }
33333 DialectType::Snowflake => {
33334 // SHA2_BINARY(x, 256)
33335 Ok(Expression::Function(Box::new(Function::new(
33336 "SHA2_BINARY".to_string(),
33337 vec![arg, Expression::number(256)],
33338 ))))
33339 }
33340 DialectType::Redshift | DialectType::Spark => {
33341 // SHA2(x, 256)
33342 Ok(Expression::Function(Box::new(Function::new(
33343 "SHA2".to_string(),
33344 vec![arg, Expression::number(256)],
33345 ))))
33346 }
33347 _ => Ok(Expression::Function(Box::new(Function::new(
33348 "SHA256".to_string(),
33349 vec![arg],
33350 )))),
33351 }
33352 }
33353
33354 "SHA512" if args.len() == 1 => {
33355 let arg = args.remove(0);
33356 match target {
33357 DialectType::Snowflake => {
33358 // SHA2_BINARY(x, 512)
33359 Ok(Expression::Function(Box::new(Function::new(
33360 "SHA2_BINARY".to_string(),
33361 vec![arg, Expression::number(512)],
33362 ))))
33363 }
33364 DialectType::Redshift | DialectType::Spark => {
33365 // SHA2(x, 512)
33366 Ok(Expression::Function(Box::new(Function::new(
33367 "SHA2".to_string(),
33368 vec![arg, Expression::number(512)],
33369 ))))
33370 }
33371 _ => Ok(Expression::Function(Box::new(Function::new(
33372 "SHA512".to_string(),
33373 vec![arg],
33374 )))),
33375 }
33376 }
33377
            // REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg
            //
            // BigQuery returns group 1 when the pattern has a capturing group,
            // otherwise the whole match. Each target's default differs, so a group
            // index is appended (or omitted) to reproduce that behavior.
            "REGEXP_EXTRACT_ALL" if args.len() == 2 => {
                let str_expr = args.remove(0);
                let pattern = args.remove(0);

                // Check if pattern contains capturing groups (parentheses)
                // NOTE(review): this scan is heuristic — it also matches escaped
                // parens `\(` and non-capturing groups `(?:...)`, and sees nothing in
                // non-literal patterns. Confirm this is acceptable for real inputs.
                let has_groups = match &pattern {
                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
                        // The outer matches! guard makes this irrefutable.
                        let Literal::String(s) = lit.as_ref() else {
                            unreachable!()
                        };
                        s.contains('(') && s.contains(')')
                    }
                    _ => false,
                };

                match target {
                    DialectType::DuckDB => {
                        // DuckDB always wants an explicit group index: 1 when a
                        // capturing group exists, 0 for the whole match.
                        let group = if has_groups {
                            Expression::number(1)
                        } else {
                            Expression::number(0)
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT_ALL".to_string(),
                            vec![str_expr, pattern, group],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        } else {
                            // Whole-match semantics need an explicit 0.
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(0)],
                            ))))
                        }
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // Presto/Trino default to the whole match, so group 1 must be
                        // explicit when the pattern captures.
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(1)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    DialectType::Snowflake => {
                        if has_groups {
                            // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
                            // position=1, occurrence=1, 'c' = case-sensitive,
                            // group_num=1.
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![
                                    str_expr,
                                    pattern,
                                    Expression::number(1),
                                    Expression::number(1),
                                    Expression::Literal(Box::new(Literal::String("c".to_string()))),
                                    Expression::number(1),
                                ],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![str_expr, pattern],
                    )))),
                }
            }
33460
33461 // MOD(x, y) -> x % y for PostgreSQL/DuckDB
33462 "MOD" if args.len() == 2 => {
33463 match target {
33464 DialectType::PostgreSQL
33465 | DialectType::DuckDB
33466 | DialectType::Presto
33467 | DialectType::Trino
33468 | DialectType::Athena
33469 | DialectType::Snowflake => {
33470 let x = args.remove(0);
33471 let y = args.remove(0);
33472 // Wrap complex expressions in parens to preserve precedence
33473 let needs_paren = |e: &Expression| {
33474 matches!(
33475 e,
33476 Expression::Add(_)
33477 | Expression::Sub(_)
33478 | Expression::Mul(_)
33479 | Expression::Div(_)
33480 )
33481 };
33482 let x = if needs_paren(&x) {
33483 Expression::Paren(Box::new(crate::expressions::Paren {
33484 this: x,
33485 trailing_comments: vec![],
33486 }))
33487 } else {
33488 x
33489 };
33490 let y = if needs_paren(&y) {
33491 Expression::Paren(Box::new(crate::expressions::Paren {
33492 this: y,
33493 trailing_comments: vec![],
33494 }))
33495 } else {
33496 y
33497 };
33498 Ok(Expression::Mod(Box::new(
33499 crate::expressions::BinaryOp::new(x, y),
33500 )))
33501 }
33502 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
33503 // Hive/Spark: a % b
33504 let x = args.remove(0);
33505 let y = args.remove(0);
33506 let needs_paren = |e: &Expression| {
33507 matches!(
33508 e,
33509 Expression::Add(_)
33510 | Expression::Sub(_)
33511 | Expression::Mul(_)
33512 | Expression::Div(_)
33513 )
33514 };
33515 let x = if needs_paren(&x) {
33516 Expression::Paren(Box::new(crate::expressions::Paren {
33517 this: x,
33518 trailing_comments: vec![],
33519 }))
33520 } else {
33521 x
33522 };
33523 let y = if needs_paren(&y) {
33524 Expression::Paren(Box::new(crate::expressions::Paren {
33525 this: y,
33526 trailing_comments: vec![],
33527 }))
33528 } else {
33529 y
33530 };
33531 Ok(Expression::Mod(Box::new(
33532 crate::expressions::BinaryOp::new(x, y),
33533 )))
33534 }
33535 _ => Ok(Expression::Function(Box::new(Function::new(
33536 "MOD".to_string(),
33537 args,
33538 )))),
33539 }
33540 }
33541
33542 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, ARRAY_FILTER for StarRocks
33543 "ARRAY_FILTER" if args.len() == 2 => {
33544 let name = match target {
33545 DialectType::DuckDB => "LIST_FILTER",
33546 DialectType::StarRocks => "ARRAY_FILTER",
33547 _ => "FILTER",
33548 };
33549 Ok(Expression::Function(Box::new(Function::new(
33550 name.to_string(),
33551 args,
33552 ))))
33553 }
33554 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
33555 "FILTER" if args.len() == 2 => {
33556 let name = match target {
33557 DialectType::DuckDB => "LIST_FILTER",
33558 DialectType::StarRocks => "ARRAY_FILTER",
33559 _ => "FILTER",
33560 };
33561 Ok(Expression::Function(Box::new(Function::new(
33562 name.to_string(),
33563 args,
33564 ))))
33565 }
33566 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
33567 "REDUCE" if args.len() >= 3 => {
33568 let name = match target {
33569 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
33570 _ => "REDUCE",
33571 };
33572 Ok(Expression::Function(Box::new(Function::new(
33573 name.to_string(),
33574 args,
33575 ))))
33576 }
33577 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
33578 "ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
33579 Function::new("ARRAY_REVERSE".to_string(), args),
33580 ))),
33581
33582 // CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
33583 "CONCAT" if args.len() > 2 => match target {
33584 DialectType::DuckDB => {
33585 let mut it = args.into_iter();
33586 let mut result = it.next().unwrap();
33587 for arg in it {
33588 result = Expression::DPipe(Box::new(crate::expressions::DPipe {
33589 this: Box::new(result),
33590 expression: Box::new(arg),
33591 safe: None,
33592 }));
33593 }
33594 Ok(result)
33595 }
33596 _ => Ok(Expression::Function(Box::new(Function::new(
33597 "CONCAT".to_string(),
33598 args,
33599 )))),
33600 },
33601
33602 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
33603 "GENERATE_DATE_ARRAY" => {
33604 if matches!(target, DialectType::BigQuery) {
33605 // BQ->BQ: add default interval if not present
33606 if args.len() == 2 {
33607 let start = args.remove(0);
33608 let end = args.remove(0);
33609 let default_interval =
33610 Expression::Interval(Box::new(crate::expressions::Interval {
33611 this: Some(Expression::Literal(Box::new(Literal::String(
33612 "1".to_string(),
33613 )))),
33614 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33615 unit: crate::expressions::IntervalUnit::Day,
33616 use_plural: false,
33617 }),
33618 }));
33619 Ok(Expression::Function(Box::new(Function::new(
33620 "GENERATE_DATE_ARRAY".to_string(),
33621 vec![start, end, default_interval],
33622 ))))
33623 } else {
33624 Ok(Expression::Function(Box::new(Function::new(
33625 "GENERATE_DATE_ARRAY".to_string(),
33626 args,
33627 ))))
33628 }
33629 } else if matches!(target, DialectType::DuckDB) {
33630 // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
33631 let start = args.get(0).cloned();
33632 let end = args.get(1).cloned();
33633 let step = args.get(2).cloned().or_else(|| {
33634 Some(Expression::Interval(Box::new(
33635 crate::expressions::Interval {
33636 this: Some(Expression::Literal(Box::new(Literal::String(
33637 "1".to_string(),
33638 )))),
33639 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33640 unit: crate::expressions::IntervalUnit::Day,
33641 use_plural: false,
33642 }),
33643 },
33644 )))
33645 });
33646
33647 // Wrap start/end in CAST(... AS DATE) only for string literals
33648 let maybe_cast_date = |expr: Expression| -> Expression {
33649 if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
33650 {
33651 Expression::Cast(Box::new(Cast {
33652 this: expr,
33653 to: DataType::Date,
33654 trailing_comments: vec![],
33655 double_colon_syntax: false,
33656 format: None,
33657 default: None,
33658 inferred_type: None,
33659 }))
33660 } else {
33661 expr
33662 }
33663 };
33664 let cast_start = start.map(maybe_cast_date);
33665 let cast_end = end.map(maybe_cast_date);
33666
33667 let gen_series =
33668 Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
33669 start: cast_start.map(Box::new),
33670 end: cast_end.map(Box::new),
33671 step: step.map(Box::new),
33672 is_end_exclusive: None,
33673 }));
33674
33675 // Wrap in CAST(... AS DATE[])
33676 Ok(Expression::Cast(Box::new(Cast {
33677 this: gen_series,
33678 to: DataType::Array {
33679 element_type: Box::new(DataType::Date),
33680 dimension: None,
33681 },
33682 trailing_comments: vec![],
33683 double_colon_syntax: false,
33684 format: None,
33685 default: None,
33686 inferred_type: None,
33687 })))
33688 } else if matches!(target, DialectType::Snowflake) {
33689 // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
33690 // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
33691 if args.len() == 2 {
33692 let start = args.remove(0);
33693 let end = args.remove(0);
33694 let default_interval =
33695 Expression::Interval(Box::new(crate::expressions::Interval {
33696 this: Some(Expression::Literal(Box::new(Literal::String(
33697 "1".to_string(),
33698 )))),
33699 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33700 unit: crate::expressions::IntervalUnit::Day,
33701 use_plural: false,
33702 }),
33703 }));
33704 Ok(Expression::Function(Box::new(Function::new(
33705 "GENERATE_DATE_ARRAY".to_string(),
33706 vec![start, end, default_interval],
33707 ))))
33708 } else {
33709 Ok(Expression::Function(Box::new(Function::new(
33710 "GENERATE_DATE_ARRAY".to_string(),
33711 args,
33712 ))))
33713 }
33714 } else {
33715 // Convert to GenerateSeries for other targets
33716 let start = args.get(0).cloned();
33717 let end = args.get(1).cloned();
33718 let step = args.get(2).cloned().or_else(|| {
33719 Some(Expression::Interval(Box::new(
33720 crate::expressions::Interval {
33721 this: Some(Expression::Literal(Box::new(Literal::String(
33722 "1".to_string(),
33723 )))),
33724 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33725 unit: crate::expressions::IntervalUnit::Day,
33726 use_plural: false,
33727 }),
33728 },
33729 )))
33730 });
33731 Ok(Expression::GenerateSeries(Box::new(
33732 crate::expressions::GenerateSeries {
33733 start: start.map(Box::new),
33734 end: end.map(Box::new),
33735 step: step.map(Box::new),
33736 is_end_exclusive: None,
33737 },
33738 )))
33739 }
33740 }
33741
33742 // PARSE_DATE(format, str) -> target-specific
33743 "PARSE_DATE" if args.len() == 2 => {
33744 let format = args.remove(0);
33745 let str_expr = args.remove(0);
33746 match target {
33747 DialectType::DuckDB => {
33748 // CAST(STRPTIME(str, duck_format) AS DATE)
33749 let duck_format = Self::bq_format_to_duckdb(&format);
33750 let strptime = Expression::Function(Box::new(Function::new(
33751 "STRPTIME".to_string(),
33752 vec![str_expr, duck_format],
33753 )));
33754 Ok(Expression::Cast(Box::new(Cast {
33755 this: strptime,
33756 to: DataType::Date,
33757 trailing_comments: vec![],
33758 double_colon_syntax: false,
33759 format: None,
33760 default: None,
33761 inferred_type: None,
33762 })))
33763 }
33764 DialectType::Snowflake => {
33765 // _POLYGLOT_DATE(str, snowflake_format)
33766 // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
33767 let sf_format = Self::bq_format_to_snowflake(&format);
33768 Ok(Expression::Function(Box::new(Function::new(
33769 "_POLYGLOT_DATE".to_string(),
33770 vec![str_expr, sf_format],
33771 ))))
33772 }
33773 _ => Ok(Expression::Function(Box::new(Function::new(
33774 "PARSE_DATE".to_string(),
33775 vec![format, str_expr],
33776 )))),
33777 }
33778 }
33779
33780 // PARSE_TIMESTAMP(format, str) -> target-specific
33781 "PARSE_TIMESTAMP" if args.len() >= 2 => {
33782 let format = args.remove(0);
33783 let str_expr = args.remove(0);
33784 let tz = if !args.is_empty() {
33785 Some(args.remove(0))
33786 } else {
33787 None
33788 };
33789 match target {
33790 DialectType::DuckDB => {
33791 let duck_format = Self::bq_format_to_duckdb(&format);
33792 let strptime = Expression::Function(Box::new(Function::new(
33793 "STRPTIME".to_string(),
33794 vec![str_expr, duck_format],
33795 )));
33796 Ok(strptime)
33797 }
33798 _ => {
33799 let mut result_args = vec![format, str_expr];
33800 if let Some(tz_arg) = tz {
33801 result_args.push(tz_arg);
33802 }
33803 Ok(Expression::Function(Box::new(Function::new(
33804 "PARSE_TIMESTAMP".to_string(),
33805 result_args,
33806 ))))
33807 }
33808 }
33809 }
33810
33811 // FORMAT_DATE(format, date) -> target-specific
33812 "FORMAT_DATE" if args.len() == 2 => {
33813 let format = args.remove(0);
33814 let date_expr = args.remove(0);
33815 match target {
33816 DialectType::DuckDB => {
33817 // STRFTIME(CAST(date AS DATE), format)
33818 let cast_date = Expression::Cast(Box::new(Cast {
33819 this: date_expr,
33820 to: DataType::Date,
33821 trailing_comments: vec![],
33822 double_colon_syntax: false,
33823 format: None,
33824 default: None,
33825 inferred_type: None,
33826 }));
33827 Ok(Expression::Function(Box::new(Function::new(
33828 "STRFTIME".to_string(),
33829 vec![cast_date, format],
33830 ))))
33831 }
33832 _ => Ok(Expression::Function(Box::new(Function::new(
33833 "FORMAT_DATE".to_string(),
33834 vec![format, date_expr],
33835 )))),
33836 }
33837 }
33838
33839 // FORMAT_DATETIME(format, datetime) -> target-specific
33840 "FORMAT_DATETIME" if args.len() == 2 => {
33841 let format = args.remove(0);
33842 let dt_expr = args.remove(0);
33843
33844 if matches!(target, DialectType::BigQuery) {
33845 // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
33846 let norm_format = Self::bq_format_normalize_bq(&format);
33847 // Also strip DATETIME keyword from typed literals
33848 let norm_dt = match dt_expr {
33849 Expression::Literal(lit)
33850 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
33851 {
33852 let Literal::Timestamp(s) = lit.as_ref() else {
33853 unreachable!()
33854 };
33855 Expression::Cast(Box::new(Cast {
33856 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
33857 to: DataType::Custom {
33858 name: "DATETIME".to_string(),
33859 },
33860 trailing_comments: vec![],
33861 double_colon_syntax: false,
33862 format: None,
33863 default: None,
33864 inferred_type: None,
33865 }))
33866 }
33867 other => other,
33868 };
33869 return Ok(Expression::Function(Box::new(Function::new(
33870 "FORMAT_DATETIME".to_string(),
33871 vec![norm_format, norm_dt],
33872 ))));
33873 }
33874
33875 match target {
33876 DialectType::DuckDB => {
33877 // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
33878 let cast_dt = Self::ensure_cast_timestamp(dt_expr);
33879 let duck_format = Self::bq_format_to_duckdb(&format);
33880 Ok(Expression::Function(Box::new(Function::new(
33881 "STRFTIME".to_string(),
33882 vec![cast_dt, duck_format],
33883 ))))
33884 }
33885 _ => Ok(Expression::Function(Box::new(Function::new(
33886 "FORMAT_DATETIME".to_string(),
33887 vec![format, dt_expr],
33888 )))),
33889 }
33890 }
33891
33892 // FORMAT_TIMESTAMP(format, ts) -> target-specific
33893 "FORMAT_TIMESTAMP" if args.len() == 2 => {
33894 let format = args.remove(0);
33895 let ts_expr = args.remove(0);
33896 match target {
33897 DialectType::DuckDB => {
33898 // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
33899 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
33900 let cast_ts = Expression::Cast(Box::new(Cast {
33901 this: cast_tstz,
33902 to: DataType::Timestamp {
33903 timezone: false,
33904 precision: None,
33905 },
33906 trailing_comments: vec![],
33907 double_colon_syntax: false,
33908 format: None,
33909 default: None,
33910 inferred_type: None,
33911 }));
33912 Ok(Expression::Function(Box::new(Function::new(
33913 "STRFTIME".to_string(),
33914 vec![cast_ts, format],
33915 ))))
33916 }
33917 DialectType::Snowflake => {
33918 // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
33919 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
33920 let cast_ts = Expression::Cast(Box::new(Cast {
33921 this: cast_tstz,
33922 to: DataType::Timestamp {
33923 timezone: false,
33924 precision: None,
33925 },
33926 trailing_comments: vec![],
33927 double_colon_syntax: false,
33928 format: None,
33929 default: None,
33930 inferred_type: None,
33931 }));
33932 let sf_format = Self::bq_format_to_snowflake(&format);
33933 Ok(Expression::Function(Box::new(Function::new(
33934 "TO_CHAR".to_string(),
33935 vec![cast_ts, sf_format],
33936 ))))
33937 }
33938 _ => Ok(Expression::Function(Box::new(Function::new(
33939 "FORMAT_TIMESTAMP".to_string(),
33940 vec![format, ts_expr],
33941 )))),
33942 }
33943 }
33944
33945 // UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB
33946 "UNIX_DATE" if args.len() == 1 => {
33947 let date = args.remove(0);
33948 match target {
33949 DialectType::DuckDB => {
33950 let epoch = Expression::Cast(Box::new(Cast {
33951 this: Expression::Literal(Box::new(Literal::String(
33952 "1970-01-01".to_string(),
33953 ))),
33954 to: DataType::Date,
33955 trailing_comments: vec![],
33956 double_colon_syntax: false,
33957 format: None,
33958 default: None,
33959 inferred_type: None,
33960 }));
33961 // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
33962 // Need to convert DATE literal to CAST
33963 let norm_date = Self::date_literal_to_cast(date);
33964 Ok(Expression::Function(Box::new(Function::new(
33965 "DATE_DIFF".to_string(),
33966 vec![
33967 Expression::Literal(Box::new(Literal::String("DAY".to_string()))),
33968 epoch,
33969 norm_date,
33970 ],
33971 ))))
33972 }
33973 _ => Ok(Expression::Function(Box::new(Function::new(
33974 "UNIX_DATE".to_string(),
33975 vec![date],
33976 )))),
33977 }
33978 }
33979
            // UNIX_SECONDS(ts) -> target-specific seconds-since-epoch rewrite.
            "UNIX_SECONDS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
                        // Timestamp literals are first normalized into an
                        // explicit TIMESTAMPTZ cast.
                        let norm_ts = Self::ts_literal_to_cast_tz(ts);
                        let epoch = Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![norm_ts],
                        )));
                        // Outer cast pins the result type to BIGINT.
                        Ok(Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
                        // The epoch reference is an explicit TIMESTAMPTZ cast
                        // of a UTC-offset string literal.
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Box::new(Literal::String(
                                "1970-01-01 00:00:00+00".to_string(),
                            ))),
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        // The unit is emitted as a bare identifier, not a
                        // string literal, so it renders unquoted.
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMPDIFF".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("SECONDS".to_string())),
                                epoch,
                                ts,
                            ],
                        ))))
                    }
                    // All other targets keep UNIX_SECONDS unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_SECONDS".to_string(),
                        vec![ts],
                    )))),
                }
            }
34032
34033 // UNIX_MILLIS(ts) -> target-specific
34034 "UNIX_MILLIS" if args.len() == 1 => {
34035 let ts = args.remove(0);
34036 match target {
34037 DialectType::DuckDB => {
34038 let norm_ts = Self::ts_literal_to_cast_tz(ts);
34039 Ok(Expression::Function(Box::new(Function::new(
34040 "EPOCH_MS".to_string(),
34041 vec![norm_ts],
34042 ))))
34043 }
34044 _ => Ok(Expression::Function(Box::new(Function::new(
34045 "UNIX_MILLIS".to_string(),
34046 vec![ts],
34047 )))),
34048 }
34049 }
34050
34051 // UNIX_MICROS(ts) -> target-specific
34052 "UNIX_MICROS" if args.len() == 1 => {
34053 let ts = args.remove(0);
34054 match target {
34055 DialectType::DuckDB => {
34056 let norm_ts = Self::ts_literal_to_cast_tz(ts);
34057 Ok(Expression::Function(Box::new(Function::new(
34058 "EPOCH_US".to_string(),
34059 vec![norm_ts],
34060 ))))
34061 }
34062 _ => Ok(Expression::Function(Box::new(Function::new(
34063 "UNIX_MICROS".to_string(),
34064 vec![ts],
34065 )))),
34066 }
34067 }
34068
34069 // INSTR(str, substr) -> target-specific
34070 "INSTR" => {
34071 if matches!(target, DialectType::BigQuery) {
34072 // BQ->BQ: keep as INSTR
34073 Ok(Expression::Function(Box::new(Function::new(
34074 "INSTR".to_string(),
34075 args,
34076 ))))
34077 } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
34078 // Snowflake: CHARINDEX(substr, str) - swap args
34079 let str_expr = args.remove(0);
34080 let substr = args.remove(0);
34081 Ok(Expression::Function(Box::new(Function::new(
34082 "CHARINDEX".to_string(),
34083 vec![substr, str_expr],
34084 ))))
34085 } else {
34086 // Keep as INSTR for other targets
34087 Ok(Expression::Function(Box::new(Function::new(
34088 "INSTR".to_string(),
34089 args,
34090 ))))
34091 }
34092 }
34093
            // CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone.
            // Normalizes between the paren-less keyword form and the function
            // (paren) form per target, and lowers a CURRENT_DATE timezone
            // argument into the target's equivalent construct.
            "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: always output with parens (function form), keep any timezone arg
                    Ok(Expression::Function(Box::new(Function::new(name, args))))
                } else if name == "CURRENT_DATE" && args.len() == 1 {
                    // CURRENT_DATE('UTC') - has timezone arg
                    let tz_arg = args.remove(0);
                    match target {
                        DialectType::DuckDB => {
                            // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
                            let ct = Expression::CurrentTimestamp(
                                crate::expressions::CurrentTimestamp {
                                    precision: None,
                                    sysdate: false,
                                },
                            );
                            let at_tz =
                                Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                    this: ct,
                                    zone: tz_arg,
                                }));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: at_tz,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::Snowflake => {
                            // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
                            let ct = Expression::Function(Box::new(Function::new(
                                "CURRENT_TIMESTAMP".to_string(),
                                vec![],
                            )));
                            let convert = Expression::Function(Box::new(Function::new(
                                "CONVERT_TIMEZONE".to_string(),
                                vec![tz_arg, ct],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: convert,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        _ => {
                            // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
                            let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
                            Ok(Expression::AtTimeZone(Box::new(
                                crate::expressions::AtTimeZone {
                                    this: cd,
                                    zone: tz_arg,
                                },
                            )))
                        }
                    }
                } else if (name == "CURRENT_TIMESTAMP"
                    || name == "CURRENT_TIME"
                    || name == "CURRENT_DATE")
                    && args.is_empty()
                    && matches!(
                        target,
                        DialectType::PostgreSQL
                            | DialectType::DuckDB
                            | DialectType::Presto
                            | DialectType::Trino
                    )
                {
                    // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME,
                    // so each is emitted as its dedicated AST node rather than a Function.
                    if name == "CURRENT_TIMESTAMP" {
                        Ok(Expression::CurrentTimestamp(
                            crate::expressions::CurrentTimestamp {
                                precision: None,
                                sysdate: false,
                            },
                        ))
                    } else if name == "CURRENT_DATE" {
                        Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
                    } else {
                        // CURRENT_TIME
                        Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
                            precision: None,
                        }))
                    }
                } else {
                    // All other targets: keep as function (with parens)
                    Ok(Expression::Function(Box::new(Function::new(name, args))))
                }
            }
34190
34191 // JSON_QUERY(json, path) -> target-specific
34192 "JSON_QUERY" if args.len() == 2 => {
34193 match target {
34194 DialectType::DuckDB | DialectType::SQLite => {
34195 // json -> path syntax
34196 let json_expr = args.remove(0);
34197 let path = args.remove(0);
34198 Ok(Expression::JsonExtract(Box::new(
34199 crate::expressions::JsonExtractFunc {
34200 this: json_expr,
34201 path,
34202 returning: None,
34203 arrow_syntax: true,
34204 hash_arrow_syntax: false,
34205 wrapper_option: None,
34206 quotes_option: None,
34207 on_scalar_string: false,
34208 on_error: None,
34209 },
34210 )))
34211 }
34212 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
34213 Ok(Expression::Function(Box::new(Function::new(
34214 "GET_JSON_OBJECT".to_string(),
34215 args,
34216 ))))
34217 }
34218 DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
34219 Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
34220 )),
34221 _ => Ok(Expression::Function(Box::new(Function::new(
34222 "JSON_QUERY".to_string(),
34223 args,
34224 )))),
34225 }
34226 }
34227
34228 // JSON_VALUE_ARRAY(json, path) -> target-specific
34229 "JSON_VALUE_ARRAY" if args.len() == 2 => {
34230 match target {
34231 DialectType::DuckDB => {
34232 // CAST(json -> path AS TEXT[])
34233 let json_expr = args.remove(0);
34234 let path = args.remove(0);
34235 let arrow = Expression::JsonExtract(Box::new(
34236 crate::expressions::JsonExtractFunc {
34237 this: json_expr,
34238 path,
34239 returning: None,
34240 arrow_syntax: true,
34241 hash_arrow_syntax: false,
34242 wrapper_option: None,
34243 quotes_option: None,
34244 on_scalar_string: false,
34245 on_error: None,
34246 },
34247 ));
34248 Ok(Expression::Cast(Box::new(Cast {
34249 this: arrow,
34250 to: DataType::Array {
34251 element_type: Box::new(DataType::Text),
34252 dimension: None,
34253 },
34254 trailing_comments: vec![],
34255 double_colon_syntax: false,
34256 format: None,
34257 default: None,
34258 inferred_type: None,
34259 })))
34260 }
34261 DialectType::Snowflake => {
34262 let json_expr = args.remove(0);
34263 let path_expr = args.remove(0);
34264 // Convert JSON path from $.path to just path
34265 let sf_path = if let Expression::Literal(ref lit) = path_expr {
34266 if let Literal::String(ref s) = lit.as_ref() {
34267 let trimmed = s.trim_start_matches('$').trim_start_matches('.');
34268 Expression::Literal(Box::new(Literal::String(trimmed.to_string())))
34269 } else {
34270 path_expr.clone()
34271 }
34272 } else {
34273 path_expr
34274 };
34275 let parse_json = Expression::Function(Box::new(Function::new(
34276 "PARSE_JSON".to_string(),
34277 vec![json_expr],
34278 )));
34279 let get_path = Expression::Function(Box::new(Function::new(
34280 "GET_PATH".to_string(),
34281 vec![parse_json, sf_path],
34282 )));
34283 // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
34284 let cast_expr = Expression::Cast(Box::new(Cast {
34285 this: Expression::Identifier(Identifier::new("x")),
34286 to: DataType::VarChar {
34287 length: None,
34288 parenthesized_length: false,
34289 },
34290 trailing_comments: vec![],
34291 double_colon_syntax: false,
34292 format: None,
34293 default: None,
34294 inferred_type: None,
34295 }));
34296 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
34297 parameters: vec![Identifier::new("x")],
34298 body: cast_expr,
34299 colon: false,
34300 parameter_types: vec![],
34301 }));
34302 Ok(Expression::Function(Box::new(Function::new(
34303 "TRANSFORM".to_string(),
34304 vec![get_path, lambda],
34305 ))))
34306 }
34307 _ => Ok(Expression::Function(Box::new(Function::new(
34308 "JSON_VALUE_ARRAY".to_string(),
34309 args,
34310 )))),
34311 }
34312 }
34313
            // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
            // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
            // This is different from Hive/Spark where 3rd arg is "group_index"
            "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
                match target {
                    DialectType::DuckDB
                    | DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        if args.len() == 2 {
                            // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
                            args.push(Expression::number(1));
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT".to_string(),
                                args,
                            ))))
                        } else if args.len() == 3 {
                            let val = args.remove(0);
                            let regex = args.remove(0);
                            let position = args.remove(0);
                            // A literal position of 1 is a no-op; only other
                            // positions need the SUBSTRING emulation below.
                            let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                            if is_pos_1 {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![val, regex, Expression::number(1)],
                                ))))
                            } else {
                                // Emulate BigQuery's start position by slicing
                                // the subject first; NULLIF(..., '') maps an
                                // empty slice to NULL before matching.
                                let substring_expr = Expression::Function(Box::new(Function::new(
                                    "SUBSTRING".to_string(),
                                    vec![val, position],
                                )));
                                let nullif_expr = Expression::Function(Box::new(Function::new(
                                    "NULLIF".to_string(),
                                    vec![
                                        substring_expr,
                                        Expression::Literal(Box::new(Literal::String(
                                            String::new(),
                                        ))),
                                    ],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![nullif_expr, regex, Expression::number(1)],
                                ))))
                            }
                        } else if args.len() == 4 {
                            let val = args.remove(0);
                            let regex = args.remove(0);
                            let position = args.remove(0);
                            let occurrence = args.remove(0);
                            let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                            let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                            if is_pos_1 && is_occ_1 {
                                // Both defaults: reduces to the simple 3-arg form.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![val, regex, Expression::number(1)],
                                ))))
                            } else {
                                // Non-default occurrence: extract ALL matches,
                                // then index into the result array. The subject
                                // gets the same SUBSTRING/NULLIF position
                                // emulation as the 3-arg case when needed.
                                let subject = if is_pos_1 {
                                    val
                                } else {
                                    let substring_expr = Expression::Function(Box::new(
                                        Function::new("SUBSTRING".to_string(), vec![val, position]),
                                    ));
                                    Expression::Function(Box::new(Function::new(
                                        "NULLIF".to_string(),
                                        vec![
                                            substring_expr,
                                            Expression::Literal(Box::new(Literal::String(
                                                String::new(),
                                            ))),
                                        ],
                                    )))
                                };
                                let extract_all = Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![subject, regex, Expression::number(1)],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "ARRAY_EXTRACT".to_string(),
                                    vec![extract_all, occurrence],
                                ))))
                            }
                        } else {
                            // Unexpected arity: rebuild the call preserving the
                            // original function's flags (distinct, quoting, ...).
                            Ok(Expression::Function(Box::new(Function {
                                name: f.name,
                                args,
                                distinct: f.distinct,
                                trailing_comments: f.trailing_comments,
                                use_bracket_syntax: f.use_bracket_syntax,
                                no_parens: f.no_parens,
                                quoted: f.quoted,
                                span: None,
                                inferred_type: None,
                            })))
                        }
                    }
                    DialectType::Snowflake => {
                        // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_SUBSTR".to_string(),
                            args,
                        ))))
                    }
                    _ => {
                        // For other targets (Hive/Spark/BigQuery): pass through as-is
                        // BigQuery's default group behavior matches Hive/Spark for 2-arg case
                        Ok(Expression::Function(Box::new(Function {
                            name: f.name,
                            args,
                            distinct: f.distinct,
                            trailing_comments: f.trailing_comments,
                            use_bracket_syntax: f.use_bracket_syntax,
                            no_parens: f.no_parens,
                            quoted: f.quoted,
                            span: None,
                            inferred_type: None,
                        })))
                    }
                }
            }
34435
            // BigQuery STRUCT(args) -> target-specific struct expression
            "STRUCT" => {
                // Convert Function args to Struct fields. Aliased args become
                // named fields; unnamed fields get a target-specific auto-name
                // (see the per-target notes below). `i` is the 0-based arg
                // position used for the `_N` auto-names.
                let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
                for (i, arg) in args.into_iter().enumerate() {
                    match arg {
                        Expression::Alias(a) => {
                            // Named field: expr AS name
                            fields.push((Some(a.alias.name.clone()), a.this));
                        }
                        other => {
                            // Unnamed field: for Spark/Hive, keep as None
                            // For Snowflake, auto-name as _N
                            // For DuckDB, use column name for column refs, _N for others
                            if matches!(target, DialectType::Snowflake) {
                                fields.push((Some(format!("_{}", i)), other));
                            } else if matches!(target, DialectType::DuckDB) {
                                let auto_name = match &other {
                                    Expression::Column(col) => col.name.name.clone(),
                                    _ => format!("_{}", i),
                                };
                                fields.push((Some(auto_name), other));
                            } else {
                                fields.push((None, other));
                            }
                        }
                    }
                }

                match target {
                    DialectType::Snowflake => {
                        // OBJECT_CONSTRUCT('name', value, ...)
                        // Alternating name/value argument pairs; a (by now
                        // unreachable for Snowflake) unnamed field would emit
                        // its value alone.
                        let mut oc_args = Vec::new();
                        for (name, val) in &fields {
                            if let Some(n) = name {
                                oc_args.push(Expression::Literal(Box::new(Literal::String(
                                    n.clone(),
                                ))));
                                oc_args.push(val.clone());
                            } else {
                                oc_args.push(val.clone());
                            }
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "OBJECT_CONSTRUCT".to_string(),
                            oc_args,
                        ))))
                    }
                    DialectType::DuckDB => {
                        // {'name': value, ...}
                        Ok(Expression::Struct(Box::new(crate::expressions::Struct {
                            fields,
                        })))
                    }
                    DialectType::Hive => {
                        // STRUCT(val1, val2, ...) - strip aliases
                        let hive_fields: Vec<(Option<String>, Expression)> =
                            fields.into_iter().map(|(_, v)| (None, v)).collect();
                        Ok(Expression::Struct(Box::new(crate::expressions::Struct {
                            fields: hive_fields,
                        })))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Use Expression::Struct to bypass Spark target transform auto-naming
                        Ok(Expression::Struct(Box::new(crate::expressions::Struct {
                            fields,
                        })))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
                        let all_named =
                            !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
                        let all_types_inferable = all_named
                            && fields
                                .iter()
                                .all(|(_, val)| Self::can_infer_presto_type(val));
                        let row_args: Vec<Expression> =
                            fields.iter().map(|(_, v)| v.clone()).collect();
                        let row_expr = Expression::Function(Box::new(Function::new(
                            "ROW".to_string(),
                            row_args,
                        )));
                        if all_named && all_types_inferable {
                            // Build ROW type with inferred types
                            let mut row_type_fields = Vec::new();
                            for (name, val) in &fields {
                                if let Some(n) = name {
                                    let type_str = Self::infer_sql_type_for_presto(val);
                                    row_type_fields.push(crate::expressions::StructField::new(
                                        n.clone(),
                                        crate::expressions::DataType::Custom { name: type_str },
                                    ));
                                }
                            }
                            let row_type = crate::expressions::DataType::Struct {
                                fields: row_type_fields,
                                nested: true,
                            };
                            Ok(Expression::Cast(Box::new(Cast {
                                this: row_expr,
                                to: row_type,
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        } else {
                            // Otherwise a bare ROW(...) without the typed CAST.
                            Ok(row_expr)
                        }
                    }
                    _ => {
                        // Default: keep as STRUCT function with original args,
                        // re-wrapping named fields back into Alias nodes.
                        let mut new_args = Vec::new();
                        for (name, val) in fields {
                            if let Some(n) = name {
                                new_args.push(Expression::Alias(Box::new(
                                    crate::expressions::Alias::new(val, Identifier::new(n)),
                                )));
                            } else {
                                new_args.push(val);
                            }
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRUCT".to_string(),
                            new_args,
                        ))))
                    }
                }
            }
34566
34567 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
34568 "ROUND" if args.len() == 3 => {
34569 let x = args.remove(0);
34570 let n = args.remove(0);
34571 let mode = args.remove(0);
34572 // Check if mode is 'ROUND_HALF_EVEN'
34573 let is_half_even = matches!(&mode, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN")));
34574 if is_half_even && matches!(target, DialectType::DuckDB) {
34575 Ok(Expression::Function(Box::new(Function::new(
34576 "ROUND_EVEN".to_string(),
34577 vec![x, n],
34578 ))))
34579 } else {
34580 // Pass through with all args
34581 Ok(Expression::Function(Box::new(Function::new(
34582 "ROUND".to_string(),
34583 vec![x, n, mode],
34584 ))))
34585 }
34586 }
34587
34588 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
34589 "MAKE_INTERVAL" => {
34590 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
34591 // The positional args are: year, month
34592 // Named args are: day =>, minute =>, etc.
34593 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
34594 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
34595 // For BigQuery->BigQuery: reorder named args (day before minute)
34596 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
34597 let mut parts: Vec<(String, String)> = Vec::new();
34598 let mut pos_idx = 0;
34599 let pos_units = ["year", "month"];
34600 for arg in &args {
34601 if let Expression::NamedArgument(na) = arg {
34602 // Named arg like minute => 5
34603 let unit = na.name.name.clone();
34604 if let Expression::Literal(lit) = &na.value {
34605 if let Literal::Number(n) = lit.as_ref() {
34606 parts.push((unit, n.clone()));
34607 }
34608 }
34609 } else if pos_idx < pos_units.len() {
34610 if let Expression::Literal(lit) = arg {
34611 if let Literal::Number(n) = lit.as_ref() {
34612 parts.push((pos_units[pos_idx].to_string(), n.clone()));
34613 }
34614 }
34615 pos_idx += 1;
34616 }
34617 }
34618 // Don't sort - preserve original argument order
34619 let separator = if matches!(target, DialectType::Snowflake) {
34620 ", "
34621 } else {
34622 " "
34623 };
34624 let interval_str = parts
34625 .iter()
34626 .map(|(u, v)| format!("{} {}", v, u))
34627 .collect::<Vec<_>>()
34628 .join(separator);
34629 Ok(Expression::Interval(Box::new(
34630 crate::expressions::Interval {
34631 this: Some(Expression::Literal(Box::new(Literal::String(
34632 interval_str,
34633 )))),
34634 unit: None,
34635 },
34636 )))
34637 } else if matches!(target, DialectType::BigQuery) {
34638 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
34639 let mut positional = Vec::new();
34640 let mut named: Vec<(
34641 String,
34642 Expression,
34643 crate::expressions::NamedArgSeparator,
34644 )> = Vec::new();
34645 let _pos_units = ["year", "month"];
34646 let mut _pos_idx = 0;
34647 for arg in args {
34648 if let Expression::NamedArgument(na) = arg {
34649 named.push((na.name.name.clone(), na.value, na.separator));
34650 } else {
34651 positional.push(arg);
34652 _pos_idx += 1;
34653 }
34654 }
34655 // Sort named args by: day, hour, minute, second
34656 let unit_order = |u: &str| -> usize {
34657 match u.to_ascii_lowercase().as_str() {
34658 "day" => 0,
34659 "hour" => 1,
34660 "minute" => 2,
34661 "second" => 3,
34662 _ => 4,
34663 }
34664 };
34665 named.sort_by_key(|(u, _, _)| unit_order(u));
34666 let mut result_args = positional;
34667 for (name, value, sep) in named {
34668 result_args.push(Expression::NamedArgument(Box::new(
34669 crate::expressions::NamedArgument {
34670 name: Identifier::new(&name),
34671 value,
34672 separator: sep,
34673 },
34674 )));
34675 }
34676 Ok(Expression::Function(Box::new(Function::new(
34677 "MAKE_INTERVAL".to_string(),
34678 result_args,
34679 ))))
34680 } else {
34681 Ok(Expression::Function(Box::new(Function::new(
34682 "MAKE_INTERVAL".to_string(),
34683 args,
34684 ))))
34685 }
34686 }
34687
34688 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
34689 "ARRAY_TO_STRING" if args.len() == 3 => {
34690 let arr = args.remove(0);
34691 let sep = args.remove(0);
34692 let null_text = args.remove(0);
34693 match target {
34694 DialectType::DuckDB => {
34695 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
34696 let _lambda_param =
34697 Expression::Identifier(crate::expressions::Identifier::new("x"));
34698 let coalesce =
34699 Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
34700 original_name: None,
34701 expressions: vec![
34702 Expression::Identifier(crate::expressions::Identifier::new(
34703 "x",
34704 )),
34705 null_text,
34706 ],
34707 inferred_type: None,
34708 }));
34709 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
34710 parameters: vec![crate::expressions::Identifier::new("x")],
34711 body: coalesce,
34712 colon: false,
34713 parameter_types: vec![],
34714 }));
34715 let list_transform = Expression::Function(Box::new(Function::new(
34716 "LIST_TRANSFORM".to_string(),
34717 vec![arr, lambda],
34718 )));
34719 Ok(Expression::Function(Box::new(Function::new(
34720 "ARRAY_TO_STRING".to_string(),
34721 vec![list_transform, sep],
34722 ))))
34723 }
34724 _ => Ok(Expression::Function(Box::new(Function::new(
34725 "ARRAY_TO_STRING".to_string(),
34726 vec![arr, sep, null_text],
34727 )))),
34728 }
34729 }
34730
34731 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
34732 "LENGTH" if args.len() == 1 => {
34733 let arg = args.remove(0);
34734 match target {
34735 DialectType::DuckDB => {
34736 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
34737 let typeof_func = Expression::Function(Box::new(Function::new(
34738 "TYPEOF".to_string(),
34739 vec![arg.clone()],
34740 )));
34741 let blob_cast = Expression::Cast(Box::new(Cast {
34742 this: arg.clone(),
34743 to: DataType::VarBinary { length: None },
34744 trailing_comments: vec![],
34745 double_colon_syntax: false,
34746 format: None,
34747 default: None,
34748 inferred_type: None,
34749 }));
34750 let octet_length = Expression::Function(Box::new(Function::new(
34751 "OCTET_LENGTH".to_string(),
34752 vec![blob_cast],
34753 )));
34754 let text_cast = Expression::Cast(Box::new(Cast {
34755 this: arg,
34756 to: DataType::Text,
34757 trailing_comments: vec![],
34758 double_colon_syntax: false,
34759 format: None,
34760 default: None,
34761 inferred_type: None,
34762 }));
34763 let length_text = Expression::Function(Box::new(Function::new(
34764 "LENGTH".to_string(),
34765 vec![text_cast],
34766 )));
34767 Ok(Expression::Case(Box::new(crate::expressions::Case {
34768 operand: Some(typeof_func),
34769 whens: vec![(
34770 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
34771 octet_length,
34772 )],
34773 else_: Some(length_text),
34774 comments: Vec::new(),
34775 inferred_type: None,
34776 })))
34777 }
34778 _ => Ok(Expression::Function(Box::new(Function::new(
34779 "LENGTH".to_string(),
34780 vec![arg],
34781 )))),
34782 }
34783 }
34784
34785 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
34786 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
34787 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
34788 // The args should be [x, fraction] with the null handling stripped
34789 // For DuckDB: QUANTILE_CONT(x, fraction)
34790 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
34791 match target {
34792 DialectType::DuckDB => {
34793 // Strip down to just 2 args, rename to QUANTILE_CONT
34794 let x = args[0].clone();
34795 let frac = args[1].clone();
34796 Ok(Expression::Function(Box::new(Function::new(
34797 "QUANTILE_CONT".to_string(),
34798 vec![x, frac],
34799 ))))
34800 }
34801 _ => Ok(Expression::Function(Box::new(Function::new(
34802 "PERCENTILE_CONT".to_string(),
34803 args,
34804 )))),
34805 }
34806 }
34807
34808 // All others: pass through
34809 _ => Ok(Expression::Function(Box::new(Function {
34810 name: f.name,
34811 args,
34812 distinct: f.distinct,
34813 trailing_comments: f.trailing_comments,
34814 use_bracket_syntax: f.use_bracket_syntax,
34815 no_parens: f.no_parens,
34816 quoted: f.quoted,
34817 span: None,
34818 inferred_type: None,
34819 }))),
34820 }
34821 }
34822
34823 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
34824 /// Returns false for column references and other non-literal expressions where the type is unknown.
34825 fn can_infer_presto_type(expr: &Expression) -> bool {
34826 match expr {
34827 Expression::Literal(_) => true,
34828 Expression::Boolean(_) => true,
34829 Expression::Array(_) | Expression::ArrayFunc(_) => true,
34830 Expression::Struct(_) | Expression::StructFunc(_) => true,
34831 Expression::Function(f) => {
34832 f.name.eq_ignore_ascii_case("STRUCT")
34833 || f.name.eq_ignore_ascii_case("ROW")
34834 || f.name.eq_ignore_ascii_case("CURRENT_DATE")
34835 || f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
34836 || f.name.eq_ignore_ascii_case("NOW")
34837 }
34838 Expression::Cast(_) => true,
34839 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
34840 _ => false,
34841 }
34842 }
34843
34844 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
34845 fn infer_sql_type_for_presto(expr: &Expression) -> String {
34846 use crate::expressions::Literal;
34847 match expr {
34848 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
34849 "VARCHAR".to_string()
34850 }
34851 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
34852 let Literal::Number(n) = lit.as_ref() else {
34853 unreachable!()
34854 };
34855 if n.contains('.') {
34856 "DOUBLE".to_string()
34857 } else {
34858 "INTEGER".to_string()
34859 }
34860 }
34861 Expression::Boolean(_) => "BOOLEAN".to_string(),
34862 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
34863 "DATE".to_string()
34864 }
34865 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
34866 "TIMESTAMP".to_string()
34867 }
34868 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
34869 "TIMESTAMP".to_string()
34870 }
34871 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
34872 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
34873 Expression::Function(f) => {
34874 if f.name.eq_ignore_ascii_case("STRUCT") || f.name.eq_ignore_ascii_case("ROW") {
34875 "ROW".to_string()
34876 } else if f.name.eq_ignore_ascii_case("CURRENT_DATE") {
34877 "DATE".to_string()
34878 } else if f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
34879 || f.name.eq_ignore_ascii_case("NOW")
34880 {
34881 "TIMESTAMP".to_string()
34882 } else {
34883 "VARCHAR".to_string()
34884 }
34885 }
34886 Expression::Cast(c) => {
34887 // If already cast, use the target type
34888 Self::data_type_to_presto_string(&c.to)
34889 }
34890 _ => "VARCHAR".to_string(),
34891 }
34892 }
34893
34894 /// Convert a DataType to its Presto/Trino string representation for ROW type
34895 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
34896 use crate::expressions::DataType;
34897 match dt {
34898 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
34899 "VARCHAR".to_string()
34900 }
34901 DataType::Int { .. }
34902 | DataType::BigInt { .. }
34903 | DataType::SmallInt { .. }
34904 | DataType::TinyInt { .. } => "INTEGER".to_string(),
34905 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
34906 DataType::Boolean => "BOOLEAN".to_string(),
34907 DataType::Date => "DATE".to_string(),
34908 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
34909 DataType::Struct { fields, .. } => {
34910 let field_strs: Vec<String> = fields
34911 .iter()
34912 .map(|f| {
34913 format!(
34914 "{} {}",
34915 f.name,
34916 Self::data_type_to_presto_string(&f.data_type)
34917 )
34918 })
34919 .collect();
34920 format!("ROW({})", field_strs.join(", "))
34921 }
34922 DataType::Array { element_type, .. } => {
34923 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
34924 }
34925 DataType::Custom { name } => {
34926 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
34927 name.clone()
34928 }
34929 _ => "VARCHAR".to_string(),
34930 }
34931 }
34932
34933 /// Convert IntervalUnit to string
34934 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> &'static str {
34935 match unit {
34936 crate::expressions::IntervalUnit::Year => "YEAR",
34937 crate::expressions::IntervalUnit::Quarter => "QUARTER",
34938 crate::expressions::IntervalUnit::Month => "MONTH",
34939 crate::expressions::IntervalUnit::Week => "WEEK",
34940 crate::expressions::IntervalUnit::Day => "DAY",
34941 crate::expressions::IntervalUnit::Hour => "HOUR",
34942 crate::expressions::IntervalUnit::Minute => "MINUTE",
34943 crate::expressions::IntervalUnit::Second => "SECOND",
34944 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
34945 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
34946 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND",
34947 }
34948 }
34949
34950 /// Extract unit string from an expression (uppercased)
34951 fn get_unit_str_static(expr: &Expression) -> String {
34952 use crate::expressions::Literal;
34953 match expr {
34954 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
34955 Expression::Var(v) => v.this.to_ascii_uppercase(),
34956 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
34957 let Literal::String(s) = lit.as_ref() else {
34958 unreachable!()
34959 };
34960 s.to_ascii_uppercase()
34961 }
34962 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
34963 Expression::Function(f) => {
34964 let base = f.name.to_ascii_uppercase();
34965 if !f.args.is_empty() {
34966 let inner = Self::get_unit_str_static(&f.args[0]);
34967 format!("{}({})", base, inner)
34968 } else {
34969 base
34970 }
34971 }
34972 _ => "DAY".to_string(),
34973 }
34974 }
34975
34976 /// Parse unit string to IntervalUnit
34977 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
34978 match s {
34979 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
34980 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
34981 "MONTH" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
34982 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
34983 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
34984 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
34985 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
34986 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
34987 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
34988 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
34989 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
34990 _ => crate::expressions::IntervalUnit::Day,
34991 }
34992 }
34993
34994 /// Convert expression to simple string for interval building
34995 fn expr_to_string_static(expr: &Expression) -> String {
34996 use crate::expressions::Literal;
34997 match expr {
34998 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
34999 let Literal::Number(s) = lit.as_ref() else {
35000 unreachable!()
35001 };
35002 s.clone()
35003 }
35004 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
35005 let Literal::String(s) = lit.as_ref() else {
35006 unreachable!()
35007 };
35008 s.clone()
35009 }
35010 Expression::Identifier(id) => id.name.clone(),
35011 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
35012 _ => "1".to_string(),
35013 }
35014 }
35015
35016 /// Extract a simple string representation from a literal expression
35017 fn expr_to_string(expr: &Expression) -> String {
35018 use crate::expressions::Literal;
35019 match expr {
35020 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
35021 let Literal::Number(s) = lit.as_ref() else {
35022 unreachable!()
35023 };
35024 s.clone()
35025 }
35026 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
35027 let Literal::String(s) = lit.as_ref() else {
35028 unreachable!()
35029 };
35030 s.clone()
35031 }
35032 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
35033 Expression::Identifier(id) => id.name.clone(),
35034 _ => "1".to_string(),
35035 }
35036 }
35037
    /// Quote an interval value expression as a string literal if it's a number (or negated number)
    ///
    /// - `Number(n)`       -> string literal `'n'`
    /// - string literal    -> returned unchanged (already quoted)
    /// - `Neg(Number(n))`  -> string literal `'-n'`
    /// - anything else     -> cloned as-is
    fn quote_interval_val(expr: &Expression) -> Expression {
        use crate::expressions::Literal;
        match expr {
            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
                // Guard above guarantees Number here.
                let Literal::Number(n) = lit.as_ref() else {
                    unreachable!()
                };
                Expression::Literal(Box::new(Literal::String(n.clone())))
            }
            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => expr.clone(),
            Expression::Neg(inner) => {
                if let Expression::Literal(lit) = &inner.this {
                    if let Literal::Number(n) = lit.as_ref() {
                        // Fold the negation into the quoted text: '-n'.
                        Expression::Literal(Box::new(Literal::String(format!("-{}", n))))
                    } else {
                        // NOTE(review): a negated non-number literal returns only the
                        // operand, silently dropping the negation — confirm this is
                        // intentional rather than `expr.clone()`.
                        inner.this.clone()
                    }
                } else {
                    expr.clone()
                }
            }
            _ => expr.clone(),
        }
    }
35063
35064 /// Check if a timestamp string contains timezone info (offset like +02:00, or named timezone)
35065 fn timestamp_string_has_timezone(ts: &str) -> bool {
35066 let trimmed = ts.trim();
35067 // Check for numeric timezone offsets: +N, -N, +NN:NN, -NN:NN at end
35068 if let Some(last_space) = trimmed.rfind(' ') {
35069 let suffix = &trimmed[last_space + 1..];
35070 if (suffix.starts_with('+') || suffix.starts_with('-')) && suffix.len() > 1 {
35071 let rest = &suffix[1..];
35072 if rest.chars().all(|c| c.is_ascii_digit() || c == ':') {
35073 return true;
35074 }
35075 }
35076 }
35077 // Check for named timezone abbreviations
35078 let ts_lower = trimmed.to_ascii_lowercase();
35079 let tz_abbrevs = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];
35080 for abbrev in &tz_abbrevs {
35081 if ts_lower.ends_with(abbrev) {
35082 return true;
35083 }
35084 }
35085 false
35086 }
35087
35088 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
35089 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
35090 use crate::expressions::{Cast, DataType, Literal};
35091 match expr {
35092 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
35093 let Literal::Timestamp(s) = lit.as_ref() else {
35094 unreachable!()
35095 };
35096 let tz = func_name.starts_with("TIMESTAMP");
35097 Expression::Cast(Box::new(Cast {
35098 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35099 to: if tz {
35100 DataType::Timestamp {
35101 timezone: true,
35102 precision: None,
35103 }
35104 } else {
35105 DataType::Timestamp {
35106 timezone: false,
35107 precision: None,
35108 }
35109 },
35110 trailing_comments: vec![],
35111 double_colon_syntax: false,
35112 format: None,
35113 default: None,
35114 inferred_type: None,
35115 }))
35116 }
35117 other => other,
35118 }
35119 }
35120
35121 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
35122 fn maybe_cast_ts(expr: Expression) -> Expression {
35123 use crate::expressions::{Cast, DataType, Literal};
35124 match expr {
35125 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
35126 let Literal::Timestamp(s) = lit.as_ref() else {
35127 unreachable!()
35128 };
35129 Expression::Cast(Box::new(Cast {
35130 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35131 to: DataType::Timestamp {
35132 timezone: false,
35133 precision: None,
35134 },
35135 trailing_comments: vec![],
35136 double_colon_syntax: false,
35137 format: None,
35138 default: None,
35139 inferred_type: None,
35140 }))
35141 }
35142 other => other,
35143 }
35144 }
35145
35146 /// Convert DATE 'x' literal to CAST('x' AS DATE)
35147 fn date_literal_to_cast(expr: Expression) -> Expression {
35148 use crate::expressions::{Cast, DataType, Literal};
35149 match expr {
35150 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
35151 let Literal::Date(s) = lit.as_ref() else {
35152 unreachable!()
35153 };
35154 Expression::Cast(Box::new(Cast {
35155 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35156 to: DataType::Date,
35157 trailing_comments: vec![],
35158 double_colon_syntax: false,
35159 format: None,
35160 default: None,
35161 inferred_type: None,
35162 }))
35163 }
35164 other => other,
35165 }
35166 }
35167
35168 /// Ensure an expression that should be a date is CAST(... AS DATE).
35169 /// Handles both DATE literals and string literals that look like dates.
35170 fn ensure_cast_date(expr: Expression) -> Expression {
35171 use crate::expressions::{Cast, DataType, Literal};
35172 match expr {
35173 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
35174 let Literal::Date(s) = lit.as_ref() else {
35175 unreachable!()
35176 };
35177 Expression::Cast(Box::new(Cast {
35178 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35179 to: DataType::Date,
35180 trailing_comments: vec![],
35181 double_colon_syntax: false,
35182 format: None,
35183 default: None,
35184 inferred_type: None,
35185 }))
35186 }
35187 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
35188 // String literal that should be a date -> CAST('s' AS DATE)
35189 Expression::Cast(Box::new(Cast {
35190 this: expr,
35191 to: DataType::Date,
35192 trailing_comments: vec![],
35193 double_colon_syntax: false,
35194 format: None,
35195 default: None,
35196 inferred_type: None,
35197 }))
35198 }
35199 // Already a CAST or other expression -> leave as-is
35200 other => other,
35201 }
35202 }
35203
35204 /// Force CAST(expr AS DATE) for any expression (not just literals)
35205 /// Skips if the expression is already a CAST to DATE
35206 fn force_cast_date(expr: Expression) -> Expression {
35207 use crate::expressions::{Cast, DataType};
35208 // If it's already a CAST to DATE, don't double-wrap
35209 if let Expression::Cast(ref c) = expr {
35210 if matches!(c.to, DataType::Date) {
35211 return expr;
35212 }
35213 }
35214 Expression::Cast(Box::new(Cast {
35215 this: expr,
35216 to: DataType::Date,
35217 trailing_comments: vec![],
35218 double_colon_syntax: false,
35219 format: None,
35220 default: None,
35221 inferred_type: None,
35222 }))
35223 }
35224
35225 /// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
35226 /// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic.
35227 /// The Snowflake DATEDIFF handler converts these back to TO_DATE.
35228 const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
35229
35230 fn ensure_to_date_preserved(expr: Expression) -> Expression {
35231 use crate::expressions::{Function, Literal};
35232 if matches!(expr, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)))
35233 {
35234 Expression::Function(Box::new(Function::new(
35235 Self::PRESERVED_TO_DATE.to_string(),
35236 vec![expr],
35237 )))
35238 } else {
35239 expr
35240 }
35241 }
35242
35243 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
35244 fn try_cast_date(expr: Expression) -> Expression {
35245 use crate::expressions::{Cast, DataType};
35246 Expression::TryCast(Box::new(Cast {
35247 this: expr,
35248 to: DataType::Date,
35249 trailing_comments: vec![],
35250 double_colon_syntax: false,
35251 format: None,
35252 default: None,
35253 inferred_type: None,
35254 }))
35255 }
35256
35257 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
35258 fn double_cast_timestamp_date(expr: Expression) -> Expression {
35259 use crate::expressions::{Cast, DataType};
35260 let inner = Expression::Cast(Box::new(Cast {
35261 this: expr,
35262 to: DataType::Timestamp {
35263 timezone: false,
35264 precision: None,
35265 },
35266 trailing_comments: vec![],
35267 double_colon_syntax: false,
35268 format: None,
35269 default: None,
35270 inferred_type: None,
35271 }));
35272 Expression::Cast(Box::new(Cast {
35273 this: inner,
35274 to: DataType::Date,
35275 trailing_comments: vec![],
35276 double_colon_syntax: false,
35277 format: None,
35278 default: None,
35279 inferred_type: None,
35280 }))
35281 }
35282
35283 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
35284 fn double_cast_datetime_date(expr: Expression) -> Expression {
35285 use crate::expressions::{Cast, DataType};
35286 let inner = Expression::Cast(Box::new(Cast {
35287 this: expr,
35288 to: DataType::Custom {
35289 name: "DATETIME".to_string(),
35290 },
35291 trailing_comments: vec![],
35292 double_colon_syntax: false,
35293 format: None,
35294 default: None,
35295 inferred_type: None,
35296 }));
35297 Expression::Cast(Box::new(Cast {
35298 this: inner,
35299 to: DataType::Date,
35300 trailing_comments: vec![],
35301 double_colon_syntax: false,
35302 format: None,
35303 default: None,
35304 inferred_type: None,
35305 }))
35306 }
35307
35308 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
35309 fn double_cast_datetime2_date(expr: Expression) -> Expression {
35310 use crate::expressions::{Cast, DataType};
35311 let inner = Expression::Cast(Box::new(Cast {
35312 this: expr,
35313 to: DataType::Custom {
35314 name: "DATETIME2".to_string(),
35315 },
35316 trailing_comments: vec![],
35317 double_colon_syntax: false,
35318 format: None,
35319 default: None,
35320 inferred_type: None,
35321 }));
35322 Expression::Cast(Box::new(Cast {
35323 this: inner,
35324 to: DataType::Date,
35325 trailing_comments: vec![],
35326 double_colon_syntax: false,
35327 format: None,
35328 default: None,
35329 inferred_type: None,
35330 }))
35331 }
35332
35333 /// Convert Hive/Java-style date format strings to C-style (strftime) format
35334 /// e.g., "yyyy-MM-dd'T'HH" -> "%Y-%m-%d'T'%H"
35335 fn hive_format_to_c_format(fmt: &str) -> String {
35336 let mut result = String::new();
35337 let chars: Vec<char> = fmt.chars().collect();
35338 let mut i = 0;
35339 while i < chars.len() {
35340 match chars[i] {
35341 'y' => {
35342 let mut count = 0;
35343 while i < chars.len() && chars[i] == 'y' {
35344 count += 1;
35345 i += 1;
35346 }
35347 if count >= 4 {
35348 result.push_str("%Y");
35349 } else if count == 2 {
35350 result.push_str("%y");
35351 } else {
35352 result.push_str("%Y");
35353 }
35354 }
35355 'M' => {
35356 let mut count = 0;
35357 while i < chars.len() && chars[i] == 'M' {
35358 count += 1;
35359 i += 1;
35360 }
35361 if count >= 3 {
35362 result.push_str("%b");
35363 } else if count == 2 {
35364 result.push_str("%m");
35365 } else {
35366 result.push_str("%m");
35367 }
35368 }
35369 'd' => {
35370 let mut _count = 0;
35371 while i < chars.len() && chars[i] == 'd' {
35372 _count += 1;
35373 i += 1;
35374 }
35375 result.push_str("%d");
35376 }
35377 'H' => {
35378 let mut _count = 0;
35379 while i < chars.len() && chars[i] == 'H' {
35380 _count += 1;
35381 i += 1;
35382 }
35383 result.push_str("%H");
35384 }
35385 'h' => {
35386 let mut _count = 0;
35387 while i < chars.len() && chars[i] == 'h' {
35388 _count += 1;
35389 i += 1;
35390 }
35391 result.push_str("%I");
35392 }
35393 'm' => {
35394 let mut _count = 0;
35395 while i < chars.len() && chars[i] == 'm' {
35396 _count += 1;
35397 i += 1;
35398 }
35399 result.push_str("%M");
35400 }
35401 's' => {
35402 let mut _count = 0;
35403 while i < chars.len() && chars[i] == 's' {
35404 _count += 1;
35405 i += 1;
35406 }
35407 result.push_str("%S");
35408 }
35409 'S' => {
35410 // Fractional seconds - skip
35411 while i < chars.len() && chars[i] == 'S' {
35412 i += 1;
35413 }
35414 result.push_str("%f");
35415 }
35416 'a' => {
35417 // AM/PM
35418 while i < chars.len() && chars[i] == 'a' {
35419 i += 1;
35420 }
35421 result.push_str("%p");
35422 }
35423 'E' => {
35424 let mut count = 0;
35425 while i < chars.len() && chars[i] == 'E' {
35426 count += 1;
35427 i += 1;
35428 }
35429 if count >= 4 {
35430 result.push_str("%A");
35431 } else {
35432 result.push_str("%a");
35433 }
35434 }
35435 '\'' => {
35436 // Quoted literal text - pass through the quotes and content
35437 result.push('\'');
35438 i += 1;
35439 while i < chars.len() && chars[i] != '\'' {
35440 result.push(chars[i]);
35441 i += 1;
35442 }
35443 if i < chars.len() {
35444 result.push('\'');
35445 i += 1;
35446 }
35447 }
35448 c => {
35449 result.push(c);
35450 i += 1;
35451 }
35452 }
35453 }
35454 result
35455 }
35456
35457 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
35458 fn hive_format_to_presto_format(fmt: &str) -> String {
35459 let c_fmt = Self::hive_format_to_c_format(fmt);
35460 // Presto uses %T for HH:MM:SS
35461 c_fmt.replace("%H:%M:%S", "%T")
35462 }
35463
35464 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
35465 fn ensure_cast_timestamp(expr: Expression) -> Expression {
35466 use crate::expressions::{Cast, DataType, Literal};
35467 match expr {
35468 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
35469 let Literal::Timestamp(s) = lit.as_ref() else {
35470 unreachable!()
35471 };
35472 Expression::Cast(Box::new(Cast {
35473 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35474 to: DataType::Timestamp {
35475 timezone: false,
35476 precision: None,
35477 },
35478 trailing_comments: vec![],
35479 double_colon_syntax: false,
35480 format: None,
35481 default: None,
35482 inferred_type: None,
35483 }))
35484 }
35485 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
35486 Expression::Cast(Box::new(Cast {
35487 this: expr,
35488 to: DataType::Timestamp {
35489 timezone: false,
35490 precision: None,
35491 },
35492 trailing_comments: vec![],
35493 double_colon_syntax: false,
35494 format: None,
35495 default: None,
35496 inferred_type: None,
35497 }))
35498 }
35499 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
35500 let Literal::Datetime(s) = lit.as_ref() else {
35501 unreachable!()
35502 };
35503 Expression::Cast(Box::new(Cast {
35504 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35505 to: DataType::Timestamp {
35506 timezone: false,
35507 precision: None,
35508 },
35509 trailing_comments: vec![],
35510 double_colon_syntax: false,
35511 format: None,
35512 default: None,
35513 inferred_type: None,
35514 }))
35515 }
35516 other => other,
35517 }
35518 }
35519
35520 /// Force CAST to TIMESTAMP for any expression (not just literals)
35521 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
35522 fn force_cast_timestamp(expr: Expression) -> Expression {
35523 use crate::expressions::{Cast, DataType};
35524 // Don't double-wrap if already a CAST to TIMESTAMP
35525 if let Expression::Cast(ref c) = expr {
35526 if matches!(c.to, DataType::Timestamp { .. }) {
35527 return expr;
35528 }
35529 }
35530 Expression::Cast(Box::new(Cast {
35531 this: expr,
35532 to: DataType::Timestamp {
35533 timezone: false,
35534 precision: None,
35535 },
35536 trailing_comments: vec![],
35537 double_colon_syntax: false,
35538 format: None,
35539 default: None,
35540 inferred_type: None,
35541 }))
35542 }
35543
35544 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
35545 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
35546 use crate::expressions::{Cast, DataType, Literal};
35547 match expr {
35548 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
35549 let Literal::Timestamp(s) = lit.as_ref() else {
35550 unreachable!()
35551 };
35552 Expression::Cast(Box::new(Cast {
35553 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35554 to: DataType::Timestamp {
35555 timezone: true,
35556 precision: None,
35557 },
35558 trailing_comments: vec![],
35559 double_colon_syntax: false,
35560 format: None,
35561 default: None,
35562 inferred_type: None,
35563 }))
35564 }
35565 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
35566 Expression::Cast(Box::new(Cast {
35567 this: expr,
35568 to: DataType::Timestamp {
35569 timezone: true,
35570 precision: None,
35571 },
35572 trailing_comments: vec![],
35573 double_colon_syntax: false,
35574 format: None,
35575 default: None,
35576 inferred_type: None,
35577 }))
35578 }
35579 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
35580 let Literal::Datetime(s) = lit.as_ref() else {
35581 unreachable!()
35582 };
35583 Expression::Cast(Box::new(Cast {
35584 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35585 to: DataType::Timestamp {
35586 timezone: true,
35587 precision: None,
35588 },
35589 trailing_comments: vec![],
35590 double_colon_syntax: false,
35591 format: None,
35592 default: None,
35593 inferred_type: None,
35594 }))
35595 }
35596 other => other,
35597 }
35598 }
35599
35600 /// Ensure expression is CAST to DATETIME (for BigQuery)
35601 fn ensure_cast_datetime(expr: Expression) -> Expression {
35602 use crate::expressions::{Cast, DataType, Literal};
35603 match expr {
35604 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
35605 Expression::Cast(Box::new(Cast {
35606 this: expr,
35607 to: DataType::Custom {
35608 name: "DATETIME".to_string(),
35609 },
35610 trailing_comments: vec![],
35611 double_colon_syntax: false,
35612 format: None,
35613 default: None,
35614 inferred_type: None,
35615 }))
35616 }
35617 other => other,
35618 }
35619 }
35620
35621 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
35622 fn force_cast_datetime(expr: Expression) -> Expression {
35623 use crate::expressions::{Cast, DataType};
35624 if let Expression::Cast(ref c) = expr {
35625 if let DataType::Custom { ref name } = c.to {
35626 if name.eq_ignore_ascii_case("DATETIME") {
35627 return expr;
35628 }
35629 }
35630 }
35631 Expression::Cast(Box::new(Cast {
35632 this: expr,
35633 to: DataType::Custom {
35634 name: "DATETIME".to_string(),
35635 },
35636 trailing_comments: vec![],
35637 double_colon_syntax: false,
35638 format: None,
35639 default: None,
35640 inferred_type: None,
35641 }))
35642 }
35643
35644 /// Ensure expression is CAST to DATETIME2 (for TSQL)
35645 fn ensure_cast_datetime2(expr: Expression) -> Expression {
35646 use crate::expressions::{Cast, DataType, Literal};
35647 match expr {
35648 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
35649 Expression::Cast(Box::new(Cast {
35650 this: expr,
35651 to: DataType::Custom {
35652 name: "DATETIME2".to_string(),
35653 },
35654 trailing_comments: vec![],
35655 double_colon_syntax: false,
35656 format: None,
35657 default: None,
35658 inferred_type: None,
35659 }))
35660 }
35661 other => other,
35662 }
35663 }
35664
35665 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
35666 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
35667 use crate::expressions::{Cast, DataType, Literal};
35668 match expr {
35669 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
35670 let Literal::Timestamp(s) = lit.as_ref() else {
35671 unreachable!()
35672 };
35673 Expression::Cast(Box::new(Cast {
35674 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35675 to: DataType::Timestamp {
35676 timezone: true,
35677 precision: None,
35678 },
35679 trailing_comments: vec![],
35680 double_colon_syntax: false,
35681 format: None,
35682 default: None,
35683 inferred_type: None,
35684 }))
35685 }
35686 other => other,
35687 }
35688 }
35689
35690 /// Convert BigQuery format string to Snowflake format string
35691 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
35692 use crate::expressions::Literal;
35693 if let Expression::Literal(lit) = format_expr {
35694 if let Literal::String(s) = lit.as_ref() {
35695 let sf = s
35696 .replace("%Y", "yyyy")
35697 .replace("%m", "mm")
35698 .replace("%d", "DD")
35699 .replace("%H", "HH24")
35700 .replace("%M", "MI")
35701 .replace("%S", "SS")
35702 .replace("%b", "mon")
35703 .replace("%B", "Month")
35704 .replace("%e", "FMDD");
35705 Expression::Literal(Box::new(Literal::String(sf)))
35706 } else {
35707 format_expr.clone()
35708 }
35709 } else {
35710 format_expr.clone()
35711 }
35712 }
35713
35714 /// Convert BigQuery format string to DuckDB format string
35715 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
35716 use crate::expressions::Literal;
35717 if let Expression::Literal(lit) = format_expr {
35718 if let Literal::String(s) = lit.as_ref() {
35719 let duck = s
35720 .replace("%T", "%H:%M:%S")
35721 .replace("%F", "%Y-%m-%d")
35722 .replace("%D", "%m/%d/%y")
35723 .replace("%x", "%m/%d/%y")
35724 .replace("%c", "%a %b %-d %H:%M:%S %Y")
35725 .replace("%e", "%-d")
35726 .replace("%E6S", "%S.%f");
35727 Expression::Literal(Box::new(Literal::String(duck)))
35728 } else {
35729 format_expr.clone()
35730 }
35731 } else {
35732 format_expr.clone()
35733 }
35734 }
35735
/// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
///
/// Non-string-literal inputs are returned as an unmodified clone. NOTE: the
/// replacement chain below is ordering-sensitive — longer elements must be
/// consumed before their prefixes (YYYYMMDD before YYYY before YY, MONTH
/// before MON, HH24/HH12 before HH, SSTZH before SS and TZH) or a shorter
/// pattern would corrupt a longer one. Do not reorder.
fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
    use crate::expressions::Literal;
    if let Expression::Literal(lit) = format_expr {
        if let Literal::String(s) = lit.as_ref() {
            // Replace format elements from longest to shortest to avoid partial matches
            let result = s
                .replace("YYYYMMDD", "%Y%m%d")
                .replace("YYYY", "%Y")
                .replace("YY", "%y")
                .replace("MONTH", "%B")
                .replace("MON", "%b")
                .replace("MM", "%m")
                .replace("DD", "%d")
                .replace("HH24", "%H")
                .replace("HH12", "%I")
                // Bare HH defaults to 12-hour clock, matching HH12.
                .replace("HH", "%I")
                .replace("MI", "%M")
                .replace("SSTZH", "%S%z")
                .replace("SS", "%S")
                .replace("TZH", "%z");
            Expression::Literal(Box::new(Literal::String(result)))
        } else {
            format_expr.clone()
        }
    } else {
        format_expr.clone()
    }
}
35765
35766 /// Normalize BigQuery format strings for BQ->BQ output
35767 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
35768 use crate::expressions::Literal;
35769 if let Expression::Literal(lit) = format_expr {
35770 if let Literal::String(s) = lit.as_ref() {
35771 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
35772 Expression::Literal(Box::new(Literal::String(norm)))
35773 } else {
35774 format_expr.clone()
35775 }
35776 } else {
35777 format_expr.clone()
35778 }
35779 }
35780}
35781
35782#[cfg(test)]
35783mod tests {
35784 use super::*;
35785
    // DialectType's FromStr must accept common aliases for the same dialect.
    #[test]
    fn test_dialect_type_from_str() {
        // "postgres" and "postgresql" both resolve to PostgreSQL.
        assert_eq!(
            "postgres".parse::<DialectType>().unwrap(),
            DialectType::PostgreSQL
        );
        assert_eq!(
            "postgresql".parse::<DialectType>().unwrap(),
            DialectType::PostgreSQL
        );
        assert_eq!("mysql".parse::<DialectType>().unwrap(), DialectType::MySQL);
        assert_eq!(
            "bigquery".parse::<DialectType>().unwrap(),
            DialectType::BigQuery
        );
    }

    // Smoke test: a trivial statement round-trips unchanged through transpile.
    #[test]
    fn test_basic_transpile() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile("SELECT 1", DialectType::PostgreSQL)
            .unwrap();
        // transpile returns one output string per input statement.
        assert_eq!(result.len(), 1);
        assert_eq!(result[0], "SELECT 1");
    }

    #[test]
    fn test_function_transformation_mysql() {
        // NVL should be transformed to IFNULL in MySQL
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile("SELECT NVL(a, b)", DialectType::MySQL)
            .unwrap();
        assert_eq!(result[0], "SELECT IFNULL(a, b)");
    }
35822
    // Diagnostic-only test (no assertions): prints the Snowflake colon-path /
    // GET_PATH output for several targets so regressions are visible in logs.
    #[test]
    fn test_get_path_duckdb() {
        // Test: step by step
        let snowflake = Dialect::get(DialectType::Snowflake);

        // Step 1: Parse and check what Snowflake produces as intermediate
        let result_sf_sf = snowflake
            .transpile(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::Snowflake,
            )
            .unwrap();
        eprintln!("Snowflake->Snowflake colon: {}", result_sf_sf[0]);

        // Step 2: DuckDB target
        let result_sf_dk = snowflake
            .transpile(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB colon: {}", result_sf_dk[0]);

        // Step 3: GET_PATH directly
        let result_gp = snowflake
            .transpile(
                "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", result_gp[0]);
    }
35855
    #[test]
    fn test_function_transformation_postgres() {
        // IFNULL should be transformed to COALESCE in PostgreSQL
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile("SELECT IFNULL(a, b)", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");

        // NVL should also be transformed to COALESCE
        let result = dialect
            .transpile("SELECT NVL(a, b)", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");
    }

    #[test]
    fn test_hive_cast_to_trycast() {
        // Hive CAST should become TRY_CAST for targets that support it
        let hive = Dialect::get(DialectType::Hive);
        let result = hive
            .transpile("CAST(1 AS INT)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INT)");

        // Presto additionally normalizes the type name INT -> INTEGER.
        let result = hive
            .transpile("CAST(1 AS INT)", DialectType::Presto)
            .unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INTEGER)");
    }

    #[test]
    fn test_hive_array_identity() {
        // Hive ARRAY<DATE> should preserve angle bracket syntax
        let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
        let hive = Dialect::get(DialectType::Hive);

        // Test via transpile (this works)
        let result = hive.transpile(sql, DialectType::Hive).unwrap();
        eprintln!("Hive ARRAY via transpile: {}", result[0]);
        assert!(
            result[0].contains("ARRAY<DATE>"),
            "transpile: Expected ARRAY<DATE>, got: {}",
            result[0]
        );

        // Test via parse -> transform -> generate (identity test path)
        // Exercises the three phases separately to catch divergence from
        // the single-call transpile path above.
        let ast = hive.parse(sql).unwrap();
        let transformed = hive.transform(ast[0].clone()).unwrap();
        let output = hive.generate(&transformed).unwrap();
        eprintln!("Hive ARRAY via identity path: {}", output);
        assert!(
            output.contains("ARRAY<DATE>"),
            "identity path: Expected ARRAY<DATE>, got: {}",
            output
        );
    }
35913
    #[test]
    fn test_starrocks_delete_between_expansion() {
        // StarRocks doesn't support BETWEEN in DELETE statements
        let dialect = Dialect::get(DialectType::Generic);

        // BETWEEN should be expanded to >= AND <= in DELETE
        let result = dialect
            .transpile(
                "DELETE FROM t WHERE a BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a >= b AND a <= c");

        // NOT BETWEEN should be expanded to < OR > in DELETE
        let result = dialect
            .transpile(
                "DELETE FROM t WHERE a NOT BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a < b OR a > c");

        // BETWEEN in SELECT should NOT be expanded (StarRocks supports it there)
        let result = dialect
            .transpile(
                "SELECT * FROM t WHERE a BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert!(
            result[0].contains("BETWEEN"),
            "BETWEEN should be preserved in SELECT"
        );
    }
35949
    // Parse/transpile smoke test: nested LTRIM/RTRIM must not error.
    #[test]
    fn test_snowflake_ltrim_rtrim_parse() {
        let sf = Dialect::get(DialectType::Snowflake);
        let sql = "SELECT LTRIM(RTRIM(col)) FROM t1";
        let result = sf.transpile(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("LTRIM/RTRIM result: {}", r[0]),
            Err(e) => eprintln!("LTRIM/RTRIM error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
            result.err()
        );
    }

    // Parse/transpile smoke test: bare COUNT_IF call in DuckDB dialect.
    #[test]
    fn test_duckdb_count_if_parse() {
        let duck = Dialect::get(DialectType::DuckDB);
        let sql = "COUNT_IF(x)";
        let result = duck.transpile(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("COUNT_IF result: {}", r[0]),
            Err(e) => eprintln!("COUNT_IF error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful parse of COUNT_IF(x), got error: {:?}",
            result.err()
        );
    }

    // Parse/transpile smoke test: TSQL TINYINT type in a CAST.
    #[test]
    fn test_tsql_cast_tinyint_parse() {
        let tsql = Dialect::get(DialectType::TSQL);
        let sql = "CAST(X AS TINYINT)";
        let result = tsql.transpile(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("TSQL CAST TINYINT result: {}", r[0]),
            Err(e) => eprintln!("TSQL CAST TINYINT error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful transpile, got error: {:?}",
            result.err()
        );
    }
35997
    // PostgreSQL's # (bitwise XOR) operator must survive a PG->PG round trip.
    #[test]
    fn test_pg_hash_bitwise_xor() {
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect
            .transpile("x # y", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "x # y");
    }

    // PG ARRAY[...] literals become DuckDB bracket lists; @> containment is kept.
    #[test]
    fn test_pg_array_to_duckdb() {
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect
            .transpile("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "SELECT [1, 2, 3] @> [1, 2]");
    }

    // BigQuery has no ARRAY_REMOVE; it is rewritten as an ARRAY(SELECT ...) filter.
    #[test]
    fn test_array_remove_bigquery() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile("ARRAY_REMOVE(the_array, target)", DialectType::BigQuery)
            .unwrap();
        assert_eq!(
            result[0],
            "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)"
        );
    }
36027
    // Diagnostic-only test (no assertions): prints how a MAP cast parses and
    // how it is rendered for ClickHouse.
    #[test]
    fn test_map_clickhouse_case() {
        let dialect = Dialect::get(DialectType::Generic);
        let parsed = dialect
            .parse("CAST(MAP('a', '1') AS MAP(TEXT, TEXT))")
            .unwrap();
        eprintln!("MAP parsed: {:?}", parsed);
        let result = dialect
            .transpile(
                "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))",
                DialectType::ClickHouse,
            )
            .unwrap();
        eprintln!("MAP result: {}", result[0]);
    }

    // GENERATE_DATE_ARRAY becomes Presto's SEQUENCE with an explicit day interval.
    #[test]
    fn test_generate_date_array_presto() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Presto,
        ).unwrap();
        eprintln!("GDA -> Presto: {}", result[0]);
        assert_eq!(result[0], "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))");
    }
36054
    // The following GENERATE_DATE_ARRAY tests are diagnostic-only (no
    // assertions): they confirm the rewrite does not error for each target
    // and print the produced SQL for inspection.
    #[test]
    fn test_generate_date_array_postgres() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::PostgreSQL,
        ).unwrap();
        eprintln!("GDA -> PostgreSQL: {}", result[0]);
    }

    #[test]
    fn test_generate_date_array_snowflake() {
        // Runs on a larger stack: this rewrite builds a deep expression tree
        // that can overflow the default test-thread stack.
        std::thread::Builder::new()
            .stack_size(16 * 1024 * 1024)
            .spawn(|| {
                let dialect = Dialect::get(DialectType::Generic);
                let result = dialect.transpile(
                    "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
                    DialectType::Snowflake,
                ).unwrap();
                eprintln!("GDA -> Snowflake: {}", result[0]);
            })
            .unwrap()
            .join()
            .unwrap();
    }

    #[test]
    fn test_array_length_generate_date_array_snowflake() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile(
            "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Snowflake,
        ).unwrap();
        eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", result[0]);
    }

    #[test]
    fn test_generate_date_array_mysql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::MySQL,
        ).unwrap();
        eprintln!("GDA -> MySQL: {}", result[0]);
    }

    #[test]
    fn test_generate_date_array_redshift() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Redshift,
        ).unwrap();
        eprintln!("GDA -> Redshift: {}", result[0]);
    }

    #[test]
    fn test_generate_date_array_tsql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::TSQL,
        ).unwrap();
        eprintln!("GDA -> TSQL: {}", result[0]);
    }
36121
    // Diagnostic-only test: STRUCT field definitions with and without the
    // optional colon separator must both be accepted; output is printed.
    #[test]
    fn test_struct_colon_syntax() {
        let dialect = Dialect::get(DialectType::Generic);
        // Test without colon first
        let result = dialect.transpile(
            "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)",
            DialectType::ClickHouse,
        );
        match result {
            Ok(r) => eprintln!("STRUCT no colon -> ClickHouse: {}", r[0]),
            Err(e) => eprintln!("STRUCT no colon error: {}", e),
        }
        // Now test with colon
        let result = dialect.transpile(
            "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
            DialectType::ClickHouse,
        );
        match result {
            Ok(r) => eprintln!("STRUCT colon -> ClickHouse: {}", r[0]),
            Err(e) => eprintln!("STRUCT colon error: {}", e),
        }
    }

    // Diagnostic-only: GENERATE_DATE_ARRAY nested inside a CTE must still
    // rewrite cleanly for MySQL.
    #[test]
    fn test_generate_date_array_cte_wrapped_mysql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile(
            "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
            DialectType::MySQL,
        ).unwrap();
        eprintln!("GDA CTE -> MySQL: {}", result[0]);
    }

    // Diagnostic-only: same CTE shape, TSQL target.
    #[test]
    fn test_generate_date_array_cte_wrapped_tsql() {
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile(
            "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
            DialectType::TSQL,
        ).unwrap();
        eprintln!("GDA CTE -> TSQL: {}", result[0]);
    }
36164
    #[test]
    fn test_decode_literal_no_null_check() {
        // Oracle DECODE with all literals should produce simple equality, no IS NULL
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile("SELECT decode(1,2,3,4)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0], "SELECT CASE WHEN 1 = 2 THEN 3 ELSE 4 END",
            "Literal DECODE should not have IS NULL checks"
        );
    }

    #[test]
    fn test_decode_column_vs_literal_no_null_check() {
        // Oracle DECODE with column vs literal should use simple equality (like sqlglot)
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile("SELECT decode(col, 2, 3, 4) FROM t", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0], "SELECT CASE WHEN col = 2 THEN 3 ELSE 4 END FROM t",
            "Column vs literal DECODE should not have IS NULL checks"
        );
    }

    #[test]
    fn test_decode_column_vs_column_keeps_null_check() {
        // Oracle DECODE with column vs column should keep null-safe comparison
        // (both sides may be NULL, and DECODE treats NULL = NULL as a match).
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile("SELECT decode(col, col2, 3, 4) FROM t", DialectType::DuckDB)
            .unwrap();
        assert!(
            result[0].contains("IS NULL"),
            "Column vs column DECODE should have IS NULL checks, got: {}",
            result[0]
        );
    }

    #[test]
    fn test_decode_null_search() {
        // Oracle DECODE with NULL search should use IS NULL
        let dialect = Dialect::get(DialectType::Oracle);
        let result = dialect
            .transpile("SELECT decode(col, NULL, 3, 4) FROM t", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT CASE WHEN col IS NULL THEN 3 ELSE 4 END FROM t",
        );
    }
36217
    // =========================================================================
    // REGEXP function transpilation tests
    // =========================================================================

    // Snowflake REGEXP_SUBSTR(subject, pattern) maps to DuckDB REGEXP_EXTRACT.
    #[test]
    fn test_regexp_substr_snowflake_to_duckdb_2arg() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile("SELECT REGEXP_SUBSTR(s, 'pattern')", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
    }

    // A position argument of 1 is the default and should be dropped.
    #[test]
    fn test_regexp_substr_snowflake_to_duckdb_3arg_pos1() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile("SELECT REGEXP_SUBSTR(s, 'pattern', 1)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
    }

    // A position > 1 requires slicing the subject first (NULLIF guards '' -> NULL).
    #[test]
    fn test_regexp_substr_snowflake_to_duckdb_3arg_pos_gt1() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile("SELECT REGEXP_SUBSTR(s, 'pattern', 3)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT REGEXP_EXTRACT(NULLIF(SUBSTRING(s, 3), ''), 'pattern')"
        );
    }

    // An occurrence > 1 requires extracting all matches and indexing into them.
    #[test]
    fn test_regexp_substr_snowflake_to_duckdb_4arg_occ_gt1() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile(
                "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 3)",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, 'pattern'), 3)"
        );
    }

    // Default position/occurrence plus the 'e' flag collapses to plain extract.
    #[test]
    fn test_regexp_substr_snowflake_to_duckdb_5arg_e_flag() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile(
                "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
    }

    // Group 0 means "whole match" and is also the default — still a plain extract.
    #[test]
    fn test_regexp_substr_snowflake_to_duckdb_6arg_group0() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile(
                "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
    }

    // Snowflake -> Snowflake identity drops the redundant trailing group 0.
    #[test]
    fn test_regexp_substr_snowflake_identity_strip_group0() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile(
                "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)",
                DialectType::Snowflake,
            )
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')");
    }
36302
    // REGEXP_SUBSTR_ALL follows the same argument-lowering rules as
    // REGEXP_SUBSTR above, but targets DuckDB's REGEXP_EXTRACT_ALL.
    #[test]
    fn test_regexp_substr_all_snowflake_to_duckdb_2arg() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile(
                "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
    }

    // Position > 1 slices the subject before extracting.
    #[test]
    fn test_regexp_substr_all_snowflake_to_duckdb_3arg_pos_gt1() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile(
                "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 3)",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')"
        );
    }

    // Default position/occurrence plus 'e' flag collapses to plain extract-all.
    #[test]
    fn test_regexp_substr_all_snowflake_to_duckdb_5arg_e_flag() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile(
                "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
    }

    // Trailing group 0 is the default and is dropped in the DuckDB rendering.
    #[test]
    fn test_regexp_substr_all_snowflake_to_duckdb_6arg_group0() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile(
                "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
    }

    // Snowflake -> Snowflake identity also strips the redundant group 0.
    #[test]
    fn test_regexp_substr_all_snowflake_identity_strip_group0() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile(
                "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)",
                DialectType::Snowflake,
            )
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')"
        );
    }
36368
    // REGEXP_COUNT is emulated in DuckDB by counting REGEXP_EXTRACT_ALL
    // matches, with a CASE guard for the empty-pattern edge case.
    #[test]
    fn test_regexp_count_snowflake_to_duckdb_2arg() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile("SELECT REGEXP_COUNT(s, 'pattern')", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, 'pattern')) END"
        );
    }

    // Position argument slices the subject before counting.
    #[test]
    fn test_regexp_count_snowflake_to_duckdb_3arg() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile("SELECT REGEXP_COUNT(s, 'pattern', 3)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')) END"
        );
    }

    // Snowflake flag strings become inline (?...) regex modifiers in DuckDB.
    #[test]
    fn test_regexp_count_snowflake_to_duckdb_4arg_flags() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile(
                "SELECT REGEXP_COUNT(s, 'pattern', 1, 'i')",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT CASE WHEN '(?i)' || 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 1), '(?i)' || 'pattern')) END"
        );
    }

    // Multiple flags ('im') are carried over into a single inline modifier group.
    #[test]
    fn test_regexp_count_snowflake_to_duckdb_4arg_flags_literal_string() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile(
                "SELECT REGEXP_COUNT('Hello World', 'L', 1, 'im')",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT CASE WHEN '(?im)' || 'L' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING('Hello World', 1), '(?im)' || 'L')) END"
        );
    }
36422
    // Default position 1 / occurrence 1 collapses to DuckDB's 3-argument
    // REGEXP_REPLACE (which replaces the first occurrence).
    #[test]
    fn test_regexp_replace_snowflake_to_duckdb_5arg_pos1_occ1() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile(
                "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 1, 1)",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_REPLACE(s, 'pattern', 'repl')");
    }

    // Position > 1 with occurrence 0 (replace all): keep the untouched prefix
    // and apply a global ('g') replace to the remainder.
    #[test]
    fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ0() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile(
                "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 0)",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl', 'g')"
        );
    }

    // Position > 1 with occurrence 1: same split, but only the first match
    // in the remainder is replaced (no 'g' flag).
    #[test]
    fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ1() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile(
                "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 1)",
                DialectType::DuckDB,
            )
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl')"
        );
    }
36464
    // Snowflake RLIKE is an anchored (full-string) match, so it maps to
    // DuckDB's REGEXP_FULL_MATCH rather than REGEXP_MATCHES.
    #[test]
    fn test_rlike_snowflake_to_duckdb_2arg() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile("SELECT RLIKE(a, b)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_FULL_MATCH(a, b)");
    }

    // The optional flags argument is passed through unchanged.
    #[test]
    fn test_rlike_snowflake_to_duckdb_3arg_flags() {
        let dialect = Dialect::get(DialectType::Snowflake);
        let result = dialect
            .transpile("SELECT RLIKE(a, b, 'i')", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_FULL_MATCH(a, b, 'i')");
    }
36482
    // A pattern without capture groups maps directly to REGEXP_SUBSTR_ALL.
    #[test]
    fn test_regexp_extract_all_bigquery_to_snowflake_no_capture() {
        let dialect = Dialect::get(DialectType::BigQuery);
        let result = dialect
            .transpile(
                "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')",
                DialectType::Snowflake,
            )
            .unwrap();
        assert_eq!(result[0], "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')");
    }

    // BigQuery returns the first capture group when one exists; Snowflake
    // needs explicit position/occurrence/flags plus group number 1.
    #[test]
    fn test_regexp_extract_all_bigquery_to_snowflake_with_capture() {
        let dialect = Dialect::get(DialectType::BigQuery);
        let result = dialect
            .transpile(
                "SELECT REGEXP_EXTRACT_ALL(s, '(a)[0-9]')",
                DialectType::Snowflake,
            )
            .unwrap();
        assert_eq!(
            result[0],
            "SELECT REGEXP_SUBSTR_ALL(s, '(a)[0-9]', 1, 1, 'c', 1)"
        );
    }
36509
    // REGEXP_INSTR has no DuckDB equivalent; the rewrite builds a CASE WHEN /
    // LIST_SUM expression. Run on a larger stack: the generated expression
    // tree is deep enough to overflow the default test-thread stack.
    #[test]
    fn test_regexp_instr_snowflake_to_duckdb_2arg() {
        let handle = std::thread::Builder::new()
            .stack_size(16 * 1024 * 1024)
            .spawn(|| {
                let dialect = Dialect::get(DialectType::Snowflake);
                let result = dialect
                    .transpile("SELECT REGEXP_INSTR(s, 'pattern')", DialectType::DuckDB)
                    .unwrap();
                // Should produce a CASE WHEN expression
                assert!(
                    result[0].contains("CASE WHEN"),
                    "Expected CASE WHEN in result: {}",
                    result[0]
                );
                assert!(
                    result[0].contains("LIST_SUM"),
                    "Expected LIST_SUM in result: {}",
                    result[0]
                );
            })
            .unwrap();
        handle.join().unwrap();
    }
36534
36535 #[test]
36536 fn test_array_except_generic_to_duckdb() {
36537 // Use larger stack to avoid overflow from deeply nested expression Drop
36538 let handle = std::thread::Builder::new()
36539 .stack_size(16 * 1024 * 1024)
36540 .spawn(|| {
36541 let dialect = Dialect::get(DialectType::Generic);
36542 let result = dialect
36543 .transpile(
36544 "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))",
36545 DialectType::DuckDB,
36546 )
36547 .unwrap();
36548 eprintln!("ARRAY_EXCEPT Generic->DuckDB: {}", result[0]);
36549 assert!(
36550 result[0].contains("CASE WHEN"),
36551 "Expected CASE WHEN: {}",
36552 result[0]
36553 );
36554 assert!(
36555 result[0].contains("LIST_FILTER"),
36556 "Expected LIST_FILTER: {}",
36557 result[0]
36558 );
36559 assert!(
36560 result[0].contains("LIST_DISTINCT"),
36561 "Expected LIST_DISTINCT: {}",
36562 result[0]
36563 );
36564 assert!(
36565 result[0].contains("IS NOT DISTINCT FROM"),
36566 "Expected IS NOT DISTINCT FROM: {}",
36567 result[0]
36568 );
36569 assert!(
36570 result[0].contains("= 0"),
36571 "Expected = 0 filter: {}",
36572 result[0]
36573 );
36574 })
36575 .unwrap();
36576 handle.join().unwrap();
36577 }
36578
36579 #[test]
36580 fn test_array_except_generic_to_snowflake() {
36581 let dialect = Dialect::get(DialectType::Generic);
36582 let result = dialect
36583 .transpile(
36584 "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))",
36585 DialectType::Snowflake,
36586 )
36587 .unwrap();
36588 eprintln!("ARRAY_EXCEPT Generic->Snowflake: {}", result[0]);
36589 assert_eq!(result[0], "SELECT ARRAY_EXCEPT([1, 2, 3], [2])");
36590 }
36591
36592 #[test]
36593 fn test_array_except_generic_to_presto() {
36594 let dialect = Dialect::get(DialectType::Generic);
36595 let result = dialect
36596 .transpile(
36597 "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))",
36598 DialectType::Presto,
36599 )
36600 .unwrap();
36601 eprintln!("ARRAY_EXCEPT Generic->Presto: {}", result[0]);
36602 assert_eq!(result[0], "SELECT ARRAY_EXCEPT(ARRAY[1, 2, 3], ARRAY[2])");
36603 }
36604
36605 #[test]
36606 fn test_array_except_snowflake_to_duckdb() {
36607 let handle = std::thread::Builder::new()
36608 .stack_size(16 * 1024 * 1024)
36609 .spawn(|| {
36610 let dialect = Dialect::get(DialectType::Snowflake);
36611 let result = dialect
36612 .transpile("SELECT ARRAY_EXCEPT([1, 2, 3], [2])", DialectType::DuckDB)
36613 .unwrap();
36614 eprintln!("ARRAY_EXCEPT Snowflake->DuckDB: {}", result[0]);
36615 assert!(
36616 result[0].contains("CASE WHEN"),
36617 "Expected CASE WHEN: {}",
36618 result[0]
36619 );
36620 assert!(
36621 result[0].contains("LIST_TRANSFORM"),
36622 "Expected LIST_TRANSFORM: {}",
36623 result[0]
36624 );
36625 })
36626 .unwrap();
36627 handle.join().unwrap();
36628 }
36629
36630 #[test]
36631 fn test_array_contains_snowflake_to_snowflake() {
36632 let dialect = Dialect::get(DialectType::Snowflake);
36633 let result = dialect
36634 .transpile(
36635 "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])",
36636 DialectType::Snowflake,
36637 )
36638 .unwrap();
36639 eprintln!("ARRAY_CONTAINS Snowflake->Snowflake: {}", result[0]);
36640 assert_eq!(result[0], "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])");
36641 }
36642
36643 #[test]
36644 fn test_array_contains_snowflake_to_duckdb() {
36645 let dialect = Dialect::get(DialectType::Snowflake);
36646 let result = dialect
36647 .transpile(
36648 "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])",
36649 DialectType::DuckDB,
36650 )
36651 .unwrap();
36652 eprintln!("ARRAY_CONTAINS Snowflake->DuckDB: {}", result[0]);
36653 assert!(
36654 result[0].contains("CASE WHEN"),
36655 "Expected CASE WHEN: {}",
36656 result[0]
36657 );
36658 assert!(
36659 result[0].contains("NULLIF"),
36660 "Expected NULLIF: {}",
36661 result[0]
36662 );
36663 assert!(
36664 result[0].contains("ARRAY_CONTAINS"),
36665 "Expected ARRAY_CONTAINS: {}",
36666 result[0]
36667 );
36668 }
36669
36670 #[test]
36671 fn test_array_distinct_snowflake_to_duckdb() {
36672 let dialect = Dialect::get(DialectType::Snowflake);
36673 let result = dialect
36674 .transpile(
36675 "SELECT ARRAY_DISTINCT([1, 2, 2, 3, 1])",
36676 DialectType::DuckDB,
36677 )
36678 .unwrap();
36679 eprintln!("ARRAY_DISTINCT Snowflake->DuckDB: {}", result[0]);
36680 assert!(
36681 result[0].contains("CASE WHEN"),
36682 "Expected CASE WHEN: {}",
36683 result[0]
36684 );
36685 assert!(
36686 result[0].contains("LIST_DISTINCT"),
36687 "Expected LIST_DISTINCT: {}",
36688 result[0]
36689 );
36690 assert!(
36691 result[0].contains("LIST_APPEND"),
36692 "Expected LIST_APPEND: {}",
36693 result[0]
36694 );
36695 assert!(
36696 result[0].contains("LIST_FILTER"),
36697 "Expected LIST_FILTER: {}",
36698 result[0]
36699 );
36700 }
36701}