// polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery and MySQL quote
//!   identifiers with backticks, while TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile`](Dialect::transpile) to another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic; // Always compiled
23
24#[cfg(feature = "dialect-athena")]
25mod athena;
26#[cfg(feature = "dialect-bigquery")]
27mod bigquery;
28#[cfg(feature = "dialect-clickhouse")]
29mod clickhouse;
30#[cfg(feature = "dialect-cockroachdb")]
31mod cockroachdb;
32#[cfg(feature = "dialect-databricks")]
33mod databricks;
34#[cfg(feature = "dialect-datafusion")]
35mod datafusion;
36#[cfg(feature = "dialect-doris")]
37mod doris;
38#[cfg(feature = "dialect-dremio")]
39mod dremio;
40#[cfg(feature = "dialect-drill")]
41mod drill;
42#[cfg(feature = "dialect-druid")]
43mod druid;
44#[cfg(feature = "dialect-duckdb")]
45mod duckdb;
46#[cfg(feature = "dialect-dune")]
47mod dune;
48#[cfg(feature = "dialect-exasol")]
49mod exasol;
50#[cfg(feature = "dialect-fabric")]
51mod fabric;
52#[cfg(feature = "dialect-hive")]
53mod hive;
54#[cfg(feature = "dialect-materialize")]
55mod materialize;
56#[cfg(feature = "dialect-mysql")]
57mod mysql;
58#[cfg(feature = "dialect-oracle")]
59mod oracle;
60#[cfg(feature = "dialect-postgresql")]
61mod postgres;
62#[cfg(feature = "dialect-presto")]
63mod presto;
64#[cfg(feature = "dialect-redshift")]
65mod redshift;
66#[cfg(feature = "dialect-risingwave")]
67mod risingwave;
68#[cfg(feature = "dialect-singlestore")]
69mod singlestore;
70#[cfg(feature = "dialect-snowflake")]
71mod snowflake;
72#[cfg(feature = "dialect-solr")]
73mod solr;
74#[cfg(feature = "dialect-spark")]
75mod spark;
76#[cfg(feature = "dialect-sqlite")]
77mod sqlite;
78#[cfg(feature = "dialect-starrocks")]
79mod starrocks;
80#[cfg(feature = "dialect-tableau")]
81mod tableau;
82#[cfg(feature = "dialect-teradata")]
83mod teradata;
84#[cfg(feature = "dialect-tidb")]
85mod tidb;
86#[cfg(feature = "dialect-trino")]
87mod trino;
88#[cfg(feature = "dialect-tsql")]
89mod tsql;
90
91pub use generic::GenericDialect; // Always available
92
93#[cfg(feature = "dialect-athena")]
94pub use athena::AthenaDialect;
95#[cfg(feature = "dialect-bigquery")]
96pub use bigquery::BigQueryDialect;
97#[cfg(feature = "dialect-clickhouse")]
98pub use clickhouse::ClickHouseDialect;
99#[cfg(feature = "dialect-cockroachdb")]
100pub use cockroachdb::CockroachDBDialect;
101#[cfg(feature = "dialect-databricks")]
102pub use databricks::DatabricksDialect;
103#[cfg(feature = "dialect-datafusion")]
104pub use datafusion::DataFusionDialect;
105#[cfg(feature = "dialect-doris")]
106pub use doris::DorisDialect;
107#[cfg(feature = "dialect-dremio")]
108pub use dremio::DremioDialect;
109#[cfg(feature = "dialect-drill")]
110pub use drill::DrillDialect;
111#[cfg(feature = "dialect-druid")]
112pub use druid::DruidDialect;
113#[cfg(feature = "dialect-duckdb")]
114pub use duckdb::DuckDBDialect;
115#[cfg(feature = "dialect-dune")]
116pub use dune::DuneDialect;
117#[cfg(feature = "dialect-exasol")]
118pub use exasol::ExasolDialect;
119#[cfg(feature = "dialect-fabric")]
120pub use fabric::FabricDialect;
121#[cfg(feature = "dialect-hive")]
122pub use hive::HiveDialect;
123#[cfg(feature = "dialect-materialize")]
124pub use materialize::MaterializeDialect;
125#[cfg(feature = "dialect-mysql")]
126pub use mysql::MySQLDialect;
127#[cfg(feature = "dialect-oracle")]
128pub use oracle::OracleDialect;
129#[cfg(feature = "dialect-postgresql")]
130pub use postgres::PostgresDialect;
131#[cfg(feature = "dialect-presto")]
132pub use presto::PrestoDialect;
133#[cfg(feature = "dialect-redshift")]
134pub use redshift::RedshiftDialect;
135#[cfg(feature = "dialect-risingwave")]
136pub use risingwave::RisingWaveDialect;
137#[cfg(feature = "dialect-singlestore")]
138pub use singlestore::SingleStoreDialect;
139#[cfg(feature = "dialect-snowflake")]
140pub use snowflake::SnowflakeDialect;
141#[cfg(feature = "dialect-solr")]
142pub use solr::SolrDialect;
143#[cfg(feature = "dialect-spark")]
144pub use spark::SparkDialect;
145#[cfg(feature = "dialect-sqlite")]
146pub use sqlite::SQLiteDialect;
147#[cfg(feature = "dialect-starrocks")]
148pub use starrocks::StarRocksDialect;
149#[cfg(feature = "dialect-tableau")]
150pub use tableau::TableauDialect;
151#[cfg(feature = "dialect-teradata")]
152pub use teradata::TeradataDialect;
153#[cfg(feature = "dialect-tidb")]
154pub use tidb::TiDBDialect;
155#[cfg(feature = "dialect-trino")]
156pub use trino::TrinoDialect;
157#[cfg(feature = "dialect-tsql")]
158pub use tsql::TSQLDialect;
159
160use crate::error::Result;
161use crate::expressions::{Expression, FunctionBody, Null};
162use crate::generator::{Generator, GeneratorConfig};
163use crate::parser::Parser;
164use crate::tokens::{Token, Tokenizer, TokenizerConfig};
165use serde::{Deserialize, Serialize};
166use std::collections::HashMap;
167use std::sync::{Arc, LazyLock, RwLock};
168
/// Enumeration of all supported SQL dialects.
///
/// Each variant corresponds to a specific SQL database engine or query language.
/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
/// and is used as the default when no dialect is specified.
///
/// Dialect names are case-insensitive when parsed from strings via [`FromStr`].
/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)).
///
/// The serde representation (`rename_all = "lowercase"`) matches the canonical
/// spellings produced by `Display` and accepted by `FromStr` (e.g. `PostgreSQL`
/// serializes as "postgresql").
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DialectType {
    /// Standard SQL with no dialect-specific behavior (default).
    Generic,
    /// PostgreSQL -- advanced open-source relational database (also accepts "postgres").
    PostgreSQL,
    /// MySQL -- widely-used open-source relational database.
    MySQL,
    /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
    BigQuery,
    /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
    Snowflake,
    /// DuckDB -- in-process analytical database with modern SQL extensions.
    DuckDB,
    /// SQLite -- lightweight embedded relational database.
    SQLite,
    /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
    Hive,
    /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
    Spark,
    /// Trino -- distributed SQL query engine (formerly PrestoSQL).
    Trino,
    /// PrestoDB -- distributed SQL query engine for big data.
    Presto,
    /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
    Redshift,
    /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
    TSQL,
    /// Oracle Database -- commercial relational database with PL/SQL extensions.
    Oracle,
    /// ClickHouse -- column-oriented OLAP database for real-time analytics.
    ClickHouse,
    /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
    Databricks,
    /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
    Athena,
    /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
    Teradata,
    /// Apache Doris -- real-time analytical database (MySQL-compatible).
    Doris,
    /// StarRocks -- sub-second OLAP database (MySQL-compatible).
    StarRocks,
    /// Materialize -- streaming SQL database built on differential dataflow.
    Materialize,
    /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
    RisingWave,
    /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
    SingleStore,
    /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
    CockroachDB,
    /// TiDB -- distributed HTAP database with MySQL compatibility.
    TiDB,
    /// Apache Druid -- real-time analytics database.
    Druid,
    /// Apache Solr -- search platform with SQL interface.
    Solr,
    /// Tableau -- data visualization platform with its own SQL dialect.
    Tableau,
    /// Dune Analytics -- blockchain analytics SQL engine.
    Dune,
    /// Microsoft Fabric -- unified analytics platform (T-SQL based).
    Fabric,
    /// Apache Drill -- schema-free SQL query engine for big data.
    Drill,
    /// Dremio -- data lakehouse platform with Arrow-based query engine.
    Dremio,
    /// Exasol -- in-memory analytic database.
    Exasol,
    /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions
    /// (also accepts "arrow-datafusion" / "arrow_datafusion").
    DataFusion,
}
249
250impl Default for DialectType {
251 fn default() -> Self {
252 DialectType::Generic
253 }
254}
255
256impl std::fmt::Display for DialectType {
257 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258 match self {
259 DialectType::Generic => write!(f, "generic"),
260 DialectType::PostgreSQL => write!(f, "postgresql"),
261 DialectType::MySQL => write!(f, "mysql"),
262 DialectType::BigQuery => write!(f, "bigquery"),
263 DialectType::Snowflake => write!(f, "snowflake"),
264 DialectType::DuckDB => write!(f, "duckdb"),
265 DialectType::SQLite => write!(f, "sqlite"),
266 DialectType::Hive => write!(f, "hive"),
267 DialectType::Spark => write!(f, "spark"),
268 DialectType::Trino => write!(f, "trino"),
269 DialectType::Presto => write!(f, "presto"),
270 DialectType::Redshift => write!(f, "redshift"),
271 DialectType::TSQL => write!(f, "tsql"),
272 DialectType::Oracle => write!(f, "oracle"),
273 DialectType::ClickHouse => write!(f, "clickhouse"),
274 DialectType::Databricks => write!(f, "databricks"),
275 DialectType::Athena => write!(f, "athena"),
276 DialectType::Teradata => write!(f, "teradata"),
277 DialectType::Doris => write!(f, "doris"),
278 DialectType::StarRocks => write!(f, "starrocks"),
279 DialectType::Materialize => write!(f, "materialize"),
280 DialectType::RisingWave => write!(f, "risingwave"),
281 DialectType::SingleStore => write!(f, "singlestore"),
282 DialectType::CockroachDB => write!(f, "cockroachdb"),
283 DialectType::TiDB => write!(f, "tidb"),
284 DialectType::Druid => write!(f, "druid"),
285 DialectType::Solr => write!(f, "solr"),
286 DialectType::Tableau => write!(f, "tableau"),
287 DialectType::Dune => write!(f, "dune"),
288 DialectType::Fabric => write!(f, "fabric"),
289 DialectType::Drill => write!(f, "drill"),
290 DialectType::Dremio => write!(f, "dremio"),
291 DialectType::Exasol => write!(f, "exasol"),
292 DialectType::DataFusion => write!(f, "datafusion"),
293 }
294 }
295}
296
297impl std::str::FromStr for DialectType {
298 type Err = crate::error::Error;
299
300 fn from_str(s: &str) -> Result<Self> {
301 match s.to_ascii_lowercase().as_str() {
302 "generic" | "" => Ok(DialectType::Generic),
303 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
304 "mysql" => Ok(DialectType::MySQL),
305 "bigquery" => Ok(DialectType::BigQuery),
306 "snowflake" => Ok(DialectType::Snowflake),
307 "duckdb" => Ok(DialectType::DuckDB),
308 "sqlite" => Ok(DialectType::SQLite),
309 "hive" => Ok(DialectType::Hive),
310 "spark" | "spark2" => Ok(DialectType::Spark),
311 "trino" => Ok(DialectType::Trino),
312 "presto" => Ok(DialectType::Presto),
313 "redshift" => Ok(DialectType::Redshift),
314 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
315 "oracle" => Ok(DialectType::Oracle),
316 "clickhouse" => Ok(DialectType::ClickHouse),
317 "databricks" => Ok(DialectType::Databricks),
318 "athena" => Ok(DialectType::Athena),
319 "teradata" => Ok(DialectType::Teradata),
320 "doris" => Ok(DialectType::Doris),
321 "starrocks" => Ok(DialectType::StarRocks),
322 "materialize" => Ok(DialectType::Materialize),
323 "risingwave" => Ok(DialectType::RisingWave),
324 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
325 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
326 "tidb" => Ok(DialectType::TiDB),
327 "druid" => Ok(DialectType::Druid),
328 "solr" => Ok(DialectType::Solr),
329 "tableau" => Ok(DialectType::Tableau),
330 "dune" => Ok(DialectType::Dune),
331 "fabric" => Ok(DialectType::Fabric),
332 "drill" => Ok(DialectType::Drill),
333 "dremio" => Ok(DialectType::Dremio),
334 "exasol" => Ok(DialectType::Exasol),
335 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
336 _ => Err(crate::error::Error::parse(
337 format!("Unknown dialect: {}", s),
338 0,
339 0,
340 0,
341 0,
342 )),
343 }
344 }
345}
346
/// Trait that each concrete SQL dialect must implement.
///
/// `DialectImpl` provides the configuration hooks and per-expression transform logic
/// that distinguish one dialect from another. Implementors supply:
///
/// - A [`DialectType`] identifier.
/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
///   that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
///   rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
///
/// The default implementations are no-ops, so a minimal dialect only needs to provide
/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
/// standard SQL.
pub trait DialectImpl {
    /// Returns the [`DialectType`] that identifies this dialect.
    fn dialect_type(&self) -> DialectType;

    /// Returns the tokenizer configuration for this dialect.
    ///
    /// Override to customize identifier quoting characters, string escape rules,
    /// comment styles, and other lexing behavior.
    fn tokenizer_config(&self) -> TokenizerConfig {
        // Default: generic SQL lexing rules.
        TokenizerConfig::default()
    }

    /// Returns the generator configuration for this dialect.
    ///
    /// Override to customize identifier quoting style, function name casing,
    /// keyword casing, and other SQL generation behavior.
    fn generator_config(&self) -> GeneratorConfig {
        // Default: generic SQL rendering rules.
        GeneratorConfig::default()
    }

    /// Returns a generator configuration tailored to a specific expression.
    ///
    /// Override this for hybrid dialects like Athena that route to different SQL engines
    /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
    /// The default delegates to [`generator_config`](DialectImpl::generator_config).
    fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
        // Default: ignore the expression and use the dialect-wide config.
        self.generator_config()
    }

    /// Transforms a single expression node for this dialect, without recursing into children.
    ///
    /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
    /// expression unchanged if no dialect-specific rewrite is needed. Transformations
    /// typically include function renaming, operator substitution, and type mapping.
    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
        // Default: identity — no per-node rewrites.
        Ok(expr)
    }

    /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
    ///
    /// Override this to apply structural rewrites that must see the entire tree at once,
    /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
    /// `explode_projection_to_unnest`. The default is a no-op pass-through.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        // Default: identity — no whole-tree preprocessing.
        Ok(expr)
    }
}
409
410/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
411/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
412///
413/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
414/// and then nested element/field types are recursed into. This ensures that dialect-level
415/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
416fn transform_data_type_recursive<F>(
417 dt: crate::expressions::DataType,
418 transform_fn: &F,
419) -> Result<crate::expressions::DataType>
420where
421 F: Fn(Expression) -> Result<Expression>,
422{
423 use crate::expressions::DataType;
424 // First, transform the outermost type through the expression system
425 let dt_expr = transform_fn(Expression::DataType(dt))?;
426 let dt = match dt_expr {
427 Expression::DataType(d) => d,
428 _ => {
429 return Ok(match dt_expr {
430 _ => DataType::Custom {
431 name: "UNKNOWN".to_string(),
432 },
433 })
434 }
435 };
436 // Then recurse into nested types
437 match dt {
438 DataType::Array {
439 element_type,
440 dimension,
441 } => {
442 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
443 Ok(DataType::Array {
444 element_type: Box::new(inner),
445 dimension,
446 })
447 }
448 DataType::List { element_type } => {
449 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
450 Ok(DataType::List {
451 element_type: Box::new(inner),
452 })
453 }
454 DataType::Struct { fields, nested } => {
455 let mut new_fields = Vec::new();
456 for mut field in fields {
457 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
458 new_fields.push(field);
459 }
460 Ok(DataType::Struct {
461 fields: new_fields,
462 nested,
463 })
464 }
465 DataType::Map {
466 key_type,
467 value_type,
468 } => {
469 let k = transform_data_type_recursive(*key_type, transform_fn)?;
470 let v = transform_data_type_recursive(*value_type, transform_fn)?;
471 Ok(DataType::Map {
472 key_type: Box::new(k),
473 value_type: Box::new(v),
474 })
475 }
476 other => Ok(other),
477 }
478}
479
/// Convert DuckDB C-style format strings to Presto C-style format strings.
/// DuckDB and Presto both use C-style % directives but with different specifiers for some cases.
///
/// The conversion is an ordered substitution table applied in three phases:
/// 1. mask multi-character patterns behind `\u{1}`-delimited placeholders so the
///    single-specifier rewrites cannot corrupt them,
/// 2. rewrite bare specifiers (`%M` -> `%i`, `%S` -> `%s`),
/// 3. expand the placeholders into their Presto equivalents.
#[cfg(feature = "transpile")]
fn duckdb_to_presto_format(fmt: &str) -> String {
    // Entry order is significant: masking must precede the bare-specifier
    // rewrites, which must precede placeholder expansion.
    const STEPS: [(&str, &str); 14] = [
        // Phase 1: mask multi-char patterns with placeholders.
        ("%-m", "\u{1}NOPADM\u{1}"),
        ("%-d", "\u{1}NOPADD\u{1}"),
        ("%-I", "\u{1}NOPADI\u{1}"),
        ("%-H", "\u{1}NOPADH\u{1}"),
        ("%H:%M:%S", "\u{1}HMS\u{1}"),
        ("%Y-%m-%d", "\u{1}YMD\u{1}"),
        // Phase 2: convert individual specifiers.
        ("%M", "%i"),
        ("%S", "%s"),
        // Phase 3: expand placeholders into Presto equivalents.
        ("\u{1}NOPADM\u{1}", "%c"),
        ("\u{1}NOPADD\u{1}", "%e"),
        ("\u{1}NOPADI\u{1}", "%l"),
        ("\u{1}NOPADH\u{1}", "%k"),
        ("\u{1}HMS\u{1}", "%T"),
        ("\u{1}YMD\u{1}", "%Y-%m-%d"),
    ];
    STEPS
        .iter()
        .fold(fmt.to_string(), |acc, (from, to)| acc.replace(from, to))
}
505
/// Convert DuckDB C-style format strings to BigQuery format strings.
/// BigQuery uses a mix of strftime-like directives.
///
/// Applies an ordered substitution table; the composite date-time pattern is
/// rewritten before its date-only and time-only sub-patterns so that longer
/// matches win.
#[cfg(feature = "transpile")]
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    const STEPS: [(&str, &str); 4] = [
        ("%-d", "%e"),
        ("%Y-%m-%d %H:%M:%S", "%F %T"),
        ("%Y-%m-%d", "%F"),
        ("%H:%M:%S", "%T"),
    ];
    STEPS
        .iter()
        .fold(fmt.to_string(), |acc, (from, to)| acc.replace(from, to))
}
518
519/// Applies a transform function bottom-up through an entire expression tree.
520///
521/// This is the core tree-rewriting engine used by the dialect system. It performs
522/// a post-order (children-first) traversal: for each node, all children are recursively
523/// transformed before the node itself is passed to `transform_fn`. This bottom-up
524/// strategy means that when `transform_fn` sees a node, its children have already
525/// been rewritten, which simplifies pattern matching on sub-expressions.
526///
527/// The function handles all expression variants including SELECT clauses (FROM, WHERE,
528/// GROUP BY, HAVING, ORDER BY, QUALIFY, WITH/CTEs, WINDOW), binary operators,
529/// function calls, CASE expressions, date/time functions, and more.
530///
531/// # Arguments
532///
533/// * `expr` - The root expression to transform (consumed).
534/// * `transform_fn` - A closure that receives each expression node (after its children
535/// have been transformed) and returns a possibly-rewritten expression.
536///
537/// # Errors
538///
539/// Returns an error if `transform_fn` returns an error for any node.
540pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
541where
542 F: Fn(Expression) -> Result<Expression>,
543{
544 use crate::expressions::BinaryOp;
545
546 // Helper macro to transform binary ops with Box<BinaryOp>
547 macro_rules! transform_binary {
548 ($variant:ident, $op:expr) => {{
549 let left = transform_recursive($op.left, transform_fn)?;
550 let right = transform_recursive($op.right, transform_fn)?;
551 Expression::$variant(Box::new(BinaryOp {
552 left,
553 right,
554 left_comments: $op.left_comments,
555 operator_comments: $op.operator_comments,
556 trailing_comments: $op.trailing_comments,
557 inferred_type: $op.inferred_type,
558 }))
559 }};
560 }
561
562 // Fast path: leaf nodes never need child traversal, apply transform directly
563 if matches!(
564 &expr,
565 Expression::Literal(_)
566 | Expression::Boolean(_)
567 | Expression::Null(_)
568 | Expression::Identifier(_)
569 | Expression::Star(_)
570 | Expression::Parameter(_)
571 | Expression::Placeholder(_)
572 | Expression::SessionParameter(_)
573 ) {
574 return transform_fn(expr);
575 }
576
577 // First recursively transform children, then apply the transform function
578 let expr = match expr {
579 Expression::Select(mut select) => {
580 select.expressions = select
581 .expressions
582 .into_iter()
583 .map(|e| transform_recursive(e, transform_fn))
584 .collect::<Result<Vec<_>>>()?;
585
586 // Transform FROM clause
587 if let Some(mut from) = select.from.take() {
588 from.expressions = from
589 .expressions
590 .into_iter()
591 .map(|e| transform_recursive(e, transform_fn))
592 .collect::<Result<Vec<_>>>()?;
593 select.from = Some(from);
594 }
595
596 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
597 select.joins = select
598 .joins
599 .into_iter()
600 .map(|mut join| {
601 join.this = transform_recursive(join.this, transform_fn)?;
602 if let Some(on) = join.on.take() {
603 join.on = Some(transform_recursive(on, transform_fn)?);
604 }
605 // Wrap join in Expression::Join to allow transform_fn to transform it
606 match transform_fn(Expression::Join(Box::new(join)))? {
607 Expression::Join(j) => Ok(*j),
608 _ => Err(crate::error::Error::parse(
609 "Join transformation returned non-join expression",
610 0,
611 0,
612 0,
613 0,
614 )),
615 }
616 })
617 .collect::<Result<Vec<_>>>()?;
618
619 // Transform LATERAL VIEW expressions (Hive/Spark)
620 select.lateral_views = select
621 .lateral_views
622 .into_iter()
623 .map(|mut lv| {
624 lv.this = transform_recursive(lv.this, transform_fn)?;
625 Ok(lv)
626 })
627 .collect::<Result<Vec<_>>>()?;
628
629 // Transform WHERE clause
630 if let Some(mut where_clause) = select.where_clause.take() {
631 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
632 select.where_clause = Some(where_clause);
633 }
634
635 // Transform GROUP BY
636 if let Some(mut group_by) = select.group_by.take() {
637 group_by.expressions = group_by
638 .expressions
639 .into_iter()
640 .map(|e| transform_recursive(e, transform_fn))
641 .collect::<Result<Vec<_>>>()?;
642 select.group_by = Some(group_by);
643 }
644
645 // Transform HAVING
646 if let Some(mut having) = select.having.take() {
647 having.this = transform_recursive(having.this, transform_fn)?;
648 select.having = Some(having);
649 }
650
651 // Transform WITH (CTEs)
652 if let Some(mut with) = select.with.take() {
653 with.ctes = with
654 .ctes
655 .into_iter()
656 .map(|mut cte| {
657 let original = cte.this.clone();
658 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
659 cte
660 })
661 .collect();
662 select.with = Some(with);
663 }
664
665 // Transform ORDER BY
666 if let Some(mut order) = select.order_by.take() {
667 order.expressions = order
668 .expressions
669 .into_iter()
670 .map(|o| {
671 let mut o = o;
672 let original = o.this.clone();
673 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
674 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
675 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
676 Ok(Expression::Ordered(transformed)) => *transformed,
677 Ok(_) | Err(_) => o,
678 }
679 })
680 .collect();
681 select.order_by = Some(order);
682 }
683
684 // Transform WINDOW clause order_by
685 if let Some(ref mut windows) = select.windows {
686 for nw in windows.iter_mut() {
687 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
688 .into_iter()
689 .map(|o| {
690 let mut o = o;
691 let original = o.this.clone();
692 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
693 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
694 Ok(Expression::Ordered(transformed)) => *transformed,
695 Ok(_) | Err(_) => o,
696 }
697 })
698 .collect();
699 }
700 }
701
702 // Transform QUALIFY
703 if let Some(mut qual) = select.qualify.take() {
704 qual.this = transform_recursive(qual.this, transform_fn)?;
705 select.qualify = Some(qual);
706 }
707
708 Expression::Select(select)
709 }
710 Expression::Function(mut f) => {
711 f.args = f
712 .args
713 .into_iter()
714 .map(|e| transform_recursive(e, transform_fn))
715 .collect::<Result<Vec<_>>>()?;
716 Expression::Function(f)
717 }
718 Expression::AggregateFunction(mut f) => {
719 f.args = f
720 .args
721 .into_iter()
722 .map(|e| transform_recursive(e, transform_fn))
723 .collect::<Result<Vec<_>>>()?;
724 if let Some(filter) = f.filter {
725 f.filter = Some(transform_recursive(filter, transform_fn)?);
726 }
727 Expression::AggregateFunction(f)
728 }
729 Expression::WindowFunction(mut wf) => {
730 wf.this = transform_recursive(wf.this, transform_fn)?;
731 wf.over.partition_by = wf
732 .over
733 .partition_by
734 .into_iter()
735 .map(|e| transform_recursive(e, transform_fn))
736 .collect::<Result<Vec<_>>>()?;
737 // Transform order_by items through Expression::Ordered wrapper
738 wf.over.order_by = wf
739 .over
740 .order_by
741 .into_iter()
742 .map(|o| {
743 let mut o = o;
744 o.this = transform_recursive(o.this, transform_fn)?;
745 match transform_fn(Expression::Ordered(Box::new(o)))? {
746 Expression::Ordered(transformed) => Ok(*transformed),
747 _ => Err(crate::error::Error::parse(
748 "Ordered transformation returned non-Ordered expression",
749 0,
750 0,
751 0,
752 0,
753 )),
754 }
755 })
756 .collect::<Result<Vec<_>>>()?;
757 Expression::WindowFunction(wf)
758 }
759 Expression::Alias(mut a) => {
760 a.this = transform_recursive(a.this, transform_fn)?;
761 Expression::Alias(a)
762 }
763 Expression::Cast(mut c) => {
764 c.this = transform_recursive(c.this, transform_fn)?;
765 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
766 c.to = transform_data_type_recursive(c.to, transform_fn)?;
767 Expression::Cast(c)
768 }
769 Expression::And(op) => transform_binary!(And, *op),
770 Expression::Or(op) => transform_binary!(Or, *op),
771 Expression::Add(op) => transform_binary!(Add, *op),
772 Expression::Sub(op) => transform_binary!(Sub, *op),
773 Expression::Mul(op) => transform_binary!(Mul, *op),
774 Expression::Div(op) => transform_binary!(Div, *op),
775 Expression::Eq(op) => transform_binary!(Eq, *op),
776 Expression::Lt(op) => transform_binary!(Lt, *op),
777 Expression::Gt(op) => transform_binary!(Gt, *op),
778 Expression::Paren(mut p) => {
779 p.this = transform_recursive(p.this, transform_fn)?;
780 Expression::Paren(p)
781 }
782 Expression::Coalesce(mut f) => {
783 f.expressions = f
784 .expressions
785 .into_iter()
786 .map(|e| transform_recursive(e, transform_fn))
787 .collect::<Result<Vec<_>>>()?;
788 Expression::Coalesce(f)
789 }
790 Expression::IfNull(mut f) => {
791 f.this = transform_recursive(f.this, transform_fn)?;
792 f.expression = transform_recursive(f.expression, transform_fn)?;
793 Expression::IfNull(f)
794 }
795 Expression::Nvl(mut f) => {
796 f.this = transform_recursive(f.this, transform_fn)?;
797 f.expression = transform_recursive(f.expression, transform_fn)?;
798 Expression::Nvl(f)
799 }
800 Expression::In(mut i) => {
801 i.this = transform_recursive(i.this, transform_fn)?;
802 i.expressions = i
803 .expressions
804 .into_iter()
805 .map(|e| transform_recursive(e, transform_fn))
806 .collect::<Result<Vec<_>>>()?;
807 if let Some(query) = i.query {
808 i.query = Some(transform_recursive(query, transform_fn)?);
809 }
810 Expression::In(i)
811 }
812 Expression::Not(mut n) => {
813 n.this = transform_recursive(n.this, transform_fn)?;
814 Expression::Not(n)
815 }
816 Expression::ArraySlice(mut s) => {
817 s.this = transform_recursive(s.this, transform_fn)?;
818 if let Some(start) = s.start {
819 s.start = Some(transform_recursive(start, transform_fn)?);
820 }
821 if let Some(end) = s.end {
822 s.end = Some(transform_recursive(end, transform_fn)?);
823 }
824 Expression::ArraySlice(s)
825 }
826 Expression::Subscript(mut s) => {
827 s.this = transform_recursive(s.this, transform_fn)?;
828 s.index = transform_recursive(s.index, transform_fn)?;
829 Expression::Subscript(s)
830 }
831 Expression::Array(mut a) => {
832 a.expressions = a
833 .expressions
834 .into_iter()
835 .map(|e| transform_recursive(e, transform_fn))
836 .collect::<Result<Vec<_>>>()?;
837 Expression::Array(a)
838 }
839 Expression::Struct(mut s) => {
840 let mut new_fields = Vec::new();
841 for (name, expr) in s.fields {
842 let transformed = transform_recursive(expr, transform_fn)?;
843 new_fields.push((name, transformed));
844 }
845 s.fields = new_fields;
846 Expression::Struct(s)
847 }
848 Expression::NamedArgument(mut na) => {
849 na.value = transform_recursive(na.value, transform_fn)?;
850 Expression::NamedArgument(na)
851 }
852 Expression::MapFunc(mut m) => {
853 m.keys = m
854 .keys
855 .into_iter()
856 .map(|e| transform_recursive(e, transform_fn))
857 .collect::<Result<Vec<_>>>()?;
858 m.values = m
859 .values
860 .into_iter()
861 .map(|e| transform_recursive(e, transform_fn))
862 .collect::<Result<Vec<_>>>()?;
863 Expression::MapFunc(m)
864 }
865 Expression::ArrayFunc(mut a) => {
866 a.expressions = a
867 .expressions
868 .into_iter()
869 .map(|e| transform_recursive(e, transform_fn))
870 .collect::<Result<Vec<_>>>()?;
871 Expression::ArrayFunc(a)
872 }
873 Expression::Lambda(mut l) => {
874 l.body = transform_recursive(l.body, transform_fn)?;
875 Expression::Lambda(l)
876 }
877 Expression::JsonExtract(mut f) => {
878 f.this = transform_recursive(f.this, transform_fn)?;
879 f.path = transform_recursive(f.path, transform_fn)?;
880 Expression::JsonExtract(f)
881 }
882 Expression::JsonExtractScalar(mut f) => {
883 f.this = transform_recursive(f.this, transform_fn)?;
884 f.path = transform_recursive(f.path, transform_fn)?;
885 Expression::JsonExtractScalar(f)
886 }
887
888 // ===== UnaryFunc-based expressions =====
889 // These all have a single `this: Expression` child
890 Expression::Length(mut f) => {
891 f.this = transform_recursive(f.this, transform_fn)?;
892 Expression::Length(f)
893 }
894 Expression::Upper(mut f) => {
895 f.this = transform_recursive(f.this, transform_fn)?;
896 Expression::Upper(f)
897 }
898 Expression::Lower(mut f) => {
899 f.this = transform_recursive(f.this, transform_fn)?;
900 Expression::Lower(f)
901 }
902 Expression::LTrim(mut f) => {
903 f.this = transform_recursive(f.this, transform_fn)?;
904 Expression::LTrim(f)
905 }
906 Expression::RTrim(mut f) => {
907 f.this = transform_recursive(f.this, transform_fn)?;
908 Expression::RTrim(f)
909 }
910 Expression::Reverse(mut f) => {
911 f.this = transform_recursive(f.this, transform_fn)?;
912 Expression::Reverse(f)
913 }
914 Expression::Abs(mut f) => {
915 f.this = transform_recursive(f.this, transform_fn)?;
916 Expression::Abs(f)
917 }
918 Expression::Ceil(mut f) => {
919 f.this = transform_recursive(f.this, transform_fn)?;
920 Expression::Ceil(f)
921 }
922 Expression::Floor(mut f) => {
923 f.this = transform_recursive(f.this, transform_fn)?;
924 Expression::Floor(f)
925 }
926 Expression::Sign(mut f) => {
927 f.this = transform_recursive(f.this, transform_fn)?;
928 Expression::Sign(f)
929 }
930 Expression::Sqrt(mut f) => {
931 f.this = transform_recursive(f.this, transform_fn)?;
932 Expression::Sqrt(f)
933 }
934 Expression::Cbrt(mut f) => {
935 f.this = transform_recursive(f.this, transform_fn)?;
936 Expression::Cbrt(f)
937 }
938 Expression::Ln(mut f) => {
939 f.this = transform_recursive(f.this, transform_fn)?;
940 Expression::Ln(f)
941 }
942 Expression::Log(mut f) => {
943 f.this = transform_recursive(f.this, transform_fn)?;
944 if let Some(base) = f.base {
945 f.base = Some(transform_recursive(base, transform_fn)?);
946 }
947 Expression::Log(f)
948 }
949 Expression::Exp(mut f) => {
950 f.this = transform_recursive(f.this, transform_fn)?;
951 Expression::Exp(f)
952 }
953 Expression::Date(mut f) => {
954 f.this = transform_recursive(f.this, transform_fn)?;
955 Expression::Date(f)
956 }
957 Expression::Stddev(mut f) => {
958 f.this = transform_recursive(f.this, transform_fn)?;
959 Expression::Stddev(f)
960 }
961 Expression::Variance(mut f) => {
962 f.this = transform_recursive(f.this, transform_fn)?;
963 Expression::Variance(f)
964 }
965
966 // ===== BinaryFunc-based expressions =====
967 Expression::ModFunc(mut f) => {
968 f.this = transform_recursive(f.this, transform_fn)?;
969 f.expression = transform_recursive(f.expression, transform_fn)?;
970 Expression::ModFunc(f)
971 }
972 Expression::Power(mut f) => {
973 f.this = transform_recursive(f.this, transform_fn)?;
974 f.expression = transform_recursive(f.expression, transform_fn)?;
975 Expression::Power(f)
976 }
977 Expression::MapFromArrays(mut f) => {
978 f.this = transform_recursive(f.this, transform_fn)?;
979 f.expression = transform_recursive(f.expression, transform_fn)?;
980 Expression::MapFromArrays(f)
981 }
982 Expression::ElementAt(mut f) => {
983 f.this = transform_recursive(f.this, transform_fn)?;
984 f.expression = transform_recursive(f.expression, transform_fn)?;
985 Expression::ElementAt(f)
986 }
987 Expression::MapContainsKey(mut f) => {
988 f.this = transform_recursive(f.this, transform_fn)?;
989 f.expression = transform_recursive(f.expression, transform_fn)?;
990 Expression::MapContainsKey(f)
991 }
992 Expression::Left(mut f) => {
993 f.this = transform_recursive(f.this, transform_fn)?;
994 f.length = transform_recursive(f.length, transform_fn)?;
995 Expression::Left(f)
996 }
997 Expression::Right(mut f) => {
998 f.this = transform_recursive(f.this, transform_fn)?;
999 f.length = transform_recursive(f.length, transform_fn)?;
1000 Expression::Right(f)
1001 }
1002 Expression::Repeat(mut f) => {
1003 f.this = transform_recursive(f.this, transform_fn)?;
1004 f.times = transform_recursive(f.times, transform_fn)?;
1005 Expression::Repeat(f)
1006 }
1007
1008 // ===== Complex function expressions =====
1009 Expression::Substring(mut f) => {
1010 f.this = transform_recursive(f.this, transform_fn)?;
1011 f.start = transform_recursive(f.start, transform_fn)?;
1012 if let Some(len) = f.length {
1013 f.length = Some(transform_recursive(len, transform_fn)?);
1014 }
1015 Expression::Substring(f)
1016 }
1017 Expression::Replace(mut f) => {
1018 f.this = transform_recursive(f.this, transform_fn)?;
1019 f.old = transform_recursive(f.old, transform_fn)?;
1020 f.new = transform_recursive(f.new, transform_fn)?;
1021 Expression::Replace(f)
1022 }
1023 Expression::ConcatWs(mut f) => {
1024 f.separator = transform_recursive(f.separator, transform_fn)?;
1025 f.expressions = f
1026 .expressions
1027 .into_iter()
1028 .map(|e| transform_recursive(e, transform_fn))
1029 .collect::<Result<Vec<_>>>()?;
1030 Expression::ConcatWs(f)
1031 }
1032 Expression::Trim(mut f) => {
1033 f.this = transform_recursive(f.this, transform_fn)?;
1034 if let Some(chars) = f.characters {
1035 f.characters = Some(transform_recursive(chars, transform_fn)?);
1036 }
1037 Expression::Trim(f)
1038 }
1039 Expression::Split(mut f) => {
1040 f.this = transform_recursive(f.this, transform_fn)?;
1041 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
1042 Expression::Split(f)
1043 }
1044 Expression::Lpad(mut f) => {
1045 f.this = transform_recursive(f.this, transform_fn)?;
1046 f.length = transform_recursive(f.length, transform_fn)?;
1047 if let Some(fill) = f.fill {
1048 f.fill = Some(transform_recursive(fill, transform_fn)?);
1049 }
1050 Expression::Lpad(f)
1051 }
1052 Expression::Rpad(mut f) => {
1053 f.this = transform_recursive(f.this, transform_fn)?;
1054 f.length = transform_recursive(f.length, transform_fn)?;
1055 if let Some(fill) = f.fill {
1056 f.fill = Some(transform_recursive(fill, transform_fn)?);
1057 }
1058 Expression::Rpad(f)
1059 }
1060
1061 // ===== Conditional expressions =====
1062 Expression::Case(mut c) => {
1063 if let Some(operand) = c.operand {
1064 c.operand = Some(transform_recursive(operand, transform_fn)?);
1065 }
1066 c.whens = c
1067 .whens
1068 .into_iter()
1069 .map(|(cond, then)| {
1070 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
1071 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
1072 (new_cond, new_then)
1073 })
1074 .collect();
1075 if let Some(else_expr) = c.else_ {
1076 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
1077 }
1078 Expression::Case(c)
1079 }
1080 Expression::IfFunc(mut f) => {
1081 f.condition = transform_recursive(f.condition, transform_fn)?;
1082 f.true_value = transform_recursive(f.true_value, transform_fn)?;
1083 if let Some(false_val) = f.false_value {
1084 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
1085 }
1086 Expression::IfFunc(f)
1087 }
1088
1089 // ===== Date/Time expressions =====
1090 Expression::DateAdd(mut f) => {
1091 f.this = transform_recursive(f.this, transform_fn)?;
1092 f.interval = transform_recursive(f.interval, transform_fn)?;
1093 Expression::DateAdd(f)
1094 }
1095 Expression::DateSub(mut f) => {
1096 f.this = transform_recursive(f.this, transform_fn)?;
1097 f.interval = transform_recursive(f.interval, transform_fn)?;
1098 Expression::DateSub(f)
1099 }
1100 Expression::DateDiff(mut f) => {
1101 f.this = transform_recursive(f.this, transform_fn)?;
1102 f.expression = transform_recursive(f.expression, transform_fn)?;
1103 Expression::DateDiff(f)
1104 }
1105 Expression::DateTrunc(mut f) => {
1106 f.this = transform_recursive(f.this, transform_fn)?;
1107 Expression::DateTrunc(f)
1108 }
1109 Expression::Extract(mut f) => {
1110 f.this = transform_recursive(f.this, transform_fn)?;
1111 Expression::Extract(f)
1112 }
1113
1114 // ===== JSON expressions =====
1115 Expression::JsonObject(mut f) => {
1116 f.pairs = f
1117 .pairs
1118 .into_iter()
1119 .map(|(k, v)| {
1120 let new_k = transform_recursive(k, transform_fn)?;
1121 let new_v = transform_recursive(v, transform_fn)?;
1122 Ok((new_k, new_v))
1123 })
1124 .collect::<Result<Vec<_>>>()?;
1125 Expression::JsonObject(f)
1126 }
1127
1128 // ===== Subquery expressions =====
1129 Expression::Subquery(mut s) => {
1130 s.this = transform_recursive(s.this, transform_fn)?;
1131 Expression::Subquery(s)
1132 }
1133 Expression::Exists(mut e) => {
1134 e.this = transform_recursive(e.this, transform_fn)?;
1135 Expression::Exists(e)
1136 }
1137 Expression::Describe(mut d) => {
1138 d.target = transform_recursive(d.target, transform_fn)?;
1139 Expression::Describe(d)
1140 }
1141
1142 // ===== Set operations =====
1143 Expression::Union(mut u) => {
1144 let left = std::mem::replace(&mut u.left, Expression::Null(Null));
1145 u.left = transform_recursive(left, transform_fn)?;
1146 let right = std::mem::replace(&mut u.right, Expression::Null(Null));
1147 u.right = transform_recursive(right, transform_fn)?;
1148 if let Some(mut with) = u.with.take() {
1149 with.ctes = with
1150 .ctes
1151 .into_iter()
1152 .map(|mut cte| {
1153 let original = cte.this.clone();
1154 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1155 cte
1156 })
1157 .collect();
1158 u.with = Some(with);
1159 }
1160 Expression::Union(u)
1161 }
1162 Expression::Intersect(mut i) => {
1163 let left = std::mem::replace(&mut i.left, Expression::Null(Null));
1164 i.left = transform_recursive(left, transform_fn)?;
1165 let right = std::mem::replace(&mut i.right, Expression::Null(Null));
1166 i.right = transform_recursive(right, transform_fn)?;
1167 if let Some(mut with) = i.with.take() {
1168 with.ctes = with
1169 .ctes
1170 .into_iter()
1171 .map(|mut cte| {
1172 let original = cte.this.clone();
1173 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1174 cte
1175 })
1176 .collect();
1177 i.with = Some(with);
1178 }
1179 Expression::Intersect(i)
1180 }
1181 Expression::Except(mut e) => {
1182 let left = std::mem::replace(&mut e.left, Expression::Null(Null));
1183 e.left = transform_recursive(left, transform_fn)?;
1184 let right = std::mem::replace(&mut e.right, Expression::Null(Null));
1185 e.right = transform_recursive(right, transform_fn)?;
1186 if let Some(mut with) = e.with.take() {
1187 with.ctes = with
1188 .ctes
1189 .into_iter()
1190 .map(|mut cte| {
1191 let original = cte.this.clone();
1192 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1193 cte
1194 })
1195 .collect();
1196 e.with = Some(with);
1197 }
1198 Expression::Except(e)
1199 }
1200
1201 // ===== DML expressions =====
1202 Expression::Insert(mut ins) => {
1203 // Transform VALUES clause expressions
1204 let mut new_values = Vec::new();
1205 for row in ins.values {
1206 let mut new_row = Vec::new();
1207 for e in row {
1208 new_row.push(transform_recursive(e, transform_fn)?);
1209 }
1210 new_values.push(new_row);
1211 }
1212 ins.values = new_values;
1213
1214 // Transform query (for INSERT ... SELECT)
1215 if let Some(query) = ins.query {
1216 ins.query = Some(transform_recursive(query, transform_fn)?);
1217 }
1218
1219 // Transform RETURNING clause
1220 let mut new_returning = Vec::new();
1221 for e in ins.returning {
1222 new_returning.push(transform_recursive(e, transform_fn)?);
1223 }
1224 ins.returning = new_returning;
1225
1226 // Transform ON CONFLICT clause
1227 if let Some(on_conflict) = ins.on_conflict {
1228 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
1229 }
1230
1231 Expression::Insert(ins)
1232 }
1233 Expression::Update(mut upd) => {
1234 upd.set = upd
1235 .set
1236 .into_iter()
1237 .map(|(id, val)| {
1238 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
1239 (id, new_val)
1240 })
1241 .collect();
1242 if let Some(mut where_clause) = upd.where_clause.take() {
1243 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1244 upd.where_clause = Some(where_clause);
1245 }
1246 Expression::Update(upd)
1247 }
1248 Expression::Delete(mut del) => {
1249 if let Some(mut where_clause) = del.where_clause.take() {
1250 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1251 del.where_clause = Some(where_clause);
1252 }
1253 Expression::Delete(del)
1254 }
1255
1256 // ===== CTE expressions =====
1257 Expression::With(mut w) => {
1258 w.ctes = w
1259 .ctes
1260 .into_iter()
1261 .map(|mut cte| {
1262 let original = cte.this.clone();
1263 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1264 cte
1265 })
1266 .collect();
1267 Expression::With(w)
1268 }
1269 Expression::Cte(mut c) => {
1270 c.this = transform_recursive(c.this, transform_fn)?;
1271 Expression::Cte(c)
1272 }
1273
1274 // ===== Order expressions =====
1275 Expression::Ordered(mut o) => {
1276 o.this = transform_recursive(o.this, transform_fn)?;
1277 Expression::Ordered(o)
1278 }
1279
1280 // ===== Negation =====
1281 Expression::Neg(mut n) => {
1282 n.this = transform_recursive(n.this, transform_fn)?;
1283 Expression::Neg(n)
1284 }
1285
1286 // ===== Between =====
1287 Expression::Between(mut b) => {
1288 b.this = transform_recursive(b.this, transform_fn)?;
1289 b.low = transform_recursive(b.low, transform_fn)?;
1290 b.high = transform_recursive(b.high, transform_fn)?;
1291 Expression::Between(b)
1292 }
1293 Expression::IsNull(mut i) => {
1294 i.this = transform_recursive(i.this, transform_fn)?;
1295 Expression::IsNull(i)
1296 }
1297 Expression::IsTrue(mut i) => {
1298 i.this = transform_recursive(i.this, transform_fn)?;
1299 Expression::IsTrue(i)
1300 }
1301 Expression::IsFalse(mut i) => {
1302 i.this = transform_recursive(i.this, transform_fn)?;
1303 Expression::IsFalse(i)
1304 }
1305
1306 // ===== Like expressions =====
1307 Expression::Like(mut l) => {
1308 l.left = transform_recursive(l.left, transform_fn)?;
1309 l.right = transform_recursive(l.right, transform_fn)?;
1310 Expression::Like(l)
1311 }
1312 Expression::ILike(mut l) => {
1313 l.left = transform_recursive(l.left, transform_fn)?;
1314 l.right = transform_recursive(l.right, transform_fn)?;
1315 Expression::ILike(l)
1316 }
1317
1318 // ===== Additional binary ops not covered by macro =====
1319 Expression::Neq(op) => transform_binary!(Neq, *op),
1320 Expression::Lte(op) => transform_binary!(Lte, *op),
1321 Expression::Gte(op) => transform_binary!(Gte, *op),
1322 Expression::Mod(op) => transform_binary!(Mod, *op),
1323 Expression::Concat(op) => transform_binary!(Concat, *op),
1324 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
1325 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
1326 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
1327 Expression::Is(op) => transform_binary!(Is, *op),
1328
1329 // ===== TryCast / SafeCast =====
1330 Expression::TryCast(mut c) => {
1331 c.this = transform_recursive(c.this, transform_fn)?;
1332 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1333 Expression::TryCast(c)
1334 }
1335 Expression::SafeCast(mut c) => {
1336 c.this = transform_recursive(c.this, transform_fn)?;
1337 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1338 Expression::SafeCast(c)
1339 }
1340
1341 // ===== Misc =====
1342 Expression::Unnest(mut f) => {
1343 f.this = transform_recursive(f.this, transform_fn)?;
1344 f.expressions = f
1345 .expressions
1346 .into_iter()
1347 .map(|e| transform_recursive(e, transform_fn))
1348 .collect::<Result<Vec<_>>>()?;
1349 Expression::Unnest(f)
1350 }
1351 Expression::Explode(mut f) => {
1352 f.this = transform_recursive(f.this, transform_fn)?;
1353 Expression::Explode(f)
1354 }
1355 Expression::GroupConcat(mut f) => {
1356 f.this = transform_recursive(f.this, transform_fn)?;
1357 Expression::GroupConcat(f)
1358 }
1359 Expression::StringAgg(mut f) => {
1360 f.this = transform_recursive(f.this, transform_fn)?;
1361 Expression::StringAgg(f)
1362 }
1363 Expression::ListAgg(mut f) => {
1364 f.this = transform_recursive(f.this, transform_fn)?;
1365 Expression::ListAgg(f)
1366 }
1367 Expression::ArrayAgg(mut f) => {
1368 f.this = transform_recursive(f.this, transform_fn)?;
1369 Expression::ArrayAgg(f)
1370 }
1371 Expression::ParseJson(mut f) => {
1372 f.this = transform_recursive(f.this, transform_fn)?;
1373 Expression::ParseJson(f)
1374 }
1375 Expression::ToJson(mut f) => {
1376 f.this = transform_recursive(f.this, transform_fn)?;
1377 Expression::ToJson(f)
1378 }
1379 Expression::JSONExtract(mut e) => {
1380 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1381 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1382 Expression::JSONExtract(e)
1383 }
1384 Expression::JSONExtractScalar(mut e) => {
1385 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1386 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1387 Expression::JSONExtractScalar(e)
1388 }
1389
1390 // StrToTime: recurse into this
1391 Expression::StrToTime(mut e) => {
1392 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1393 Expression::StrToTime(e)
1394 }
1395
1396 // UnixToTime: recurse into this
1397 Expression::UnixToTime(mut e) => {
1398 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1399 Expression::UnixToTime(e)
1400 }
1401
1402 // CreateTable: recurse into column defaults, on_update expressions, and data types
1403 Expression::CreateTable(mut ct) => {
1404 for col in &mut ct.columns {
1405 if let Some(default_expr) = col.default.take() {
1406 col.default = Some(transform_recursive(default_expr, transform_fn)?);
1407 }
1408 if let Some(on_update_expr) = col.on_update.take() {
1409 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
1410 }
1411 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
1412 // are NOT applied here because per-dialect transforms are designed for CAST/expression
1413 // contexts and may not produce correct results for DDL column definitions.
1414 // The DDL type mappings would need dedicated handling per source/target pair.
1415 }
1416 if let Some(as_select) = ct.as_select.take() {
1417 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
1418 }
1419 Expression::CreateTable(ct)
1420 }
1421
1422 // CreateView: recurse into the view body query
1423 Expression::CreateView(mut cv) => {
1424 cv.query = transform_recursive(cv.query, transform_fn)?;
1425 Expression::CreateView(cv)
1426 }
1427
1428 // CreateTask: recurse into the task body
1429 Expression::CreateTask(mut ct) => {
1430 ct.body = transform_recursive(ct.body, transform_fn)?;
1431 Expression::CreateTask(ct)
1432 }
1433
1434 // CreateProcedure: recurse into body expressions
1435 Expression::CreateProcedure(mut cp) => {
1436 if let Some(body) = cp.body.take() {
1437 cp.body = Some(match body {
1438 FunctionBody::Expression(expr) => {
1439 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1440 }
1441 FunctionBody::Return(expr) => {
1442 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1443 }
1444 FunctionBody::Statements(stmts) => {
1445 let transformed_stmts = stmts
1446 .into_iter()
1447 .map(|s| transform_recursive(s, transform_fn))
1448 .collect::<Result<Vec<_>>>()?;
1449 FunctionBody::Statements(transformed_stmts)
1450 }
1451 other => other,
1452 });
1453 }
1454 Expression::CreateProcedure(cp)
1455 }
1456
1457 // CreateFunction: recurse into body expressions
1458 Expression::CreateFunction(mut cf) => {
1459 if let Some(body) = cf.body.take() {
1460 cf.body = Some(match body {
1461 FunctionBody::Expression(expr) => {
1462 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1463 }
1464 FunctionBody::Return(expr) => {
1465 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1466 }
1467 FunctionBody::Statements(stmts) => {
1468 let transformed_stmts = stmts
1469 .into_iter()
1470 .map(|s| transform_recursive(s, transform_fn))
1471 .collect::<Result<Vec<_>>>()?;
1472 FunctionBody::Statements(transformed_stmts)
1473 }
1474 other => other,
1475 });
1476 }
1477 Expression::CreateFunction(cf)
1478 }
1479
1480 // MemberOf: recurse into left and right operands
1481 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
1482 // ArrayContainsAll (@>): recurse into left and right operands
1483 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
1484 // ArrayContainedBy (<@): recurse into left and right operands
1485 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
1486 // ArrayOverlaps (&&): recurse into left and right operands
1487 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
1488 // TsMatch (@@): recurse into left and right operands
1489 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
1490 // Adjacent (-|-): recurse into left and right operands
1491 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
1492
1493 // Table: recurse into when (HistoricalData) and changes fields
1494 Expression::Table(mut t) => {
1495 if let Some(when) = t.when.take() {
1496 let transformed =
1497 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
1498 if let Expression::HistoricalData(hd) = transformed {
1499 t.when = Some(hd);
1500 }
1501 }
1502 if let Some(changes) = t.changes.take() {
1503 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
1504 if let Expression::Changes(c) = transformed {
1505 t.changes = Some(c);
1506 }
1507 }
1508 Expression::Table(t)
1509 }
1510
1511 // HistoricalData (Snowflake time travel): recurse into expression
1512 Expression::HistoricalData(mut hd) => {
1513 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
1514 Expression::HistoricalData(hd)
1515 }
1516
1517 // Changes (Snowflake CHANGES clause): recurse into at_before and end
1518 Expression::Changes(mut c) => {
1519 if let Some(at_before) = c.at_before.take() {
1520 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
1521 }
1522 if let Some(end) = c.end.take() {
1523 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
1524 }
1525 Expression::Changes(c)
1526 }
1527
1528 // TableArgument: TABLE(expr) or MODEL(expr)
1529 Expression::TableArgument(mut ta) => {
1530 ta.this = transform_recursive(ta.this, transform_fn)?;
1531 Expression::TableArgument(ta)
1532 }
1533
1534 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
1535 Expression::JoinedTable(mut jt) => {
1536 jt.left = transform_recursive(jt.left, transform_fn)?;
1537 for join in &mut jt.joins {
1538 join.this = transform_recursive(
1539 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
1540 transform_fn,
1541 )?;
1542 if let Some(on) = join.on.take() {
1543 join.on = Some(transform_recursive(on, transform_fn)?);
1544 }
1545 }
1546 jt.lateral_views = jt
1547 .lateral_views
1548 .into_iter()
1549 .map(|mut lv| {
1550 lv.this = transform_recursive(lv.this, transform_fn)?;
1551 Ok(lv)
1552 })
1553 .collect::<Result<Vec<_>>>()?;
1554 Expression::JoinedTable(jt)
1555 }
1556
1557 // Lateral: LATERAL func() - recurse into the function expression
1558 Expression::Lateral(mut lat) => {
1559 *lat.this = transform_recursive(*lat.this, transform_fn)?;
1560 Expression::Lateral(lat)
1561 }
1562
1563 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
1564 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
1565 // as a unit together with the WithinGroup wrapper
1566 Expression::WithinGroup(mut wg) => {
1567 wg.order_by = wg
1568 .order_by
1569 .into_iter()
1570 .map(|mut o| {
1571 let original = o.this.clone();
1572 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1573 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1574 Ok(Expression::Ordered(transformed)) => *transformed,
1575 Ok(_) | Err(_) => o,
1576 }
1577 })
1578 .collect();
1579 Expression::WithinGroup(wg)
1580 }
1581
1582 // Filter: recurse into both the aggregate and the filter condition
1583 Expression::Filter(mut f) => {
1584 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
1585 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
1586 Expression::Filter(f)
1587 }
1588
1589 // BitwiseOrAgg/BitwiseAndAgg/BitwiseXorAgg: recurse into the aggregate argument
1590 Expression::BitwiseOrAgg(mut f) => {
1591 f.this = transform_recursive(f.this, transform_fn)?;
1592 Expression::BitwiseOrAgg(f)
1593 }
1594 Expression::BitwiseAndAgg(mut f) => {
1595 f.this = transform_recursive(f.this, transform_fn)?;
1596 Expression::BitwiseAndAgg(f)
1597 }
1598 Expression::BitwiseXorAgg(mut f) => {
1599 f.this = transform_recursive(f.this, transform_fn)?;
1600 Expression::BitwiseXorAgg(f)
1601 }
1602 Expression::PipeOperator(mut pipe) => {
1603 pipe.this = transform_recursive(pipe.this, transform_fn)?;
1604 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
1605 Expression::PipeOperator(pipe)
1606 }
1607
1608 // ArrayExcept/ArrayContains/ArrayDistinct: recurse into children
1609 Expression::ArrayExcept(mut f) => {
1610 f.this = transform_recursive(f.this, transform_fn)?;
1611 f.expression = transform_recursive(f.expression, transform_fn)?;
1612 Expression::ArrayExcept(f)
1613 }
1614 Expression::ArrayContains(mut f) => {
1615 f.this = transform_recursive(f.this, transform_fn)?;
1616 f.expression = transform_recursive(f.expression, transform_fn)?;
1617 Expression::ArrayContains(f)
1618 }
1619 Expression::ArrayDistinct(mut f) => {
1620 f.this = transform_recursive(f.this, transform_fn)?;
1621 Expression::ArrayDistinct(f)
1622 }
1623 Expression::ArrayPosition(mut f) => {
1624 f.this = transform_recursive(f.this, transform_fn)?;
1625 f.expression = transform_recursive(f.expression, transform_fn)?;
1626 Expression::ArrayPosition(f)
1627 }
1628
1629 // Pass through leaf nodes unchanged
1630 other => other,
1631 };
1632
1633 // Then apply the transform function
1634 transform_fn(expr)
1635}
1636
// NOTE: the description below belongs to `configs_for_dialect_type`, defined after
// the cached configs. It is kept as a plain `//` comment (not `///`) because Rust
// doc comments are attributes that skip over ordinary comments — as `///` lines
// here they would wrongly attach to `CachedDialectConfig` as its rustdoc.
//
// Returns the tokenizer config, generator config, and expression transform closure
// for a built-in dialect type. This is the shared implementation used by both
// `Dialect::get()` and custom dialect construction.
// ---------------------------------------------------------------------------
// Cached dialect configurations
// ---------------------------------------------------------------------------
1643
/// Pre-computed tokenizer + generator configs for a dialect, cached via `LazyLock`.
/// Transform closures are cheap (unit-struct method calls) and created fresh each time.
struct CachedDialectConfig {
    // Dialect lexing rules; handed out by value (cloned) for each lookup.
    tokenizer_config: TokenizerConfig,
    // SQL rendering settings; stored behind `Arc` so handing out a copy is a
    // refcount bump rather than a full config clone.
    generator_config: Arc<GeneratorConfig>,
}
1650
/// Declare a per-dialect `LazyLock<CachedDialectConfig>` static.
///
/// Two forms:
/// - `cached_dialect!(NAME, DialectStruct, "feature")` — static compiled only when
///   the given Cargo feature is enabled (the common, feature-gated case).
/// - `cached_dialect!(NAME, DialectStruct)` — ungated static, for dialects that are
///   always compiled (e.g. `GenericDialect`), avoiding a hand-written duplicate of
///   the macro body.
macro_rules! cached_dialect {
    // Feature-gated form: attach the cfg and delegate to the ungated form so the
    // static-construction logic exists in exactly one place.
    ($static_name:ident, $dialect_struct:expr, $feature:literal) => {
        #[cfg(feature = $feature)]
        cached_dialect!($static_name, $dialect_struct);
    };
    // Ungated form: lazily build the dialect once and snapshot its configs.
    ($static_name:ident, $dialect_struct:expr) => {
        static $static_name: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
            let d = $dialect_struct;
            CachedDialectConfig {
                tokenizer_config: d.tokenizer_config(),
                generator_config: Arc::new(d.generator_config()),
            }
        });
    };
}
1664
1665static CACHED_GENERIC: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
1666 let d = GenericDialect;
1667 CachedDialectConfig {
1668 tokenizer_config: d.tokenizer_config(),
1669 generator_config: Arc::new(d.generator_config()),
1670 }
1671});
1672
// One cached `LazyLock` static per feature-gated dialect. Each static only exists
// when its corresponding `dialect-*` Cargo feature is enabled (the cfg lives inside
// the macro), and is read by `configs_for_dialect_type` below.
cached_dialect!(CACHED_POSTGRESQL, PostgresDialect, "dialect-postgresql");
cached_dialect!(CACHED_MYSQL, MySQLDialect, "dialect-mysql");
cached_dialect!(CACHED_BIGQUERY, BigQueryDialect, "dialect-bigquery");
cached_dialect!(CACHED_SNOWFLAKE, SnowflakeDialect, "dialect-snowflake");
cached_dialect!(CACHED_DUCKDB, DuckDBDialect, "dialect-duckdb");
cached_dialect!(CACHED_TSQL, TSQLDialect, "dialect-tsql");
cached_dialect!(CACHED_ORACLE, OracleDialect, "dialect-oracle");
cached_dialect!(CACHED_HIVE, HiveDialect, "dialect-hive");
cached_dialect!(CACHED_SPARK, SparkDialect, "dialect-spark");
cached_dialect!(CACHED_SQLITE, SQLiteDialect, "dialect-sqlite");
cached_dialect!(CACHED_PRESTO, PrestoDialect, "dialect-presto");
cached_dialect!(CACHED_TRINO, TrinoDialect, "dialect-trino");
cached_dialect!(CACHED_REDSHIFT, RedshiftDialect, "dialect-redshift");
cached_dialect!(CACHED_CLICKHOUSE, ClickHouseDialect, "dialect-clickhouse");
cached_dialect!(CACHED_DATABRICKS, DatabricksDialect, "dialect-databricks");
cached_dialect!(CACHED_ATHENA, AthenaDialect, "dialect-athena");
cached_dialect!(CACHED_TERADATA, TeradataDialect, "dialect-teradata");
cached_dialect!(CACHED_DORIS, DorisDialect, "dialect-doris");
cached_dialect!(CACHED_STARROCKS, StarRocksDialect, "dialect-starrocks");
cached_dialect!(
    CACHED_MATERIALIZE,
    MaterializeDialect,
    "dialect-materialize"
);
cached_dialect!(CACHED_RISINGWAVE, RisingWaveDialect, "dialect-risingwave");
cached_dialect!(
    CACHED_SINGLESTORE,
    SingleStoreDialect,
    "dialect-singlestore"
);
cached_dialect!(
    CACHED_COCKROACHDB,
    CockroachDBDialect,
    "dialect-cockroachdb"
);
cached_dialect!(CACHED_TIDB, TiDBDialect, "dialect-tidb");
cached_dialect!(CACHED_DRUID, DruidDialect, "dialect-druid");
cached_dialect!(CACHED_SOLR, SolrDialect, "dialect-solr");
cached_dialect!(CACHED_TABLEAU, TableauDialect, "dialect-tableau");
cached_dialect!(CACHED_DUNE, DuneDialect, "dialect-dune");
cached_dialect!(CACHED_FABRIC, FabricDialect, "dialect-fabric");
cached_dialect!(CACHED_DRILL, DrillDialect, "dialect-drill");
cached_dialect!(CACHED_DREMIO, DremioDialect, "dialect-dremio");
cached_dialect!(CACHED_EXASOL, ExasolDialect, "dialect-exasol");
cached_dialect!(CACHED_DATAFUSION, DataFusionDialect, "dialect-datafusion");
1718
/// Resolve the tokenizer config, generator config, and per-node transform
/// closure for a built-in [`DialectType`].
///
/// Configs are cloned out of the per-dialect `LazyLock` caches (the generator
/// config is an `Arc`, so that clone is just a refcount bump); the transform
/// closure is rebuilt fresh on every call. Dialect types whose cargo feature
/// is not compiled in fall through to the `Generic` cache via the catch-all arm.
fn configs_for_dialect_type(
    dt: DialectType,
) -> (
    TokenizerConfig,
    Arc<GeneratorConfig>,
    Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
) {
    /// Clone configs from a cached static and pair with a fresh transform closure.
    macro_rules! from_cache {
        ($cache:expr, $dialect_struct:expr) => {{
            let c = &*$cache;
            (
                c.tokenizer_config.clone(),
                c.generator_config.clone(),
                Box::new(move |e| $dialect_struct.transform_expr(e)),
            )
        }};
    }
    match dt {
        #[cfg(feature = "dialect-postgresql")]
        DialectType::PostgreSQL => from_cache!(CACHED_POSTGRESQL, PostgresDialect),
        #[cfg(feature = "dialect-mysql")]
        DialectType::MySQL => from_cache!(CACHED_MYSQL, MySQLDialect),
        #[cfg(feature = "dialect-bigquery")]
        DialectType::BigQuery => from_cache!(CACHED_BIGQUERY, BigQueryDialect),
        #[cfg(feature = "dialect-snowflake")]
        DialectType::Snowflake => from_cache!(CACHED_SNOWFLAKE, SnowflakeDialect),
        #[cfg(feature = "dialect-duckdb")]
        DialectType::DuckDB => from_cache!(CACHED_DUCKDB, DuckDBDialect),
        #[cfg(feature = "dialect-tsql")]
        DialectType::TSQL => from_cache!(CACHED_TSQL, TSQLDialect),
        #[cfg(feature = "dialect-oracle")]
        DialectType::Oracle => from_cache!(CACHED_ORACLE, OracleDialect),
        #[cfg(feature = "dialect-hive")]
        DialectType::Hive => from_cache!(CACHED_HIVE, HiveDialect),
        #[cfg(feature = "dialect-spark")]
        DialectType::Spark => from_cache!(CACHED_SPARK, SparkDialect),
        #[cfg(feature = "dialect-sqlite")]
        DialectType::SQLite => from_cache!(CACHED_SQLITE, SQLiteDialect),
        #[cfg(feature = "dialect-presto")]
        DialectType::Presto => from_cache!(CACHED_PRESTO, PrestoDialect),
        #[cfg(feature = "dialect-trino")]
        DialectType::Trino => from_cache!(CACHED_TRINO, TrinoDialect),
        #[cfg(feature = "dialect-redshift")]
        DialectType::Redshift => from_cache!(CACHED_REDSHIFT, RedshiftDialect),
        #[cfg(feature = "dialect-clickhouse")]
        DialectType::ClickHouse => from_cache!(CACHED_CLICKHOUSE, ClickHouseDialect),
        #[cfg(feature = "dialect-databricks")]
        DialectType::Databricks => from_cache!(CACHED_DATABRICKS, DatabricksDialect),
        #[cfg(feature = "dialect-athena")]
        DialectType::Athena => from_cache!(CACHED_ATHENA, AthenaDialect),
        #[cfg(feature = "dialect-teradata")]
        DialectType::Teradata => from_cache!(CACHED_TERADATA, TeradataDialect),
        #[cfg(feature = "dialect-doris")]
        DialectType::Doris => from_cache!(CACHED_DORIS, DorisDialect),
        #[cfg(feature = "dialect-starrocks")]
        DialectType::StarRocks => from_cache!(CACHED_STARROCKS, StarRocksDialect),
        #[cfg(feature = "dialect-materialize")]
        DialectType::Materialize => from_cache!(CACHED_MATERIALIZE, MaterializeDialect),
        #[cfg(feature = "dialect-risingwave")]
        DialectType::RisingWave => from_cache!(CACHED_RISINGWAVE, RisingWaveDialect),
        #[cfg(feature = "dialect-singlestore")]
        DialectType::SingleStore => from_cache!(CACHED_SINGLESTORE, SingleStoreDialect),
        #[cfg(feature = "dialect-cockroachdb")]
        DialectType::CockroachDB => from_cache!(CACHED_COCKROACHDB, CockroachDBDialect),
        #[cfg(feature = "dialect-tidb")]
        DialectType::TiDB => from_cache!(CACHED_TIDB, TiDBDialect),
        #[cfg(feature = "dialect-druid")]
        DialectType::Druid => from_cache!(CACHED_DRUID, DruidDialect),
        #[cfg(feature = "dialect-solr")]
        DialectType::Solr => from_cache!(CACHED_SOLR, SolrDialect),
        #[cfg(feature = "dialect-tableau")]
        DialectType::Tableau => from_cache!(CACHED_TABLEAU, TableauDialect),
        #[cfg(feature = "dialect-dune")]
        DialectType::Dune => from_cache!(CACHED_DUNE, DuneDialect),
        #[cfg(feature = "dialect-fabric")]
        DialectType::Fabric => from_cache!(CACHED_FABRIC, FabricDialect),
        #[cfg(feature = "dialect-drill")]
        DialectType::Drill => from_cache!(CACHED_DRILL, DrillDialect),
        #[cfg(feature = "dialect-dremio")]
        DialectType::Dremio => from_cache!(CACHED_DREMIO, DremioDialect),
        #[cfg(feature = "dialect-exasol")]
        DialectType::Exasol => from_cache!(CACHED_EXASOL, ExasolDialect),
        #[cfg(feature = "dialect-datafusion")]
        DialectType::DataFusion => from_cache!(CACHED_DATAFUSION, DataFusionDialect),
        _ => from_cache!(CACHED_GENERIC, GenericDialect),
    }
}
1807
1808// ---------------------------------------------------------------------------
1809// Custom dialect registry
1810// ---------------------------------------------------------------------------
1811
/// Global registry of user-registered custom dialects, keyed by name.
/// Guarded by an `RwLock` so concurrent lookups (reads) do not block each other.
static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));
1814
/// Immutable snapshot of a registered custom dialect, stored in
/// [`CUSTOM_DIALECT_REGISTRY`] and used to build `Dialect` handles on lookup.
struct CustomDialectConfig {
    /// Registry key; must not collide with any built-in dialect name.
    name: String,
    /// Built-in dialect this custom dialect inherits configuration from.
    base_dialect: DialectType,
    /// Tokenizer config inherited from the base, after user modifiers.
    tokenizer_config: TokenizerConfig,
    /// Generator config inherited from the base, after user modifiers.
    generator_config: GeneratorConfig,
    /// Optional per-node transform; when set, replaces the base dialect's.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocess; when set, replaces the built-in preprocess.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1823
1824/// Fluent builder for creating and registering custom SQL dialects.
1825///
1826/// A custom dialect is based on an existing built-in dialect and allows selective
1827/// overrides of tokenizer configuration, generator configuration, and expression
1828/// transforms.
1829///
1830/// # Example
1831///
1832/// ```rust,ignore
1833/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
1834/// use polyglot_sql::generator::NormalizeFunctions;
1835///
1836/// CustomDialectBuilder::new("my_postgres")
1837/// .based_on(DialectType::PostgreSQL)
1838/// .generator_config_modifier(|gc| {
1839/// gc.normalize_functions = NormalizeFunctions::Lower;
1840/// })
1841/// .register()
1842/// .unwrap();
1843///
1844/// let d = Dialect::get_by_name("my_postgres").unwrap();
1845/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
1846/// let sql = d.generate(&exprs[0]).unwrap();
1847/// assert_eq!(sql, "select count(*)");
1848///
1849/// polyglot_sql::unregister_custom_dialect("my_postgres");
1850/// ```
pub struct CustomDialectBuilder {
    /// Name under which the dialect will be registered.
    name: String,
    /// Built-in dialect whose configs are used as the starting point.
    base_dialect: DialectType,
    /// One-shot closure mutating the inherited tokenizer config, if any.
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    /// One-shot closure mutating the inherited generator config, if any.
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    /// Custom per-node transform (replaces the base dialect's), if any.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Custom whole-tree preprocess (replaces the built-in one), if any.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1859
1860impl CustomDialectBuilder {
1861 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
1862 pub fn new(name: impl Into<String>) -> Self {
1863 Self {
1864 name: name.into(),
1865 base_dialect: DialectType::Generic,
1866 tokenizer_modifier: None,
1867 generator_modifier: None,
1868 transform: None,
1869 preprocess: None,
1870 }
1871 }
1872
1873 /// Set the base built-in dialect to inherit configuration from.
1874 pub fn based_on(mut self, dialect: DialectType) -> Self {
1875 self.base_dialect = dialect;
1876 self
1877 }
1878
1879 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
1880 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
1881 where
1882 F: FnOnce(&mut TokenizerConfig) + 'static,
1883 {
1884 self.tokenizer_modifier = Some(Box::new(f));
1885 self
1886 }
1887
1888 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
1889 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
1890 where
1891 F: FnOnce(&mut GeneratorConfig) + 'static,
1892 {
1893 self.generator_modifier = Some(Box::new(f));
1894 self
1895 }
1896
1897 /// Set a custom per-node expression transform function.
1898 ///
1899 /// This replaces the base dialect's transform. It is called on every expression
1900 /// node during the recursive transform pass.
1901 pub fn transform_fn<F>(mut self, f: F) -> Self
1902 where
1903 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1904 {
1905 self.transform = Some(Arc::new(f));
1906 self
1907 }
1908
1909 /// Set a custom whole-tree preprocessing function.
1910 ///
1911 /// This replaces the base dialect's built-in preprocessing. It is called once
1912 /// on the entire expression tree before the recursive per-node transform.
1913 pub fn preprocess_fn<F>(mut self, f: F) -> Self
1914 where
1915 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1916 {
1917 self.preprocess = Some(Arc::new(f));
1918 self
1919 }
1920
1921 /// Build the custom dialect configuration and register it in the global registry.
1922 ///
1923 /// Returns an error if:
1924 /// - The name collides with a built-in dialect name
1925 /// - A custom dialect with the same name is already registered
1926 pub fn register(self) -> Result<()> {
1927 // Reject names that collide with built-in dialects
1928 if DialectType::from_str(&self.name).is_ok() {
1929 return Err(crate::error::Error::parse(
1930 format!(
1931 "Cannot register custom dialect '{}': name collides with built-in dialect",
1932 self.name
1933 ),
1934 0,
1935 0,
1936 0,
1937 0,
1938 ));
1939 }
1940
1941 // Get base configs
1942 let (mut tok_config, arc_gen_config, _base_transform) =
1943 configs_for_dialect_type(self.base_dialect);
1944 let mut gen_config = (*arc_gen_config).clone();
1945
1946 // Apply modifiers
1947 if let Some(tok_mod) = self.tokenizer_modifier {
1948 tok_mod(&mut tok_config);
1949 }
1950 if let Some(gen_mod) = self.generator_modifier {
1951 gen_mod(&mut gen_config);
1952 }
1953
1954 let config = CustomDialectConfig {
1955 name: self.name.clone(),
1956 base_dialect: self.base_dialect,
1957 tokenizer_config: tok_config,
1958 generator_config: gen_config,
1959 transform: self.transform,
1960 preprocess: self.preprocess,
1961 };
1962
1963 register_custom_dialect(config)
1964 }
1965}
1966
1967use std::str::FromStr;
1968
1969fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
1970 let mut registry = CUSTOM_DIALECT_REGISTRY.write().map_err(|e| {
1971 crate::error::Error::parse(format!("Registry lock poisoned: {}", e), 0, 0, 0, 0)
1972 })?;
1973
1974 if registry.contains_key(&config.name) {
1975 return Err(crate::error::Error::parse(
1976 format!("Custom dialect '{}' is already registered", config.name),
1977 0,
1978 0,
1979 0,
1980 0,
1981 ));
1982 }
1983
1984 registry.insert(config.name.clone(), Arc::new(config));
1985 Ok(())
1986}
1987
1988/// Remove a custom dialect from the global registry.
1989///
1990/// Returns `true` if a dialect with that name was found and removed,
1991/// `false` if no such custom dialect existed.
1992pub fn unregister_custom_dialect(name: &str) -> bool {
1993 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
1994 registry.remove(name).is_some()
1995 } else {
1996 false
1997 }
1998}
1999
2000fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
2001 CUSTOM_DIALECT_REGISTRY
2002 .read()
2003 .ok()
2004 .and_then(|registry| registry.get(name).cloned())
2005}
2006
2007/// Main entry point for dialect-specific SQL operations.
2008///
2009/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
2010/// transformer for a specific SQL database engine. It is the high-level API through
2011/// which callers parse, generate, transform, and transpile SQL.
2012///
2013/// # Usage
2014///
2015/// ```rust,ignore
2016/// use polyglot_sql::dialects::{Dialect, DialectType};
2017///
2018/// // Parse PostgreSQL SQL into an AST
2019/// let pg = Dialect::get(DialectType::PostgreSQL);
2020/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
2021///
2022/// // Transpile from PostgreSQL to BigQuery
2023/// let results = pg.transpile("SELECT NOW()", DialectType::BigQuery)?;
2024/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
2025/// ```
2026///
2027/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
2028/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    /// The built-in dialect type; for custom dialects this is the base
    /// dialect they inherit from (see `from_custom_config`).
    dialect_type: DialectType,
    /// Tokenizer pre-built from this dialect's tokenizer configuration.
    tokenizer: Tokenizer,
    /// Shared generator configuration; `Arc` so `generate` can share it
    /// cheaply with the `Generator` on the fast path.
    generator_config: Arc<GeneratorConfig>,
    /// Per-node expression transform applied during `transform`.
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
2039
2040/// Options for [`Dialect::transpile_with`].
2041///
2042/// Use [`TranspileOptions::default`] for defaults, then tweak the fields you need.
2043/// The struct is marked `#[non_exhaustive]` so new fields can be added without
2044/// breaking the API.
2045///
2046/// The struct derives `Serialize`/`Deserialize` using camelCase field names so
2047/// it can be round-tripped over JSON bridges (C FFI, WASM) without mapping.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
// `default` on the container lets JSON payloads omit fields and still
// deserialize (missing fields fall back to `Default`).
#[serde(rename_all = "camelCase", default)]
#[non_exhaustive]
pub struct TranspileOptions {
    /// Whether to pretty-print the output SQL.
    pub pretty: bool,
}
2055
2056impl TranspileOptions {
2057 /// Construct options with pretty-printing enabled.
2058 pub fn pretty() -> Self {
2059 Self { pretty: true }
2060 }
2061}
2062
2063/// A value that can be used as the target dialect in [`Dialect::transpile`] /
2064/// [`Dialect::transpile_with`].
2065///
2066/// Implemented for [`DialectType`] (built-in dialect enum) and `&Dialect` (any
2067/// dialect handle, including custom ones). End users do not normally need to
2068/// implement this trait themselves.
pub trait TranspileTarget {
    /// Invoke `f` with a reference to the resolved target dialect.
    ///
    /// Callback-based (rather than returning a `Dialect`) so implementations
    /// may either lend an existing handle or construct a temporary one.
    fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R;
}
2073
impl TranspileTarget for DialectType {
    // Builds a fresh `Dialect` handle for the enum variant on every call.
    fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R {
        f(&Dialect::get(self))
    }
}
2079
impl TranspileTarget for &Dialect {
    // An existing handle (built-in or custom) is lent through as-is.
    fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R {
        f(self)
    }
}
2085
2086impl Dialect {
2087 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
2088 ///
2089 /// This is the primary constructor. It initializes the tokenizer, generator config,
2090 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
2091 /// For hybrid dialects like Athena, it also sets up expression-specific generator
2092 /// config routing.
2093 pub fn get(dialect_type: DialectType) -> Self {
2094 let (tokenizer_config, generator_config, transformer) =
2095 configs_for_dialect_type(dialect_type);
2096
2097 // Set up expression-specific generator config for hybrid dialects
2098 let generator_config_for_expr: Option<
2099 Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
2100 > = match dialect_type {
2101 #[cfg(feature = "dialect-athena")]
2102 DialectType::Athena => Some(Box::new(|expr| {
2103 AthenaDialect.generator_config_for_expr(expr)
2104 })),
2105 _ => None,
2106 };
2107
2108 Self {
2109 dialect_type,
2110 tokenizer: Tokenizer::new(tokenizer_config),
2111 generator_config,
2112 transformer,
2113 generator_config_for_expr,
2114 custom_preprocess: None,
2115 }
2116 }
2117
2118 /// Look up a dialect by string name.
2119 ///
2120 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
2121 /// falls back to the custom dialect registry. Returns `None` if no dialect
2122 /// with the given name exists.
2123 pub fn get_by_name(name: &str) -> Option<Self> {
2124 // Try built-in first
2125 if let Ok(dt) = DialectType::from_str(name) {
2126 return Some(Self::get(dt));
2127 }
2128
2129 // Try custom registry
2130 let config = get_custom_dialect_config(name)?;
2131 Some(Self::from_custom_config(&config))
2132 }
2133
2134 /// Construct a `Dialect` from a custom dialect configuration.
2135 fn from_custom_config(config: &CustomDialectConfig) -> Self {
2136 // Build the transformer: use custom if provided, else use base dialect's
2137 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
2138 if let Some(ref custom_transform) = config.transform {
2139 let t = Arc::clone(custom_transform);
2140 Box::new(move |e| t(e))
2141 } else {
2142 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
2143 base_transform
2144 };
2145
2146 // Build the custom preprocess: use custom if provided
2147 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
2148 config.preprocess.as_ref().map(|p| {
2149 let p = Arc::clone(p);
2150 Box::new(move |e: Expression| p(e))
2151 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
2152 });
2153
2154 Self {
2155 dialect_type: config.base_dialect,
2156 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
2157 generator_config: Arc::new(config.generator_config.clone()),
2158 transformer,
2159 generator_config_for_expr: None,
2160 custom_preprocess,
2161 }
2162 }
2163
    /// Get the dialect type. For custom dialects this is the base built-in
    /// dialect they were derived from (see `from_custom_config`).
    pub fn dialect_type(&self) -> DialectType {
        self.dialect_type
    }
2168
    /// Get the generator configuration shared by all non-hybrid generation paths.
    pub fn generator_config(&self) -> &GeneratorConfig {
        &self.generator_config
    }
2173
2174 /// Parses a SQL string into a list of [`Expression`] AST nodes.
2175 ///
2176 /// The input may contain multiple semicolon-separated statements; each one
2177 /// produces a separate element in the returned vector. Tokenization uses
2178 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
2179 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
2180 let tokens = self.tokenizer.tokenize(sql)?;
2181 let config = crate::parser::ParserConfig {
2182 dialect: Some(self.dialect_type),
2183 ..Default::default()
2184 };
2185 let mut parser = Parser::with_source(tokens, config, sql.to_string());
2186 parser.parse()
2187 }
2188
    /// Tokenize SQL using this dialect's tokenizer configuration.
    ///
    /// Thin passthrough to the pre-built tokenizer; useful for callers that
    /// want raw tokens without running the parser.
    pub fn tokenize(&self, sql: &str) -> Result<Vec<Token>> {
        self.tokenizer.tokenize(sql)
    }
2193
2194 /// Get the generator config for a specific expression (supports hybrid dialects).
2195 /// Returns an owned `GeneratorConfig` suitable for mutation before generation.
2196 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
2197 if let Some(ref config_fn) = self.generator_config_for_expr {
2198 config_fn(expr)
2199 } else {
2200 (*self.generator_config).clone()
2201 }
2202 }
2203
2204 /// Generates a SQL string from an [`Expression`] AST node.
2205 ///
2206 /// The output uses this dialect's generator configuration for identifier quoting,
2207 /// keyword casing, function name normalization, and syntax style. The result is
2208 /// a single-line (non-pretty) SQL string.
2209 pub fn generate(&self, expr: &Expression) -> Result<String> {
2210 // Fast path: when no per-expression config override, share the Arc cheaply.
2211 if self.generator_config_for_expr.is_none() {
2212 let mut generator = Generator::with_arc_config(self.generator_config.clone());
2213 return generator.generate(expr);
2214 }
2215 let config = self.get_config_for_expr(expr);
2216 let mut generator = Generator::with_config(config);
2217 generator.generate(expr)
2218 }
2219
2220 /// Generate SQL from an expression with pretty printing enabled
2221 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
2222 let mut config = self.get_config_for_expr(expr);
2223 config.pretty = true;
2224 let mut generator = Generator::with_config(config);
2225 generator.generate(expr)
2226 }
2227
2228 /// Generate SQL from an expression with source dialect info (for transpilation)
2229 pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
2230 let mut config = self.get_config_for_expr(expr);
2231 config.source_dialect = Some(source);
2232 let mut generator = Generator::with_config(config);
2233 generator.generate(expr)
2234 }
2235
2236 /// Generate SQL from an expression with pretty printing and source dialect info
2237 pub fn generate_pretty_with_source(
2238 &self,
2239 expr: &Expression,
2240 source: DialectType,
2241 ) -> Result<String> {
2242 let mut config = self.get_config_for_expr(expr);
2243 config.pretty = true;
2244 config.source_dialect = Some(source);
2245 let mut generator = Generator::with_config(config);
2246 generator.generate(expr)
2247 }
2248
2249 /// Generate SQL from an expression with forced identifier quoting (identify=True)
2250 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
2251 let mut config = self.get_config_for_expr(expr);
2252 config.always_quote_identifiers = true;
2253 let mut generator = Generator::with_config(config);
2254 generator.generate(expr)
2255 }
2256
2257 /// Generate SQL from an expression with pretty printing and forced identifier quoting
2258 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
2259 let mut config = (*self.generator_config).clone();
2260 config.pretty = true;
2261 config.always_quote_identifiers = true;
2262 let mut generator = Generator::with_config(config);
2263 generator.generate(expr)
2264 }
2265
2266 /// Generate SQL from an expression with caller-specified config overrides
2267 pub fn generate_with_overrides(
2268 &self,
2269 expr: &Expression,
2270 overrides: impl FnOnce(&mut GeneratorConfig),
2271 ) -> Result<String> {
2272 let mut config = self.get_config_for_expr(expr);
2273 overrides(&mut config);
2274 let mut generator = Generator::with_config(config);
2275 generator.generate(expr)
2276 }
2277
    /// Transforms an expression tree to conform to this dialect's syntax and semantics.
    ///
    /// The transformation proceeds in two phases:
    /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
    ///    ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
    /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
    ///    that applies this dialect's [`DialectImpl::transform_expr`] to every node.
    ///
    /// This method is used both during transpilation (to rewrite an AST for a target dialect)
    /// and for identity transforms (normalizing SQL within the same dialect).
    pub fn transform(&self, expr: Expression) -> Result<Expression> {
        // Phase 1: whole-tree preprocessing (custom override or built-in per-dialect).
        let preprocessed = self.preprocess(expr)?;
        // Phase 2: bottom-up per-node transform using this dialect's transformer.
        transform_recursive(preprocessed, &self.transformer)
    }
2294
    /// Apply dialect-specific preprocessing transforms.
    ///
    /// These are whole-tree structural rewrites run once, before the recursive
    /// per-node transform pass (see [`transform`](Self::transform)). When a
    /// custom preprocess function was registered (custom dialects), it fully
    /// replaces the built-in per-dialect logic below.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        // If a custom preprocess function is set, use it instead of the built-in logic
        if let Some(ref custom_preprocess) = self.custom_preprocess {
            return custom_preprocess(expr);
        }

        // Only pull in the transforms module when at least one dialect that
        // uses it is compiled in, so feature-minimal builds avoid the import.
        #[cfg(any(
            feature = "dialect-mysql",
            feature = "dialect-postgresql",
            feature = "dialect-bigquery",
            feature = "dialect-snowflake",
            feature = "dialect-tsql",
            feature = "dialect-spark",
            feature = "dialect-databricks",
            feature = "dialect-hive",
            feature = "dialect-sqlite",
            feature = "dialect-trino",
            feature = "dialect-presto",
            feature = "dialect-duckdb",
            feature = "dialect-redshift",
            feature = "dialect-starrocks",
            feature = "dialect-oracle",
            feature = "dialect-clickhouse",
        ))]
        use crate::transforms;

        match self.dialect_type {
            // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
            // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
            #[cfg(feature = "dialect-mysql")]
            DialectType::MySQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_full_outer_join(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // PostgreSQL doesn't support QUALIFY
            // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
            // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
            #[cfg(feature = "dialect-postgresql")]
            DialectType::PostgreSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
                // Normalize SET ... TO to SET ... = in CREATE FUNCTION
                // Only normalize when sqlglot would fully parse (no body) —
                // sqlglot falls back to Command for complex function bodies,
                // preserving the original text including TO.
                let expr = if let Expression::CreateFunction(mut cf) = expr {
                    if cf.body.is_none() {
                        for opt in &mut cf.set_options {
                            if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
                                &mut opt.value
                            {
                                *use_to = false;
                            }
                        }
                    }
                    Expression::CreateFunction(cf)
                } else {
                    expr
                };
                Ok(expr)
            }
            // BigQuery doesn't support DISTINCT ON or CTE column aliases
            #[cfg(feature = "dialect-bigquery")]
            DialectType::BigQuery => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::pushdown_cte_column_names(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
                Ok(expr)
            }
            // Snowflake
            #[cfg(feature = "dialect-snowflake")]
            DialectType::Snowflake => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
                Ok(expr)
            }
            // TSQL doesn't support QUALIFY
            // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
            // TSQL doesn't support CTEs in subqueries (hoist to top level)
            // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
            // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
            #[cfg(feature = "dialect-tsql")]
            DialectType::TSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::ensure_bools(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                let expr = transforms::qualify_derived_table_outputs(expr)?;
                Ok(expr)
            }
            // Spark doesn't support QUALIFY (but Databricks does)
            // Spark doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-spark")]
            DialectType::Spark => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Databricks supports QUALIFY natively
            // Databricks doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-databricks")]
            DialectType::Databricks => {
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Hive doesn't support QUALIFY or CTEs in subqueries
            #[cfg(feature = "dialect-hive")]
            DialectType::Hive => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // SQLite doesn't support QUALIFY
            #[cfg(feature = "dialect-sqlite")]
            DialectType::SQLite => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Trino doesn't support QUALIFY
            #[cfg(feature = "dialect-trino")]
            DialectType::Trino => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
                Ok(expr)
            }
            // Presto doesn't support QUALIFY or WINDOW clause
            #[cfg(feature = "dialect-presto")]
            DialectType::Presto => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
                Ok(expr)
            }
            // DuckDB supports QUALIFY - no elimination needed
            // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
            // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
            #[cfg(feature = "dialect-duckdb")]
            DialectType::DuckDB => {
                let expr = transforms::expand_posexplode_duckdb(expr)?;
                let expr = transforms::expand_like_any(expr)?;
                Ok(expr)
            }
            // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
            #[cfg(feature = "dialect-redshift")]
            DialectType::Redshift => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
            #[cfg(feature = "dialect-starrocks")]
            DialectType::StarRocks => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::expand_between_in_delete(expr)?;
                Ok(expr)
            }
            // DataFusion supports QUALIFY and semi/anti joins natively
            #[cfg(feature = "dialect-datafusion")]
            DialectType::DataFusion => Ok(expr),
            // Oracle doesn't support QUALIFY
            #[cfg(feature = "dialect-oracle")]
            DialectType::Oracle => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Drill - no special preprocessing needed
            #[cfg(feature = "dialect-drill")]
            DialectType::Drill => Ok(expr),
            // Teradata - no special preprocessing needed
            #[cfg(feature = "dialect-teradata")]
            DialectType::Teradata => Ok(expr),
            // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
            #[cfg(feature = "dialect-clickhouse")]
            DialectType::ClickHouse => {
                let expr = transforms::no_limit_order_by_union(expr)?;
                Ok(expr)
            }
            // Other dialects - no preprocessing
            _ => Ok(expr),
        }
    }
2488
    /// Transpile SQL from this dialect to the given target dialect.
    ///
    /// The target may be specified as either a built-in [`DialectType`] enum variant
    /// or as a reference to a [`Dialect`] handle (built-in or custom). Both work:
    ///
    /// ```rust,ignore
    /// let pg = Dialect::get(DialectType::PostgreSQL);
    /// pg.transpile("SELECT NOW()", DialectType::BigQuery)?; // enum
    /// pg.transpile("SELECT NOW()", &custom_dialect)?;       // handle
    /// ```
    ///
    /// Returns one output string per input statement.
    /// For pretty-printing or other options, use [`transpile_with`](Self::transpile_with).
    pub fn transpile<T: TranspileTarget>(&self, sql: &str, target: T) -> Result<Vec<String>> {
        // Convenience wrapper: default options (non-pretty output).
        self.transpile_with(sql, target, TranspileOptions::default())
    }
2504
    /// Transpile SQL with configurable [`TranspileOptions`] (e.g. pretty-printing).
    ///
    /// Resolves `target` via [`TranspileTarget`] and delegates to the
    /// feature-dependent `transpile_inner` implementation.
    pub fn transpile_with<T: TranspileTarget>(
        &self,
        sql: &str,
        target: T,
        opts: TranspileOptions,
    ) -> Result<Vec<String>> {
        target.with_dialect(|td| self.transpile_inner(sql, td, opts.pretty))
    }
2514
2515 #[cfg(not(feature = "transpile"))]
2516 fn transpile_inner(
2517 &self,
2518 sql: &str,
2519 target_dialect: &Dialect,
2520 pretty: bool,
2521 ) -> Result<Vec<String>> {
2522 let target = target_dialect.dialect_type;
2523 // Without the transpile feature, only same-dialect or to/from generic is supported
2524 if self.dialect_type != target
2525 && self.dialect_type != DialectType::Generic
2526 && target != DialectType::Generic
2527 {
2528 return Err(crate::error::Error::parse(
2529 "Cross-dialect transpilation not available in this build",
2530 0,
2531 0,
2532 0,
2533 0,
2534 ));
2535 }
2536
2537 let expressions = self.parse(sql)?;
2538 let generic_identity =
2539 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
2540
2541 if generic_identity {
2542 return expressions
2543 .into_iter()
2544 .map(|expr| {
2545 if pretty {
2546 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
2547 } else {
2548 target_dialect.generate_with_source(&expr, self.dialect_type)
2549 }
2550 })
2551 .collect();
2552 }
2553
2554 expressions
2555 .into_iter()
2556 .map(|expr| {
2557 let transformed = target_dialect.transform(expr)?;
2558 if pretty {
2559 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)
2560 } else {
2561 target_dialect.generate_with_source(&transformed, self.dialect_type)
2562 }
2563 })
2564 .collect()
2565 }
2566
    /// Full transpilation pipeline (compiled with the `transpile` feature).
    ///
    /// Parses `sql` with the source dialect (`self`), then runs an ordered
    /// chain of source-specific, pair-specific, and target-specific AST
    /// rewrites before handing each statement to the target dialect's
    /// transform and generator. The order of the stages below is significant:
    /// several rewrites depend on the shape produced by earlier ones.
    ///
    /// Returns one generated SQL string per top-level statement; any parse,
    /// transform, or generation error aborts the whole batch.
    #[cfg(feature = "transpile")]
    fn transpile_inner(
        &self,
        sql: &str,
        target_dialect: &Dialect,
        pretty: bool,
    ) -> Result<Vec<String>> {
        let target = target_dialect.dialect_type;
        let expressions = self.parse(sql)?;
        let generic_identity =
            self.dialect_type == DialectType::Generic && target == DialectType::Generic;

        // Generic -> Generic: regenerate as-is without running any transforms.
        if generic_identity {
            return expressions
                .into_iter()
                .map(|expr| {
                    if pretty {
                        target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
                    } else {
                        target_dialect.generate_with_source(&expr, self.dialect_type)
                    }
                })
                .collect();
        }

        expressions
            .into_iter()
            .map(|expr| {
                // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
                // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
                // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
                let expr = if matches!(self.dialect_type, DialectType::DuckDB) {
                    use crate::expressions::DataType as DT;
                    transform_recursive(expr, &|e| match e {
                        Expression::DataType(DT::VarChar { .. }) => {
                            Ok(Expression::DataType(DT::Text))
                        }
                        Expression::DataType(DT::Char { .. }) => Ok(Expression::DataType(DT::Text)),
                        _ => Ok(e),
                    })?
                } else {
                    expr
                };

                // When source and target differ, first normalize the source dialect's
                // AST constructs to standard SQL, so that the target dialect can handle them.
                // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
                let normalized =
                    if self.dialect_type != target && self.dialect_type != DialectType::Generic {
                        self.transform(expr)?
                    } else {
                        expr
                    };

                // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
                // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
                // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
                // cross-dialect transpilation we need the unwrapped JSON_QUERY.
                let normalized =
                    if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                        && !matches!(target, DialectType::TSQL | DialectType::Fabric)
                    {
                        transform_recursive(normalized, &|e| {
                            if let Expression::Function(ref f) = e {
                                if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
                                    // Check if first arg is JSON_QUERY and second is JSON_VALUE
                                    if let (
                                        Expression::Function(ref jq),
                                        Expression::Function(ref jv),
                                    ) = (&f.args[0], &f.args[1])
                                    {
                                        if jq.name.eq_ignore_ascii_case("JSON_QUERY")
                                            && jv.name.eq_ignore_ascii_case("JSON_VALUE")
                                        {
                                            // Unwrap: return just JSON_QUERY(...)
                                            return Ok(f.args[0].clone());
                                        }
                                    }
                                }
                            }
                            Ok(e)
                        })?
                    } else {
                        normalized
                    };

                // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME
                // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects.
                // Python sqlglot parses Snowflake's CURRENT_TIME as Localtime expression.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && !matches!(target, DialectType::Snowflake)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("CURRENT_TIME") {
                                return Ok(Expression::Localtime(Box::new(
                                    crate::expressions::Localtime { this: None },
                                )));
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT))
                // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
                // transform. DuckDB requires the count argument to be BIGINT.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && matches!(target, DialectType::DuckDB)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("REPEAT") && f.args.len() == 2 {
                                // Check if first arg is space string literal
                                if let Expression::Literal(ref lit) = f.args[0] {
                                    if let crate::expressions::Literal::String(ref s) = lit.as_ref()
                                    {
                                        if s == " " {
                                            // Wrap second arg in CAST(... AS BIGINT) if not already
                                            if !matches!(f.args[1], Expression::Cast(_)) {
                                                let mut new_args = f.args.clone();
                                                new_args[1] = Expression::Cast(Box::new(
                                                    crate::expressions::Cast {
                                                        this: new_args[1].clone(),
                                                        to: crate::expressions::DataType::BigInt {
                                                            length: None,
                                                        },
                                                        trailing_comments: Vec::new(),
                                                        double_colon_syntax: false,
                                                        format: None,
                                                        default: None,
                                                        inferred_type: None,
                                                    },
                                                ));
                                                return Ok(Expression::Function(Box::new(
                                                    crate::expressions::Function {
                                                        name: f.name.clone(),
                                                        args: new_args,
                                                        distinct: f.distinct,
                                                        trailing_comments: f
                                                            .trailing_comments
                                                            .clone(),
                                                        use_bracket_syntax: f.use_bracket_syntax,
                                                        no_parens: f.no_parens,
                                                        quoted: f.quoted,
                                                        span: None,
                                                        inferred_type: None,
                                                    },
                                                )));
                                            }
                                        }
                                    }
                                }
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
                // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && !matches!(target, DialectType::BigQuery)
                {
                    crate::transforms::propagate_struct_field_names(normalized)?
                } else {
                    normalized
                };

                // Snowflake source to DuckDB target: RANDOM()/RANDOM(seed) -> scaled RANDOM()
                // Snowflake RANDOM() returns integer in [-2^63, 2^63-1], DuckDB RANDOM() returns float [0, 1)
                // Skip RANDOM inside UNIFORM/NORMAL/ZIPF/RANDSTR generator args since those
                // functions handle their generator args differently (as float seeds).
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && matches!(target, DialectType::DuckDB)
                {
                    // Builds `CAST(-2^63 + RANDOM() * (2^63 - (-2^63)) AS BIGINT)`,
                    // mapping DuckDB's [0, 1) float onto Snowflake's 64-bit int range.
                    fn make_scaled_random() -> Expression {
                        let lower =
                            Expression::Literal(Box::new(crate::expressions::Literal::Number(
                                "-9.223372036854776E+18".to_string(),
                            )));
                        let upper =
                            Expression::Literal(Box::new(crate::expressions::Literal::Number(
                                "9.223372036854776e+18".to_string(),
                            )));
                        let random_call = Expression::Random(crate::expressions::Random);
                        let range_size = Expression::Paren(Box::new(crate::expressions::Paren {
                            this: Expression::Sub(Box::new(crate::expressions::BinaryOp {
                                left: upper,
                                right: lower.clone(),
                                left_comments: vec![],
                                operator_comments: vec![],
                                trailing_comments: vec![],
                                inferred_type: None,
                            })),
                            trailing_comments: vec![],
                        }));
                        let scaled = Expression::Mul(Box::new(crate::expressions::BinaryOp {
                            left: random_call,
                            right: range_size,
                            left_comments: vec![],
                            operator_comments: vec![],
                            trailing_comments: vec![],
                            inferred_type: None,
                        }));
                        let shifted = Expression::Add(Box::new(crate::expressions::BinaryOp {
                            left: lower,
                            right: scaled,
                            left_comments: vec![],
                            operator_comments: vec![],
                            trailing_comments: vec![],
                            inferred_type: None,
                        }));
                        Expression::Cast(Box::new(crate::expressions::Cast {
                            this: shifted,
                            to: crate::expressions::DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }))
                    }

                    // Pre-process: protect seeded RANDOM(seed) inside UNIFORM/NORMAL/ZIPF/RANDSTR
                    // by converting Rand{seed: Some(s)} to Function{name:"RANDOM", args:[s]}.
                    // This prevents transform_recursive (which is bottom-up) from expanding
                    // seeded RANDOM into make_scaled_random() and losing the seed value.
                    // Unseeded RANDOM()/Rand{seed:None} is left as-is so it gets expanded
                    // and then un-expanded back to Expression::Random by the code below.
                    let normalized = transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            let n = f.name.to_ascii_uppercase();
                            if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" || n == "RANDSTR" {
                                if let Expression::Function(mut f) = e {
                                    for arg in f.args.iter_mut() {
                                        if let Expression::Rand(ref r) = arg {
                                            if r.lower.is_none() && r.upper.is_none() {
                                                if let Some(ref seed) = r.seed {
                                                    // Convert Rand{seed: Some(s)} to Function("RANDOM", [s])
                                                    // so it won't be expanded by the RANDOM expansion below
                                                    *arg = Expression::Function(Box::new(
                                                        crate::expressions::Function::new(
                                                            "RANDOM".to_string(),
                                                            vec![*seed.clone()],
                                                        ),
                                                    ));
                                                }
                                            }
                                        }
                                    }
                                    return Ok(Expression::Function(f));
                                }
                            }
                        }
                        Ok(e)
                    })?;

                    // transform_recursive processes bottom-up, so RANDOM() (unseeded) inside
                    // generator functions (UNIFORM, NORMAL, ZIPF) gets expanded before
                    // we see the parent. We detect this and undo the expansion by replacing
                    // the expanded pattern back with Expression::Random.
                    // Seeded RANDOM(seed) was already protected above as Function("RANDOM", [seed]).
                    // Note: RANDSTR is NOT included here — it needs the expanded form for unseeded
                    // RANDOM() since the DuckDB handler uses the expanded SQL as-is in the hash.
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            let n = f.name.to_ascii_uppercase();
                            if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" {
                                if let Expression::Function(mut f) = e {
                                    for arg in f.args.iter_mut() {
                                        // Detect expanded RANDOM pattern: CAST(-9.22... + RANDOM() * (...) AS BIGINT)
                                        if let Expression::Cast(ref cast) = arg {
                                            if matches!(
                                                cast.to,
                                                crate::expressions::DataType::BigInt { .. }
                                            ) {
                                                if let Expression::Add(ref add) = cast.this {
                                                    if let Expression::Literal(ref lit) = add.left {
                                                        if let crate::expressions::Literal::Number(
                                                            ref num,
                                                        ) = lit.as_ref()
                                                        {
                                                            if num == "-9.223372036854776E+18" {
                                                                *arg = Expression::Random(
                                                                    crate::expressions::Random,
                                                                );
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                    return Ok(Expression::Function(f));
                                }
                                return Ok(e);
                            }
                        }
                        match e {
                            Expression::Random(_) => Ok(make_scaled_random()),
                            // Rand(seed) with no bounds: drop seed and expand
                            // (DuckDB RANDOM doesn't support seeds)
                            Expression::Rand(ref r) if r.lower.is_none() && r.upper.is_none() => {
                                Ok(make_scaled_random())
                            }
                            _ => Ok(e),
                        }
                    })?
                } else {
                    normalized
                };

                // Apply cross-dialect semantic normalizations
                let normalized =
                    Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;

                // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
                // (SELECT UNNEST(..., max_depth => 2)) subquery
                // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::DuckDB)
                {
                    crate::transforms::wrap_duckdb_unnest_struct(normalized)?
                } else {
                    normalized
                };

                // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
                // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(
                        target,
                        DialectType::DuckDB
                            | DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Spark
                            | DialectType::Databricks
                    ) {
                    crate::transforms::unnest_alias_to_column_alias(normalized)?
                } else if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::BigQuery | DialectType::Redshift)
                {
                    // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
                    // but don't convert alias format (no _t0 wrapper)
                    let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
                    // For Redshift: strip UNNEST when arg is a column reference path
                    if matches!(target, DialectType::Redshift) {
                        crate::transforms::strip_unnest_column_refs(result)?
                    } else {
                        result
                    }
                } else {
                    normalized
                };

                // For Presto/Trino targets from PostgreSQL/Redshift source:
                // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
                let normalized = if matches!(
                    self.dialect_type,
                    DialectType::PostgreSQL | DialectType::Redshift
                ) && matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    crate::transforms::wrap_unnest_join_aliases(normalized)?
                } else {
                    normalized
                };

                // Eliminate DISTINCT ON with target-dialect awareness
                // This must happen after source transform (which may produce DISTINCT ON)
                // and before target transform, with knowledge of the target dialect's NULL ordering behavior
                let normalized =
                    crate::transforms::eliminate_distinct_on_for_dialect(normalized, Some(target))?;

                // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
                let normalized = if matches!(target, DialectType::Snowflake) {
                    Self::transform_generate_date_array_snowflake(normalized)?
                } else {
                    normalized
                };

                // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
                let normalized = if matches!(
                    target,
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive
                ) {
                    crate::transforms::unnest_to_explode_select(normalized)?
                } else {
                    normalized
                };

                // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it
                let normalized = if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
                    crate::transforms::no_limit_order_by_union(normalized)?
                } else {
                    normalized
                };

                // TSQL: Convert COUNT(*) -> COUNT_BIG(*) when source is not TSQL/Fabric
                // Python sqlglot does this in the TSQL generator, but we can't do it there
                // because it would break TSQL -> TSQL identity
                let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
                    && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Count(ref c) = e {
                            // Build COUNT_BIG(...) as an AggregateFunction
                            let args = if c.star {
                                vec![Expression::Star(crate::expressions::Star {
                                    table: None,
                                    except: None,
                                    replace: None,
                                    rename: None,
                                    trailing_comments: Vec::new(),
                                    span: None,
                                })]
                            } else if let Some(ref this) = c.this {
                                vec![this.clone()]
                            } else {
                                vec![]
                            };
                            Ok(Expression::AggregateFunction(Box::new(
                                crate::expressions::AggregateFunction {
                                    name: "COUNT_BIG".to_string(),
                                    args,
                                    distinct: c.distinct,
                                    filter: c.filter.clone(),
                                    order_by: Vec::new(),
                                    limit: None,
                                    ignore_nulls: None,
                                    inferred_type: None,
                                },
                            )))
                        } else {
                            Ok(e)
                        }
                    })?
                } else {
                    normalized
                };

                // All source-side and pair-specific rewrites are done; now run the
                // target dialect's own transform over the normalized AST.
                let transformed = target_dialect.transform(normalized)?;

                // DuckDB target: when FROM is RANGE(n), replace SEQ's ROW_NUMBER pattern with `range`
                let transformed = if matches!(target, DialectType::DuckDB) {
                    Self::seq_rownum_to_range(transformed)?
                } else {
                    transformed
                };

                let mut sql = if pretty {
                    target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)?
                } else {
                    target_dialect.generate_with_source(&transformed, self.dialect_type)?
                };

                // Align a known Snowflake pretty-print edge case with Python sqlglot output.
                if pretty && target == DialectType::Snowflake {
                    sql = Self::normalize_snowflake_pretty(sql);
                }

                Ok(sql)
            })
            .collect()
    }
3039}
3040
3041// Transpile-only methods: cross-dialect normalization and helpers
3042#[cfg(feature = "transpile")]
3043impl Dialect {
3044 /// For DuckDB target: when FROM clause contains RANGE(n), replace
3045 /// `(ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1)` with `range` in select expressions.
3046 /// This handles SEQ1/2/4/8 → RANGE transpilation from Snowflake.
3047 fn seq_rownum_to_range(expr: Expression) -> Result<Expression> {
3048 if let Expression::Select(mut select) = expr {
3049 // Check if FROM contains a RANGE function
3050 let has_range_from = if let Some(ref from) = select.from {
3051 from.expressions.iter().any(|e| {
3052 // Check for direct RANGE(...) or aliased RANGE(...)
3053 match e {
3054 Expression::Function(f) => f.name.eq_ignore_ascii_case("RANGE"),
3055 Expression::Alias(a) => {
3056 matches!(&a.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("RANGE"))
3057 }
3058 _ => false,
3059 }
3060 })
3061 } else {
3062 false
3063 };
3064
3065 if has_range_from {
3066 // Replace the ROW_NUMBER pattern in select expressions
3067 select.expressions = select
3068 .expressions
3069 .into_iter()
3070 .map(|e| Self::replace_rownum_with_range(e))
3071 .collect();
3072 }
3073
3074 Ok(Expression::Select(select))
3075 } else {
3076 Ok(expr)
3077 }
3078 }
3079
3080 /// Replace `(ROW_NUMBER() OVER (...) - 1)` with `range` column reference
3081 fn replace_rownum_with_range(expr: Expression) -> Expression {
3082 match expr {
3083 // Match: (ROW_NUMBER() OVER (...) - 1) % N → range % N
3084 Expression::Mod(op) => {
3085 let new_left = Self::try_replace_rownum_paren(&op.left);
3086 Expression::Mod(Box::new(crate::expressions::BinaryOp {
3087 left: new_left,
3088 right: op.right,
3089 left_comments: op.left_comments,
3090 operator_comments: op.operator_comments,
3091 trailing_comments: op.trailing_comments,
3092 inferred_type: op.inferred_type,
3093 }))
3094 }
3095 // Match: (CASE WHEN (ROW...) % N >= ... THEN ... ELSE ... END)
3096 Expression::Paren(p) => {
3097 let inner = Self::replace_rownum_with_range(p.this);
3098 Expression::Paren(Box::new(crate::expressions::Paren {
3099 this: inner,
3100 trailing_comments: p.trailing_comments,
3101 }))
3102 }
3103 Expression::Case(mut c) => {
3104 // Replace ROW_NUMBER in WHEN conditions and THEN expressions
3105 c.whens = c
3106 .whens
3107 .into_iter()
3108 .map(|(cond, then)| {
3109 (
3110 Self::replace_rownum_with_range(cond),
3111 Self::replace_rownum_with_range(then),
3112 )
3113 })
3114 .collect();
3115 if let Some(else_) = c.else_ {
3116 c.else_ = Some(Self::replace_rownum_with_range(else_));
3117 }
3118 Expression::Case(c)
3119 }
3120 Expression::Gte(op) => Expression::Gte(Box::new(crate::expressions::BinaryOp {
3121 left: Self::replace_rownum_with_range(op.left),
3122 right: op.right,
3123 left_comments: op.left_comments,
3124 operator_comments: op.operator_comments,
3125 trailing_comments: op.trailing_comments,
3126 inferred_type: op.inferred_type,
3127 })),
3128 Expression::Sub(op) => Expression::Sub(Box::new(crate::expressions::BinaryOp {
3129 left: Self::replace_rownum_with_range(op.left),
3130 right: op.right,
3131 left_comments: op.left_comments,
3132 operator_comments: op.operator_comments,
3133 trailing_comments: op.trailing_comments,
3134 inferred_type: op.inferred_type,
3135 })),
3136 Expression::Alias(mut a) => {
3137 a.this = Self::replace_rownum_with_range(a.this);
3138 Expression::Alias(a)
3139 }
3140 other => other,
3141 }
3142 }
3143
3144 /// Check if an expression is `(ROW_NUMBER() OVER (...) - 1)` and replace with `range`
3145 fn try_replace_rownum_paren(expr: &Expression) -> Expression {
3146 if let Expression::Paren(ref p) = expr {
3147 if let Expression::Sub(ref sub) = p.this {
3148 if let Expression::WindowFunction(ref wf) = sub.left {
3149 if let Expression::Function(ref f) = wf.this {
3150 if f.name.eq_ignore_ascii_case("ROW_NUMBER") {
3151 if let Expression::Literal(ref lit) = sub.right {
3152 if let crate::expressions::Literal::Number(ref n) = lit.as_ref() {
3153 if n == "1" {
3154 return Expression::column("range");
3155 }
3156 }
3157 }
3158 }
3159 }
3160 }
3161 }
3162 }
3163 expr.clone()
3164 }
3165
    /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
    /// Converts:
    /// SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
    /// To:
    /// SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
    /// FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)) AS _t0(seq, key, path, index, alias, this)
    ///
    /// Also rewrites ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) via
    /// `convert_array_size_gda_snowflake`. Expressions that don't match either
    /// pattern are returned unchanged.
    fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
        use crate::expressions::*;
        transform_recursive(expr, &|e| {
            // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
            if let Expression::ArraySize(ref af) = e {
                if let Expression::Function(ref f) = af.this {
                    if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                        let result = Self::convert_array_size_gda_snowflake(f)?;
                        return Ok(result);
                    }
                }
            }

            // Only SELECT statements can carry the UNNEST join pattern.
            let Expression::Select(mut sel) = e else {
                return Ok(e);
            };

            // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
            let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
            let mut gda_join_idx: Option<usize> = None;

            for (idx, join) in sel.joins.iter().enumerate() {
                // The join.this may be:
                // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
                // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
                let (unnest_ref, alias_name) = match &join.this {
                    Expression::Unnest(ref unnest) => {
                        let alias = unnest.alias.as_ref().map(|id| id.name.clone());
                        (Some(unnest.as_ref()), alias)
                    }
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            (Some(unnest.as_ref()), Some(a.alias.name.clone()))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
                    // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
                    if let Expression::Function(ref f) = unnest.this {
                        if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                            let start_expr = f.args[0].clone();
                            let end_expr = f.args[1].clone();
                            let step = f.args.get(2).cloned();

                            // Extract unit from step interval
                            let unit = if let Some(Expression::Interval(ref iv)) = step {
                                if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
                                    Some(format!("{:?}", unit).to_ascii_uppercase())
                                } else if let Some(ref this) = iv.this {
                                    // The interval may be stored as a string like "1 MONTH"
                                    if let Expression::Literal(lit) = this {
                                        if let Literal::String(ref s) = lit.as_ref() {
                                            let parts: Vec<&str> = s.split_whitespace().collect();
                                            if parts.len() == 2 {
                                                Some(parts[1].to_ascii_uppercase())
                                            } else if parts.len() == 1 {
                                                // Single word like "MONTH" or just "1"
                                                let upper = parts[0].to_ascii_uppercase();
                                                if matches!(
                                                    upper.as_str(),
                                                    "YEAR"
                                                        | "QUARTER"
                                                        | "MONTH"
                                                        | "WEEK"
                                                        | "DAY"
                                                        | "HOUR"
                                                        | "MINUTE"
                                                        | "SECOND"
                                                ) {
                                                    Some(upper)
                                                } else {
                                                    None
                                                }
                                            } else {
                                                None
                                            }
                                        } else {
                                            None
                                        }
                                    } else {
                                        None
                                    }
                                } else {
                                    None
                                }
                            } else {
                                None
                            };

                            if let Some(unit_str) = unit {
                                gda_info = Some((alias, start_expr, end_expr, unit_str));
                                gda_join_idx = Some(idx);
                            }
                        }
                    }
                }
                // Only the first matching join is rewritten per SELECT.
                if gda_info.is_some() {
                    break;
                }
            }

            let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
                // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
                // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
                let result = Self::try_transform_from_gda_snowflake(sel);
                return result;
            };
            // gda_join_idx is always set together with gda_info above.
            let join_idx = gda_join_idx.unwrap();

            // Build ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)
            // ARRAY_GENERATE_RANGE uses exclusive end, and we need DATEDIFF + 1 values
            // (inclusive date range), so the exclusive end is DATEDIFF + 1.
            let datediff = Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![
                    Expression::boxed_column(Column {
                        name: Identifier::new(&unit_str),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    start_expr.clone(),
                    end_expr.clone(),
                ],
            )));
            let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
                left: datediff,
                right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }));

            let array_gen_range = Expression::Function(Box::new(Function::new(
                "ARRAY_GENERATE_RANGE".to_string(),
                vec![
                    Expression::Literal(Box::new(Literal::Number("0".to_string()))),
                    datediff_plus_one,
                ],
            )));

            // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
            let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
                name: Identifier::new("INPUT"),
                value: array_gen_range,
                separator: crate::expressions::NamedArgSeparator::DArrow,
            }));
            let flatten = Expression::Function(Box::new(Function::new(
                "FLATTEN".to_string(),
                vec![flatten_input],
            )));

            // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
            // The original UNNEST alias takes the fifth column-alias slot (the
            // value position of the flattened row).
            let alias_table = Alias {
                this: flatten,
                alias: Identifier::new("_t0"),
                column_aliases: vec![
                    Identifier::new("seq"),
                    Identifier::new("key"),
                    Identifier::new("path"),
                    Identifier::new("index"),
                    Identifier::new(&alias_name),
                    Identifier::new("this"),
                ],
                pre_alias_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            };
            let lateral_expr = Expression::Lateral(Box::new(Lateral {
                this: Box::new(Expression::Alias(Box::new(alias_table))),
                view: None,
                outer: None,
                alias: None,
                alias_quoted: false,
                cross_apply: None,
                ordinality: None,
                column_aliases: vec![],
            }));

            // Remove the original join and add to FROM expressions
            sel.joins.remove(join_idx);
            if let Some(ref mut from) = sel.from {
                from.expressions.push(lateral_expr);
            }

            // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
            let dateadd_expr = Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::boxed_column(Column {
                        name: Identifier::new(&unit_str),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::boxed_column(Column {
                            name: Identifier::new(&alias_name),
                            table: None,
                            join_mark: false,
                            trailing_comments: vec![],
                            span: None,
                            inferred_type: None,
                        }),
                        to: DataType::Int {
                            length: None,
                            integer_spelling: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })),
                    Expression::Cast(Box::new(Cast {
                        this: start_expr.clone(),
                        to: DataType::Date,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })),
                ],
            )));

            // Replace references to the alias in the SELECT list
            let new_exprs: Vec<Expression> = sel
                .expressions
                .iter()
                .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
                .collect();
            sel.expressions = new_exprs;

            Ok(Expression::Select(sel))
        })
    }
3418
3419 /// Helper: replace column references to `alias_name` with dateadd expression
3420 fn replace_column_ref_with_dateadd(
3421 expr: &Expression,
3422 alias_name: &str,
3423 dateadd: &Expression,
3424 ) -> Expression {
3425 use crate::expressions::*;
3426 match expr {
3427 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
3428 // Plain column reference -> DATEADD(...) AS alias_name
3429 Expression::Alias(Box::new(Alias {
3430 this: dateadd.clone(),
3431 alias: Identifier::new(alias_name),
3432 column_aliases: vec![],
3433 pre_alias_comments: vec![],
3434 trailing_comments: vec![],
3435 inferred_type: None,
3436 }))
3437 }
3438 Expression::Alias(a) => {
3439 // Check if the inner expression references the alias
3440 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
3441 Expression::Alias(Box::new(Alias {
3442 this: new_this,
3443 alias: a.alias.clone(),
3444 column_aliases: a.column_aliases.clone(),
3445 pre_alias_comments: a.pre_alias_comments.clone(),
3446 trailing_comments: a.trailing_comments.clone(),
3447 inferred_type: None,
3448 }))
3449 }
3450 _ => expr.clone(),
3451 }
3452 }
3453
3454 /// Helper: replace column references in inner expression (not top-level)
3455 fn replace_column_ref_inner(
3456 expr: &Expression,
3457 alias_name: &str,
3458 dateadd: &Expression,
3459 ) -> Expression {
3460 use crate::expressions::*;
3461 match expr {
3462 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
3463 dateadd.clone()
3464 }
3465 Expression::Add(op) => {
3466 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
3467 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
3468 Expression::Add(Box::new(BinaryOp {
3469 left,
3470 right,
3471 left_comments: op.left_comments.clone(),
3472 operator_comments: op.operator_comments.clone(),
3473 trailing_comments: op.trailing_comments.clone(),
3474 inferred_type: None,
3475 }))
3476 }
3477 Expression::Sub(op) => {
3478 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
3479 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
3480 Expression::Sub(Box::new(BinaryOp {
3481 left,
3482 right,
3483 left_comments: op.left_comments.clone(),
3484 operator_comments: op.operator_comments.clone(),
3485 trailing_comments: op.trailing_comments.clone(),
3486 inferred_type: None,
3487 }))
3488 }
3489 Expression::Mul(op) => {
3490 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
3491 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
3492 Expression::Mul(Box::new(BinaryOp {
3493 left,
3494 right,
3495 left_comments: op.left_comments.clone(),
3496 operator_comments: op.operator_comments.clone(),
3497 trailing_comments: op.trailing_comments.clone(),
3498 inferred_type: None,
3499 }))
3500 }
3501 _ => expr.clone(),
3502 }
3503 }
3504
    /// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
    /// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
    ///
    /// Rewrites the BigQuery-style table expression
    ///
    /// ```text
    /// FROM UNNEST(GENERATE_DATE_ARRAY(start, end[, step])) [AS alias(col)]
    /// ```
    ///
    /// into the Snowflake-compatible form
    ///
    /// ```text
    /// FROM (SELECT DATEADD(unit, CAST(col AS INT), start) AS col
    ///       FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)))
    ///          AS _t0(seq, key, path, index, col, this)) [AS alias(col)]
    /// ```
    ///
    /// Only the first matching FROM expression is rewritten (the scan breaks
    /// after the first hit). If nothing matches, or the step's interval unit
    /// cannot be extracted, the SELECT is returned unchanged.
    fn try_transform_from_gda_snowflake(
        mut sel: Box<crate::expressions::Select>,
    ) -> Result<Expression> {
        use crate::expressions::*;

        // Extract GDA info from FROM clause
        let mut gda_info: Option<(
            usize,
            String,
            Expression,
            Expression,
            String,
            Option<(String, Vec<Identifier>)>,
        )> = None; // (from_idx, col_name, start, end, unit, outer_alias)

        if let Some(ref from) = sel.from {
            for (idx, table_expr) in from.expressions.iter().enumerate() {
                // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
                // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
                let (unnest_opt, outer_alias_info) = match table_expr {
                    Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            // Remember the outer alias and its column aliases so we can
                            // re-attach them to the replacement subquery below.
                            let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
                            (Some(unnest.as_ref()), Some(alias_info))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let Some(unnest) = unnest_opt {
                    // Check for GENERATE_DATE_ARRAY function
                    let func_opt = match &unnest.this {
                        Expression::Function(ref f)
                            if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
                                && f.args.len() >= 2 =>
                        {
                            Some(f)
                        }
                        // Also check for GenerateSeries (from earlier normalization)
                        _ => None,
                    };

                    if let Some(f) = func_opt {
                        let start_expr = f.args[0].clone();
                        let end_expr = f.args[1].clone();
                        // Optional third argument is the step interval (defaults handled
                        // inside extract_interval_unit_str).
                        let step = f.args.get(2).cloned();

                        // Extract unit and column name
                        let unit = Self::extract_interval_unit_str(&step);
                        // The generated column name comes from the first column alias
                        // (e.g. `AS _q(date_week)` -> "date_week"); otherwise "value".
                        let col_name = outer_alias_info
                            .as_ref()
                            .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
                            .unwrap_or_else(|| "value".to_string());

                        if let Some(unit_str) = unit {
                            gda_info = Some((
                                idx,
                                col_name,
                                start_expr,
                                end_expr,
                                unit_str,
                                outer_alias_info,
                            ));
                            // Only the first matching FROM entry is transformed.
                            break;
                        }
                    }
                }
            }
        }

        // No rewritable pattern found: hand the SELECT back untouched.
        let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
        else {
            return Ok(Expression::Select(sel));
        };

        // Build the Snowflake subquery:
        // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        //  FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(seq, key, path, index, col_name, this))

        // DATEDIFF(unit, start, end)
        // NOTE(review): the unit is modeled as a bare Column so that the generator
        // presumably emits it as an unquoted identifier (DATEDIFF(DAY, ...)) —
        // confirm against the Snowflake generator's identifier-quoting rules.
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                Expression::boxed_column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // DATEDIFF(...) + 1
        // GENERATE_DATE_ARRAY is end-inclusive, hence the +1 on the range length.
        let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Box::new(Literal::Number("0".to_string()))),
                datediff_plus_one,
            ],
        )));

        // TABLE(FLATTEN(INPUT => ...))
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        // Determine alias name for the table: use outer alias or _t0
        let table_alias_name = outer_alias_info
            .as_ref()
            .map(|(name, _)| name.clone())
            .unwrap_or_else(|| "_t0".to_string());

        // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
        // The six column aliases mirror FLATTEN's fixed output columns; the
        // fifth position (VALUE) is renamed to the user-visible column name.
        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new(&table_alias_name),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(&col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::boxed_column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                // FLATTEN's VALUE column is a VARIANT, so it is cast to INT
                // before being used as the DATEADD offset.
                Expression::Cast(Box::new(Cast {
                    this: Expression::boxed_column(Column {
                        name: Identifier::new(&col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })),
                // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(&col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // Build inner SELECT
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        let inner_select_expr = Expression::Select(Box::new(inner_select));
        let subquery = Expression::Subquery(Box::new(Subquery {
            this: inner_select_expr,
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // If there was an outer alias (e.g., AS _q(date_week)), wrap with alias
        let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
            Expression::Alias(Box::new(Alias {
                this: subquery,
                alias: Identifier::new(&alias_name),
                column_aliases: col_aliases,
                pre_alias_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }))
        } else {
            subquery
        };

        // Replace the FROM expression
        if let Some(ref mut from) = sel.from {
            from.expressions[from_idx] = replacement;
        }

        Ok(Expression::Select(sel))
    }
3748
3749 /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
3750 /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
3751 /// FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(...))))
3752 fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
3753 use crate::expressions::*;
3754
3755 let start_expr = f.args[0].clone();
3756 let end_expr = f.args[1].clone();
3757 let step = f.args.get(2).cloned();
3758 let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
3759 let col_name = "value";
3760
3761 // Build the inner subquery: same as try_transform_from_gda_snowflake
3762 let datediff = Expression::Function(Box::new(Function::new(
3763 "DATEDIFF".to_string(),
3764 vec![
3765 Expression::boxed_column(Column {
3766 name: Identifier::new(&unit_str),
3767 table: None,
3768 join_mark: false,
3769 trailing_comments: vec![],
3770 span: None,
3771 inferred_type: None,
3772 }),
3773 start_expr.clone(),
3774 end_expr.clone(),
3775 ],
3776 )));
3777 // DATEDIFF(...) + 1
3778 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
3779 left: datediff,
3780 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3781 left_comments: vec![],
3782 operator_comments: vec![],
3783 trailing_comments: vec![],
3784 inferred_type: None,
3785 }));
3786
3787 let array_gen_range = Expression::Function(Box::new(Function::new(
3788 "ARRAY_GENERATE_RANGE".to_string(),
3789 vec![
3790 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
3791 datediff_plus_one,
3792 ],
3793 )));
3794
3795 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
3796 name: Identifier::new("INPUT"),
3797 value: array_gen_range,
3798 separator: crate::expressions::NamedArgSeparator::DArrow,
3799 }));
3800 let flatten = Expression::Function(Box::new(Function::new(
3801 "FLATTEN".to_string(),
3802 vec![flatten_input],
3803 )));
3804
3805 let table_func =
3806 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
3807 let flatten_aliased = Expression::Alias(Box::new(Alias {
3808 this: table_func,
3809 alias: Identifier::new("_t0"),
3810 column_aliases: vec![
3811 Identifier::new("seq"),
3812 Identifier::new("key"),
3813 Identifier::new("path"),
3814 Identifier::new("index"),
3815 Identifier::new(col_name),
3816 Identifier::new("this"),
3817 ],
3818 pre_alias_comments: vec![],
3819 trailing_comments: vec![],
3820 inferred_type: None,
3821 }));
3822
3823 let dateadd_expr = Expression::Function(Box::new(Function::new(
3824 "DATEADD".to_string(),
3825 vec![
3826 Expression::boxed_column(Column {
3827 name: Identifier::new(&unit_str),
3828 table: None,
3829 join_mark: false,
3830 trailing_comments: vec![],
3831 span: None,
3832 inferred_type: None,
3833 }),
3834 Expression::Cast(Box::new(Cast {
3835 this: Expression::boxed_column(Column {
3836 name: Identifier::new(col_name),
3837 table: None,
3838 join_mark: false,
3839 trailing_comments: vec![],
3840 span: None,
3841 inferred_type: None,
3842 }),
3843 to: DataType::Int {
3844 length: None,
3845 integer_spelling: false,
3846 },
3847 trailing_comments: vec![],
3848 double_colon_syntax: false,
3849 format: None,
3850 default: None,
3851 inferred_type: None,
3852 })),
3853 start_expr.clone(),
3854 ],
3855 )));
3856 let dateadd_aliased = Expression::Alias(Box::new(Alias {
3857 this: dateadd_expr,
3858 alias: Identifier::new(col_name),
3859 column_aliases: vec![],
3860 pre_alias_comments: vec![],
3861 trailing_comments: vec![],
3862 inferred_type: None,
3863 }));
3864
3865 // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
3866 let mut inner_select = Select::new();
3867 inner_select.expressions = vec![dateadd_aliased];
3868 inner_select.from = Some(From {
3869 expressions: vec![flatten_aliased],
3870 });
3871
3872 // Wrap in subquery for the inner part
3873 let inner_subquery = Expression::Subquery(Box::new(Subquery {
3874 this: Expression::Select(Box::new(inner_select)),
3875 alias: None,
3876 column_aliases: vec![],
3877 order_by: None,
3878 limit: None,
3879 offset: None,
3880 distribute_by: None,
3881 sort_by: None,
3882 cluster_by: None,
3883 lateral: false,
3884 modifiers_inside: false,
3885 trailing_comments: vec![],
3886 inferred_type: None,
3887 }));
3888
3889 // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery)
3890 let star = Expression::Star(Star {
3891 table: None,
3892 except: None,
3893 replace: None,
3894 rename: None,
3895 trailing_comments: vec![],
3896 span: None,
3897 });
3898 let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
3899 this: star,
3900 distinct: false,
3901 filter: None,
3902 order_by: vec![],
3903 name: Some("ARRAY_AGG".to_string()),
3904 ignore_nulls: None,
3905 having_max: None,
3906 limit: None,
3907 inferred_type: None,
3908 }));
3909
3910 let mut outer_select = Select::new();
3911 outer_select.expressions = vec![array_agg];
3912 outer_select.from = Some(From {
3913 expressions: vec![inner_subquery],
3914 });
3915
3916 // Wrap in a subquery
3917 let outer_subquery = Expression::Subquery(Box::new(Subquery {
3918 this: Expression::Select(Box::new(outer_select)),
3919 alias: None,
3920 column_aliases: vec![],
3921 order_by: None,
3922 limit: None,
3923 offset: None,
3924 distribute_by: None,
3925 sort_by: None,
3926 cluster_by: None,
3927 lateral: false,
3928 modifiers_inside: false,
3929 trailing_comments: vec![],
3930 inferred_type: None,
3931 }));
3932
3933 // ARRAY_SIZE(subquery)
3934 Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
3935 outer_subquery,
3936 ))))
3937 }
3938
3939 /// Extract interval unit string from an optional step expression.
3940 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
3941 use crate::expressions::*;
3942 if let Some(Expression::Interval(ref iv)) = step {
3943 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
3944 return Some(format!("{:?}", unit).to_ascii_uppercase());
3945 }
3946 if let Some(ref this) = iv.this {
3947 if let Expression::Literal(lit) = this {
3948 if let Literal::String(ref s) = lit.as_ref() {
3949 let parts: Vec<&str> = s.split_whitespace().collect();
3950 if parts.len() == 2 {
3951 return Some(parts[1].to_ascii_uppercase());
3952 } else if parts.len() == 1 {
3953 let upper = parts[0].to_ascii_uppercase();
3954 if matches!(
3955 upper.as_str(),
3956 "YEAR"
3957 | "QUARTER"
3958 | "MONTH"
3959 | "WEEK"
3960 | "DAY"
3961 | "HOUR"
3962 | "MINUTE"
3963 | "SECOND"
3964 ) {
3965 return Some(upper);
3966 }
3967 }
3968 }
3969 }
3970 }
3971 }
3972 // Default to DAY if no step or no interval
3973 if step.is_none() {
3974 return Some("DAY".to_string());
3975 }
3976 None
3977 }
3978
3979 fn normalize_snowflake_pretty(mut sql: String) -> String {
3980 if sql.contains("LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)")
3981 && sql.contains("ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)")
3982 {
3983 sql = sql.replace(
3984 "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
3985 "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
3986 );
3987
3988 sql = sql.replace(
3989 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
3990 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
3991 );
3992
3993 sql = sql.replace(
3994 "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
3995 "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
3996 );
3997 }
3998
3999 sql
4000 }
4001
4002 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
4003 /// This handles cases where the same syntax has different semantics across dialects.
4004 fn cross_dialect_normalize(
4005 expr: Expression,
4006 source: DialectType,
4007 target: DialectType,
4008 ) -> Result<Expression> {
4009 use crate::expressions::{
4010 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
4011 Function, Identifier, IsNull, Literal, Null, Paren,
4012 };
4013
4014 // Helper to tag which kind of transform to apply
4015 #[derive(Debug)]
4016 enum Action {
4017 None,
4018 GreatestLeastNull,
4019 ArrayGenerateRange,
4020 Div0TypedDivision,
4021 ArrayAggCollectList,
4022 ArrayAggWithinGroupFilter,
4023 ArrayAggFilter,
4024 CastTimestampToDatetime,
4025 DateTruncWrapCast,
4026 ToDateToCast,
4027 ConvertTimezoneToExpr,
4028 SetToVariable,
4029 RegexpReplaceSnowflakeToDuckDB,
4030 BigQueryFunctionNormalize,
4031 BigQuerySafeDivide,
4032 BigQueryCastType,
4033 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
4034 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
4035 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
4036 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
4037 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
4038 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
4039 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
4040 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
4041 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target
4042 EpochConvert, // Expression::Epoch -> target-specific epoch function
4043 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
4044 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
4045 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
4046 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
4047 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
4048 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
4049 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
4050 TempTableHash, // TSQL #table -> temp table normalization
4051 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
4052 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
4053 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
4054 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
4055 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
4056 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
4057 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
4058 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
4059 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
4060 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake; also DuckDB CAST(x AS JSON)
4061 DuckDBTryCastJsonToTryJsonParse, // DuckDB TRY_CAST(x AS JSON) -> TRY(JSON_PARSE(x)) for Trino/Presto/Athena
4062 DuckDBJsonFuncToJsonParse, // DuckDB json(x) -> JSON_PARSE(x) for Trino/Presto/Athena
4063 DuckDBJsonValidToIsJson, // DuckDB json_valid(x) -> x IS JSON for Trino/Presto/Athena
4064 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
4065 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
4066 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
4067 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
4068 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
4069 ArrayAggToGroupConcat, // ARRAY_AGG(x) -> GROUP_CONCAT(x) for MySQL-like targets
4070 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
4071 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
4072 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
4073 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
4074 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
4075 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
4076 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
4077 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
4078 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
4079 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
4080 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
4081 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
4082 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
4083 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
4084 DollarParamConvert, // $foo -> @foo for BigQuery
4085 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
4086 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
4087 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
4088 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
4089 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
4090 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
4091 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
4092 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
4093 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
4094 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
4095 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
4096 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
4097 RespectNullsConvert, // RESPECT NULLS window function handling
4098 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
4099 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
4100 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
4101 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
4102 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
4103 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
4104 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
4105 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
4106 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
4107 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
4108 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
4109 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
4110 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
4111 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
4112 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
4113 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
4114 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
4115 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
4116 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
4117 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
4118 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
4119 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
4120 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
4121 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
4122 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
4123 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
4124 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
4125 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
4126 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
4127 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
4128 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
4129 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
4130 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
4131 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
4132 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
4133 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
4134 JsonExtractToTsql, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
4135 JsonExtractToClickHouse, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
4136 JsonExtractScalarConvert, // JSON_EXTRACT_SCALAR -> target-specific (PostgreSQL, Snowflake, SQLite)
4137 JsonPathNormalize, // Normalize JSON path format (brackets, wildcards, quotes) for various dialects
4138 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
4139 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
4140 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
4141 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
4142 Nvl2Expand, // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END
4143 IfnullToCoalesce, // IFNULL(a, b) -> COALESCE(a, b)
4144 IsAsciiConvert, // IS_ASCII(x) -> dialect-specific ASCII check
4145 StrPositionConvert, // STR_POSITION(haystack, needle[, pos]) -> dialect-specific
4146 DecodeSimplify, // DECODE with null-safe -> simple = comparison
4147 ArraySumConvert, // ARRAY_SUM -> target-specific
4148 ArraySizeConvert, // ARRAY_SIZE -> target-specific
4149 ArrayAnyConvert, // ARRAY_ANY -> target-specific
4150 CastTimestamptzToFunc, // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) for MySQL/StarRocks
4151 TsOrDsToDateConvert, // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific
4152 TsOrDsToDateStrConvert, // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
4153 DateStrToDateConvert, // DATE_STR_TO_DATE(x) -> CAST(x AS DATE)
4154 TimeStrToDateConvert, // TIME_STR_TO_DATE(x) -> CAST(x AS DATE)
4155 TimeStrToTimeConvert, // TIME_STR_TO_TIME(x) -> CAST(x AS TIMESTAMP)
4156 DateToDateStrConvert, // DATE_TO_DATE_STR(x) -> CAST(x AS TEXT/VARCHAR/STRING)
4157 DateToDiConvert, // DATE_TO_DI(x) -> dialect-specific (CAST date to YYYYMMDD integer)
4158 DiToDateConvert, // DI_TO_DATE(x) -> dialect-specific (integer YYYYMMDD to date)
4159 TsOrDiToDiConvert, // TS_OR_DI_TO_DI(x) -> dialect-specific
4160 UnixToStrConvert, // UNIX_TO_STR(x, fmt) -> dialect-specific
4161 UnixToTimeConvert, // UNIX_TO_TIME(x) -> dialect-specific
4162 UnixToTimeStrConvert, // UNIX_TO_TIME_STR(x) -> dialect-specific
4163 TimeToUnixConvert, // TIME_TO_UNIX(x) -> dialect-specific
4164 TimeToStrConvert, // TIME_TO_STR(x, fmt) -> dialect-specific
4165 StrToUnixConvert, // STR_TO_UNIX(x, fmt) -> dialect-specific
4166 DateTruncSwapArgs, // DATE_TRUNC('unit', x) -> DATE_TRUNC(x, unit) / TRUNC(x, unit)
4167 TimestampTruncConvert, // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> dialect-specific
4168 StrToDateConvert, // STR_TO_DATE(x, fmt) from Generic -> CAST(StrToTime(x,fmt) AS DATE)
4169 TsOrDsAddConvert, // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> DATE_ADD per dialect
4170 DateFromUnixDateConvert, // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
4171 TimeStrToUnixConvert, // TIME_STR_TO_UNIX(x) -> dialect-specific
4172 TimeToTimeStrConvert, // TIME_TO_TIME_STR(x) -> CAST(x AS type)
4173 CreateTableLikeToCtas, // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
4174 CreateTableLikeToSelectInto, // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
4175 CreateTableLikeToAs, // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
4176 ArrayRemoveConvert, // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
4177 ArrayReverseConvert, // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
4178 JsonKeysConvert, // JSON_KEYS -> JSON_OBJECT_KEYS/OBJECT_KEYS
4179 ParseJsonStrip, // PARSE_JSON(x) -> x (strip wrapper)
4180 ArraySizeDrill, // ARRAY_SIZE -> REPEATED_COUNT for Drill
4181 WeekOfYearToWeekIso, // WEEKOFYEAR -> WEEKISO for Snowflake cross-dialect
4182 RegexpSubstrSnowflakeToDuckDB, // REGEXP_SUBSTR(s, p, ...) -> REGEXP_EXTRACT variants for DuckDB
4183 RegexpSubstrSnowflakeIdentity, // REGEXP_SUBSTR/REGEXP_SUBSTR_ALL strip trailing group=0 for Snowflake identity
4184 RegexpSubstrAllSnowflakeToDuckDB, // REGEXP_SUBSTR_ALL(s, p, ...) -> REGEXP_EXTRACT_ALL variants for DuckDB
4185 RegexpCountSnowflakeToDuckDB, // REGEXP_COUNT(s, p, ...) -> LENGTH(REGEXP_EXTRACT_ALL(...)) for DuckDB
4186 RegexpInstrSnowflakeToDuckDB, // REGEXP_INSTR(s, p, ...) -> complex CASE expression for DuckDB
4187 RegexpReplacePositionSnowflakeToDuckDB, // REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
4188 RlikeSnowflakeToDuckDB, // RLIKE(a, b[, flags]) -> REGEXP_FULL_MATCH(a, b[, flags]) for DuckDB
4189 RegexpExtractAllToSnowflake, // BigQuery REGEXP_EXTRACT_ALL -> REGEXP_SUBSTR_ALL for Snowflake
4190 ArrayExceptConvert, // ARRAY_EXCEPT -> DuckDB complex CASE / Snowflake ARRAY_EXCEPT / Presto ARRAY_EXCEPT
4191 ArrayPositionSnowflakeSwap, // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
4192 RegexpLikeExasolAnchor, // RegexpLike -> Exasol REGEXP_LIKE with .*pattern.* anchoring
4193 ArrayDistinctConvert, // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
4194 ArrayDistinctClickHouse, // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
4195 ArrayContainsDuckDBConvert, // ARRAY_CONTAINS -> DuckDB CASE with NULL-aware check
4196 SnowflakeWindowFrameStrip, // Strip default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for Snowflake target
4197 SnowflakeWindowFrameAdd, // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for non-Snowflake target
4198 SnowflakeArrayPositionToDuckDB, // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB
4199 }
4200
4201 // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
4202 let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
4203 Self::transform_select_into(expr, source, target)
4204 } else {
4205 expr
4206 };
4207
4208 // Strip OFFSET ROWS for non-TSQL/Oracle targets
4209 let expr = if !matches!(
4210 target,
4211 DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
4212 ) {
4213 if let Expression::Select(mut select) = expr {
4214 if let Some(ref mut offset) = select.offset {
4215 offset.rows = None;
4216 }
4217 Expression::Select(select)
4218 } else {
4219 expr
4220 }
4221 } else {
4222 expr
4223 };
4224
4225 // Oracle: LIMIT -> FETCH FIRST, OFFSET -> OFFSET ROWS
4226 let expr = if matches!(target, DialectType::Oracle) {
4227 if let Expression::Select(mut select) = expr {
4228 if let Some(limit) = select.limit.take() {
4229 // Convert LIMIT to FETCH FIRST n ROWS ONLY
4230 select.fetch = Some(crate::expressions::Fetch {
4231 direction: "FIRST".to_string(),
4232 count: Some(limit.this),
4233 percent: false,
4234 rows: true,
4235 with_ties: false,
4236 });
4237 }
4238 // Add ROWS to OFFSET if present
4239 if let Some(ref mut offset) = select.offset {
4240 offset.rows = Some(true);
4241 }
4242 Expression::Select(select)
4243 } else {
4244 expr
4245 }
4246 } else {
4247 expr
4248 };
4249
4250 // Handle CreateTable WITH properties transformation before recursive transforms
4251 let expr = if let Expression::CreateTable(mut ct) = expr {
4252 Self::transform_create_table_properties(&mut ct, source, target);
4253
4254 // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
4255 // When the PARTITIONED BY clause contains column definitions, merge them into the
4256 // main column list and adjust the PARTITIONED BY clause for the target dialect.
4257 if matches!(
4258 source,
4259 DialectType::Hive | DialectType::Spark | DialectType::Databricks
4260 ) {
4261 let mut partition_col_names: Vec<String> = Vec::new();
4262 let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
4263 let mut has_col_def_partitions = false;
4264
4265 // Check if any PARTITIONED BY property contains ColumnDef expressions
4266 for prop in &ct.properties {
4267 if let Expression::PartitionedByProperty(ref pbp) = prop {
4268 if let Expression::Tuple(ref tuple) = *pbp.this {
4269 for expr in &tuple.expressions {
4270 if let Expression::ColumnDef(ref cd) = expr {
4271 has_col_def_partitions = true;
4272 partition_col_names.push(cd.name.name.clone());
4273 partition_col_defs.push(*cd.clone());
4274 }
4275 }
4276 }
4277 }
4278 }
4279
4280 if has_col_def_partitions && !matches!(target, DialectType::Hive) {
4281 // Merge partition columns into main column list
4282 for cd in partition_col_defs {
4283 ct.columns.push(cd);
4284 }
4285
4286 // Replace PARTITIONED BY property with column-name-only version
4287 ct.properties
4288 .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));
4289
4290 if matches!(
4291 target,
4292 DialectType::Presto | DialectType::Trino | DialectType::Athena
4293 ) {
4294 // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
4295 let array_elements: Vec<String> = partition_col_names
4296 .iter()
4297 .map(|n| format!("'{}'", n))
4298 .collect();
4299 let array_value = format!("ARRAY[{}]", array_elements.join(", "));
4300 ct.with_properties
4301 .push(("PARTITIONED_BY".to_string(), array_value));
4302 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
4303 // Spark: PARTITIONED BY (y, z) - just column names
4304 let name_exprs: Vec<Expression> = partition_col_names
4305 .iter()
4306 .map(|n| {
4307 Expression::Column(Box::new(crate::expressions::Column {
4308 name: crate::expressions::Identifier::new(n.clone()),
4309 table: None,
4310 join_mark: false,
4311 trailing_comments: Vec::new(),
4312 span: None,
4313 inferred_type: None,
4314 }))
4315 })
4316 .collect();
4317 ct.properties.insert(
4318 0,
4319 Expression::PartitionedByProperty(Box::new(
4320 crate::expressions::PartitionedByProperty {
4321 this: Box::new(Expression::Tuple(Box::new(
4322 crate::expressions::Tuple {
4323 expressions: name_exprs,
4324 },
4325 ))),
4326 },
4327 )),
4328 );
4329 }
4330 // For DuckDB and other targets, just drop the PARTITIONED BY (already retained above)
4331 }
4332
4333 // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
4334 // are handled by transform_create_table_properties which runs first
4335 }
4336
4337 // Strip LOCATION property for Presto/Trino (not supported)
4338 if matches!(
4339 target,
4340 DialectType::Presto | DialectType::Trino | DialectType::Athena
4341 ) {
4342 ct.properties
4343 .retain(|p| !matches!(p, Expression::LocationProperty(_)));
4344 }
4345
4346 // Strip table-level constraints for Spark/Hive/Databricks
4347 // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
4348 if matches!(
4349 target,
4350 DialectType::Spark | DialectType::Databricks | DialectType::Hive
4351 ) {
4352 ct.constraints.retain(|c| {
4353 matches!(
4354 c,
4355 crate::expressions::TableConstraint::PrimaryKey { .. }
4356 | crate::expressions::TableConstraint::Like { .. }
4357 )
4358 });
4359 for constraint in &mut ct.constraints {
4360 if let crate::expressions::TableConstraint::PrimaryKey {
4361 columns,
4362 modifiers,
4363 ..
4364 } = constraint
4365 {
4366 // Strip ASC/DESC from column names
4367 for col in columns.iter_mut() {
4368 if col.name.ends_with(" ASC") {
4369 col.name = col.name[..col.name.len() - 4].to_string();
4370 } else if col.name.ends_with(" DESC") {
4371 col.name = col.name[..col.name.len() - 5].to_string();
4372 }
4373 }
4374 // Strip TSQL-specific modifiers
4375 modifiers.clustered = None;
4376 modifiers.with_options.clear();
4377 modifiers.on_filegroup = None;
4378 }
4379 }
4380 }
4381
4382 // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
4383 if matches!(target, DialectType::Databricks) {
4384 for col in &mut ct.columns {
4385 if col.auto_increment {
4386 if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
4387 col.data_type = crate::expressions::DataType::BigInt { length: None };
4388 }
4389 }
4390 }
4391 }
4392
4393 // Spark/Databricks: INTEGER -> INT in column definitions
4394 // Python sqlglot always outputs INT for Spark/Databricks
4395 if matches!(target, DialectType::Spark | DialectType::Databricks) {
4396 for col in &mut ct.columns {
4397 if let crate::expressions::DataType::Int {
4398 integer_spelling, ..
4399 } = &mut col.data_type
4400 {
4401 *integer_spelling = false;
4402 }
4403 }
4404 }
4405
4406 // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
4407 if matches!(target, DialectType::Hive | DialectType::Spark) {
4408 for col in &mut ct.columns {
4409 // If nullable is explicitly true (NULL), change to None (omit it)
4410 if col.nullable == Some(true) {
4411 col.nullable = None;
4412 }
4413 // Also remove from constraints if stored there
4414 col.constraints
4415 .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
4416 }
4417 }
4418
4419 // Strip TSQL ON filegroup for non-TSQL/Fabric targets
4420 if ct.on_property.is_some()
4421 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4422 {
4423 ct.on_property = None;
4424 }
4425
4426 // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
4427 // Snowflake doesn't support typed arrays in DDL
4428 if matches!(target, DialectType::Snowflake) {
4429 fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
4430 if let crate::expressions::DataType::Array { .. } = dt {
4431 *dt = crate::expressions::DataType::Custom {
4432 name: "ARRAY".to_string(),
4433 };
4434 }
4435 }
4436 for col in &mut ct.columns {
4437 strip_array_type_params(&mut col.data_type);
4438 }
4439 }
4440
4441 // PostgreSQL target: ensure IDENTITY columns have NOT NULL
4442 // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
4443 // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
4444 if matches!(target, DialectType::PostgreSQL) {
4445 for col in &mut ct.columns {
4446 if col.auto_increment && !col.constraint_order.is_empty() {
4447 use crate::expressions::ConstraintType;
4448 let has_explicit_not_null = col
4449 .constraint_order
4450 .iter()
4451 .any(|ct| *ct == ConstraintType::NotNull);
4452
4453 if has_explicit_not_null {
4454 // Source had explicit NOT NULL - preserve original order
4455 // Just ensure nullable is set
4456 if col.nullable != Some(false) {
4457 col.nullable = Some(false);
4458 }
4459 } else {
4460 // Source didn't have explicit NOT NULL - build order with
4461 // AutoIncrement + NotNull first, then remaining constraints
4462 let mut new_order = Vec::new();
4463 // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
4464 new_order.push(ConstraintType::AutoIncrement);
4465 new_order.push(ConstraintType::NotNull);
4466 // Add remaining constraints in original order (except AutoIncrement)
4467 for ct_type in &col.constraint_order {
4468 if *ct_type != ConstraintType::AutoIncrement {
4469 new_order.push(ct_type.clone());
4470 }
4471 }
4472 col.constraint_order = new_order;
4473 col.nullable = Some(false);
4474 }
4475 }
4476 }
4477 }
4478
4479 Expression::CreateTable(ct)
4480 } else {
4481 expr
4482 };
4483
4484 // Handle CreateView column stripping for Presto/Trino target
4485 let expr = if let Expression::CreateView(mut cv) = expr {
4486 // Presto/Trino: drop column list when view has a SELECT body
4487 if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
4488 {
4489 if !matches!(&cv.query, Expression::Null(_)) {
4490 cv.columns.clear();
4491 }
4492 }
4493 Expression::CreateView(cv)
4494 } else {
4495 expr
4496 };
4497
4498 // Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for generic/non-Presto targets
4499 let expr = if !matches!(
4500 target,
4501 DialectType::Presto | DialectType::Trino | DialectType::Athena
4502 ) {
4503 if let Expression::Select(mut select) = expr {
4504 if let Some(ref mut with) = select.with {
4505 for cte in &mut with.ctes {
4506 if let Expression::Values(ref vals) = cte.this {
4507 // Build: SELECT * FROM (VALUES ...) AS _values
4508 let values_subquery =
4509 Expression::Subquery(Box::new(crate::expressions::Subquery {
4510 this: Expression::Values(vals.clone()),
4511 alias: Some(Identifier::new("_values".to_string())),
4512 column_aliases: Vec::new(),
4513 order_by: None,
4514 limit: None,
4515 offset: None,
4516 distribute_by: None,
4517 sort_by: None,
4518 cluster_by: None,
4519 lateral: false,
4520 modifiers_inside: false,
4521 trailing_comments: Vec::new(),
4522 inferred_type: None,
4523 }));
4524 let mut new_select = crate::expressions::Select::new();
4525 new_select.expressions =
4526 vec![Expression::Star(crate::expressions::Star {
4527 table: None,
4528 except: None,
4529 replace: None,
4530 rename: None,
4531 trailing_comments: Vec::new(),
4532 span: None,
4533 })];
4534 new_select.from = Some(crate::expressions::From {
4535 expressions: vec![values_subquery],
4536 });
4537 cte.this = Expression::Select(Box::new(new_select));
4538 }
4539 }
4540 }
4541 Expression::Select(select)
4542 } else {
4543 expr
4544 }
4545 } else {
4546 expr
4547 };
4548
4549 // PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have nulls ordering
4550 let expr = if matches!(target, DialectType::PostgreSQL) {
4551 if let Expression::CreateIndex(mut ci) = expr {
4552 for col in &mut ci.columns {
4553 if col.nulls_first.is_none() {
4554 col.nulls_first = Some(true);
4555 }
4556 }
4557 Expression::CreateIndex(ci)
4558 } else {
4559 expr
4560 }
4561 } else {
4562 expr
4563 };
4564
4565 transform_recursive(expr, &|e| {
4566 // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
4567 // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
4568 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4569 if let Expression::Cast(ref c) = e {
4570 // Check if this is a CAST of an array to a struct array type
4571 let is_struct_array_cast =
4572 matches!(&c.to, crate::expressions::DataType::Array { .. });
4573 if is_struct_array_cast {
4574 let has_auto_named_structs = match &c.this {
4575 Expression::Array(arr) => arr.expressions.iter().any(|elem| {
4576 if let Expression::Struct(s) = elem {
4577 s.fields.iter().all(|(name, _)| {
4578 name.as_ref().map_or(true, |n| {
4579 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
4580 })
4581 })
4582 } else {
4583 false
4584 }
4585 }),
4586 Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
4587 if let Expression::Struct(s) = elem {
4588 s.fields.iter().all(|(name, _)| {
4589 name.as_ref().map_or(true, |n| {
4590 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
4591 })
4592 })
4593 } else {
4594 false
4595 }
4596 }),
4597 _ => false,
4598 };
4599 if has_auto_named_structs {
4600 let convert_struct_to_row = |elem: Expression| -> Expression {
4601 if let Expression::Struct(s) = elem {
4602 let row_args: Vec<Expression> =
4603 s.fields.into_iter().map(|(_, v)| v).collect();
4604 Expression::Function(Box::new(Function::new(
4605 "ROW".to_string(),
4606 row_args,
4607 )))
4608 } else {
4609 elem
4610 }
4611 };
4612 let mut c_clone = c.as_ref().clone();
4613 match &mut c_clone.this {
4614 Expression::Array(arr) => {
4615 arr.expressions = arr
4616 .expressions
4617 .drain(..)
4618 .map(convert_struct_to_row)
4619 .collect();
4620 }
4621 Expression::ArrayFunc(arr) => {
4622 arr.expressions = arr
4623 .expressions
4624 .drain(..)
4625 .map(convert_struct_to_row)
4626 .collect();
4627 }
4628 _ => {}
4629 }
4630 return Ok(Expression::Cast(Box::new(c_clone)));
4631 }
4632 }
4633 }
4634 }
4635
4636 // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
4637 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4638 if let Expression::Select(ref sel) = e {
4639 if sel.kind.as_deref() == Some("STRUCT") {
4640 let mut fields = Vec::new();
4641 for expr in &sel.expressions {
4642 match expr {
4643 Expression::Alias(a) => {
4644 fields.push((Some(a.alias.name.clone()), a.this.clone()));
4645 }
4646 Expression::Column(c) => {
4647 fields.push((Some(c.name.name.clone()), expr.clone()));
4648 }
4649 _ => {
4650 fields.push((None, expr.clone()));
4651 }
4652 }
4653 }
4654 let struct_lit =
4655 Expression::Struct(Box::new(crate::expressions::Struct { fields }));
4656 let mut new_select = sel.as_ref().clone();
4657 new_select.kind = None;
4658 new_select.expressions = vec![struct_lit];
4659 return Ok(Expression::Select(Box::new(new_select)));
4660 }
4661 }
4662 }
4663
4664 // Convert @variable -> ${variable} for Spark/Hive/Databricks
4665 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4666 && matches!(
4667 target,
4668 DialectType::Spark | DialectType::Databricks | DialectType::Hive
4669 )
4670 {
4671 if let Expression::Parameter(ref p) = e {
4672 if p.style == crate::expressions::ParameterStyle::At {
4673 if let Some(ref name) = p.name {
4674 return Ok(Expression::Parameter(Box::new(
4675 crate::expressions::Parameter {
4676 name: Some(name.clone()),
4677 index: p.index,
4678 style: crate::expressions::ParameterStyle::DollarBrace,
4679 quoted: p.quoted,
4680 string_quoted: p.string_quoted,
4681 expression: None,
4682 },
4683 )));
4684 }
4685 }
4686 }
4687 // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
4688 if let Expression::Column(ref col) = e {
4689 if col.name.name.starts_with('@') && col.table.is_none() {
4690 let var_name = col.name.name.trim_start_matches('@').to_string();
4691 return Ok(Expression::Parameter(Box::new(
4692 crate::expressions::Parameter {
4693 name: Some(var_name),
4694 index: None,
4695 style: crate::expressions::ParameterStyle::DollarBrace,
4696 quoted: false,
4697 string_quoted: false,
4698 expression: None,
4699 },
4700 )));
4701 }
4702 }
4703 }
4704
4705 // Convert @variable -> variable in SET statements for Spark/Databricks
4706 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4707 && matches!(target, DialectType::Spark | DialectType::Databricks)
4708 {
4709 if let Expression::SetStatement(ref s) = e {
4710 let mut new_items = s.items.clone();
4711 let mut changed = false;
4712 for item in &mut new_items {
4713 // Strip @ from the SET name (Parameter style)
4714 if let Expression::Parameter(ref p) = item.name {
4715 if p.style == crate::expressions::ParameterStyle::At {
4716 if let Some(ref name) = p.name {
4717 item.name = Expression::Identifier(Identifier::new(name));
4718 changed = true;
4719 }
4720 }
4721 }
4722 // Strip @ from the SET name (Identifier style - SET parser)
4723 if let Expression::Identifier(ref id) = item.name {
4724 if id.name.starts_with('@') {
4725 let var_name = id.name.trim_start_matches('@').to_string();
4726 item.name = Expression::Identifier(Identifier::new(&var_name));
4727 changed = true;
4728 }
4729 }
4730 // Strip @ from the SET name (Column style - alternative parsing)
4731 if let Expression::Column(ref col) = item.name {
4732 if col.name.name.starts_with('@') && col.table.is_none() {
4733 let var_name = col.name.name.trim_start_matches('@').to_string();
4734 item.name = Expression::Identifier(Identifier::new(&var_name));
4735 changed = true;
4736 }
4737 }
4738 }
4739 if changed {
4740 let mut new_set = (**s).clone();
4741 new_set.items = new_items;
4742 return Ok(Expression::SetStatement(Box::new(new_set)));
4743 }
4744 }
4745 }
4746
4747 // Strip NOLOCK hint for non-TSQL targets
4748 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4749 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4750 {
4751 if let Expression::Table(ref tr) = e {
4752 if !tr.hints.is_empty() {
4753 let mut new_tr = tr.clone();
4754 new_tr.hints.clear();
4755 return Ok(Expression::Table(new_tr));
4756 }
4757 }
4758 }
4759
4760 // Snowflake: TRUE IS TRUE -> TRUE, FALSE IS FALSE -> FALSE
4761 // Snowflake simplifies IS TRUE/IS FALSE on boolean literals
4762 if matches!(target, DialectType::Snowflake) {
4763 if let Expression::IsTrue(ref itf) = e {
4764 if let Expression::Boolean(ref b) = itf.this {
4765 if !itf.not {
4766 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4767 value: b.value,
4768 }));
4769 } else {
4770 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4771 value: !b.value,
4772 }));
4773 }
4774 }
4775 }
4776 if let Expression::IsFalse(ref itf) = e {
4777 if let Expression::Boolean(ref b) = itf.this {
4778 if !itf.not {
4779 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4780 value: !b.value,
4781 }));
4782 } else {
4783 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4784 value: b.value,
4785 }));
4786 }
4787 }
4788 }
4789 }
4790
4791 // BigQuery: split dotted backtick identifiers in table names
4792 // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
4793 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
4794 if let Expression::CreateTable(ref ct) = e {
4795 let mut changed = false;
4796 let mut new_ct = ct.clone();
4797 // Split the table name
4798 if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
4799 let parts: Vec<&str> = ct.name.name.name.split('.').collect();
4800 // Use quoted identifiers when the original was quoted (backtick in BigQuery)
4801 let was_quoted = ct.name.name.quoted;
4802 let mk_id = |s: &str| {
4803 if was_quoted {
4804 Identifier::quoted(s)
4805 } else {
4806 Identifier::new(s)
4807 }
4808 };
4809 if parts.len() == 3 {
4810 new_ct.name.catalog = Some(mk_id(parts[0]));
4811 new_ct.name.schema = Some(mk_id(parts[1]));
4812 new_ct.name.name = mk_id(parts[2]);
4813 changed = true;
4814 } else if parts.len() == 2 {
4815 new_ct.name.schema = Some(mk_id(parts[0]));
4816 new_ct.name.name = mk_id(parts[1]);
4817 changed = true;
4818 }
4819 }
4820 // Split the clone source name
4821 if let Some(ref clone_src) = ct.clone_source {
4822 if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
4823 let parts: Vec<&str> = clone_src.name.name.split('.').collect();
4824 let was_quoted = clone_src.name.quoted;
4825 let mk_id = |s: &str| {
4826 if was_quoted {
4827 Identifier::quoted(s)
4828 } else {
4829 Identifier::new(s)
4830 }
4831 };
4832 let mut new_src = clone_src.clone();
4833 if parts.len() == 3 {
4834 new_src.catalog = Some(mk_id(parts[0]));
4835 new_src.schema = Some(mk_id(parts[1]));
4836 new_src.name = mk_id(parts[2]);
4837 new_ct.clone_source = Some(new_src);
4838 changed = true;
4839 } else if parts.len() == 2 {
4840 new_src.schema = Some(mk_id(parts[0]));
4841 new_src.name = mk_id(parts[1]);
4842 new_ct.clone_source = Some(new_src);
4843 changed = true;
4844 }
4845 }
4846 }
4847 if changed {
4848 return Ok(Expression::CreateTable(new_ct));
4849 }
4850 }
4851 }
4852
4853 // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
4854 // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
4855 if matches!(source, DialectType::BigQuery)
4856 && matches!(
4857 target,
4858 DialectType::DuckDB
4859 | DialectType::Presto
4860 | DialectType::Trino
4861 | DialectType::Athena
4862 )
4863 {
4864 if let Expression::Subscript(ref sub) = e {
4865 let (new_index, is_safe) = match &sub.index {
4866 // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
4867 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
4868 let Literal::Number(n) = lit.as_ref() else {
4869 unreachable!()
4870 };
4871 if let Ok(val) = n.parse::<i64>() {
4872 (
4873 Some(Expression::Literal(Box::new(Literal::Number(
4874 (val + 1).to_string(),
4875 )))),
4876 false,
4877 )
4878 } else {
4879 (None, false)
4880 }
4881 }
4882 // OFFSET(n) -> n+1 (0-based)
4883 Expression::Function(ref f)
4884 if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
4885 {
4886 if let Expression::Literal(lit) = &f.args[0] {
4887 if let Literal::Number(n) = lit.as_ref() {
4888 if let Ok(val) = n.parse::<i64>() {
4889 (
4890 Some(Expression::Literal(Box::new(Literal::Number(
4891 (val + 1).to_string(),
4892 )))),
4893 false,
4894 )
4895 } else {
4896 (
4897 Some(Expression::Add(Box::new(
4898 crate::expressions::BinaryOp::new(
4899 f.args[0].clone(),
4900 Expression::number(1),
4901 ),
4902 ))),
4903 false,
4904 )
4905 }
4906 } else {
4907 (None, false)
4908 }
4909 } else {
4910 (
4911 Some(Expression::Add(Box::new(
4912 crate::expressions::BinaryOp::new(
4913 f.args[0].clone(),
4914 Expression::number(1),
4915 ),
4916 ))),
4917 false,
4918 )
4919 }
4920 }
4921 // ORDINAL(n) -> n (already 1-based)
4922 Expression::Function(ref f)
4923 if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
4924 {
4925 (Some(f.args[0].clone()), false)
4926 }
4927 // SAFE_OFFSET(n) -> n+1 (0-based, safe)
4928 Expression::Function(ref f)
4929 if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
4930 {
4931 if let Expression::Literal(lit) = &f.args[0] {
4932 if let Literal::Number(n) = lit.as_ref() {
4933 if let Ok(val) = n.parse::<i64>() {
4934 (
4935 Some(Expression::Literal(Box::new(Literal::Number(
4936 (val + 1).to_string(),
4937 )))),
4938 true,
4939 )
4940 } else {
4941 (
4942 Some(Expression::Add(Box::new(
4943 crate::expressions::BinaryOp::new(
4944 f.args[0].clone(),
4945 Expression::number(1),
4946 ),
4947 ))),
4948 true,
4949 )
4950 }
4951 } else {
4952 (None, false)
4953 }
4954 } else {
4955 (
4956 Some(Expression::Add(Box::new(
4957 crate::expressions::BinaryOp::new(
4958 f.args[0].clone(),
4959 Expression::number(1),
4960 ),
4961 ))),
4962 true,
4963 )
4964 }
4965 }
4966 // SAFE_ORDINAL(n) -> n (already 1-based, safe)
4967 Expression::Function(ref f)
4968 if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
4969 {
4970 (Some(f.args[0].clone()), true)
4971 }
4972 _ => (None, false),
4973 };
4974 if let Some(idx) = new_index {
4975 if is_safe
4976 && matches!(
4977 target,
4978 DialectType::Presto | DialectType::Trino | DialectType::Athena
4979 )
4980 {
4981 // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
4982 return Ok(Expression::Function(Box::new(Function::new(
4983 "ELEMENT_AT".to_string(),
4984 vec![sub.this.clone(), idx],
4985 ))));
4986 } else {
4987 // DuckDB or non-safe: just use subscript with converted index
4988 return Ok(Expression::Subscript(Box::new(
4989 crate::expressions::Subscript {
4990 this: sub.this.clone(),
4991 index: idx,
4992 },
4993 )));
4994 }
4995 }
4996 }
4997 }
4998
4999 // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
5000 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
5001 if let Expression::Length(ref uf) = e {
5002 let arg = uf.this.clone();
5003 let typeof_func = Expression::Function(Box::new(Function::new(
5004 "TYPEOF".to_string(),
5005 vec![arg.clone()],
5006 )));
5007 let blob_cast = Expression::Cast(Box::new(Cast {
5008 this: arg.clone(),
5009 to: DataType::VarBinary { length: None },
5010 trailing_comments: vec![],
5011 double_colon_syntax: false,
5012 format: None,
5013 default: None,
5014 inferred_type: None,
5015 }));
5016 let octet_length = Expression::Function(Box::new(Function::new(
5017 "OCTET_LENGTH".to_string(),
5018 vec![blob_cast],
5019 )));
5020 let text_cast = Expression::Cast(Box::new(Cast {
5021 this: arg,
5022 to: DataType::Text,
5023 trailing_comments: vec![],
5024 double_colon_syntax: false,
5025 format: None,
5026 default: None,
5027 inferred_type: None,
5028 }));
5029 let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
5030 this: text_cast,
5031 original_name: None,
5032 inferred_type: None,
5033 }));
5034 return Ok(Expression::Case(Box::new(Case {
5035 operand: Some(typeof_func),
5036 whens: vec![(
5037 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
5038 octet_length,
5039 )],
5040 else_: Some(length_text),
5041 comments: Vec::new(),
5042 inferred_type: None,
5043 })));
5044 }
5045 }
5046
5047 // BigQuery UNNEST alias handling (only for non-BigQuery sources):
5048 // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
5049 // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
5050 if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
5051 if let Expression::Alias(ref a) = e {
5052 if matches!(&a.this, Expression::Unnest(_)) {
5053 if a.column_aliases.is_empty() {
5054 // Drop the entire alias, return just the UNNEST expression
5055 return Ok(a.this.clone());
5056 } else {
5057 // Use first column alias as the main alias
5058 let mut new_alias = a.as_ref().clone();
5059 new_alias.alias = a.column_aliases[0].clone();
5060 new_alias.column_aliases.clear();
5061 return Ok(Expression::Alias(Box::new(new_alias)));
5062 }
5063 }
5064 }
5065 }
5066
5067 // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
5068 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
5069 if let Expression::In(ref in_expr) = e {
5070 if let Some(ref unnest_inner) = in_expr.unnest {
5071 // Build the function call for the target dialect
5072 let func_expr = if matches!(
5073 target,
5074 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5075 ) {
5076 // Use EXPLODE for Hive/Spark
5077 Expression::Function(Box::new(Function::new(
5078 "EXPLODE".to_string(),
5079 vec![*unnest_inner.clone()],
5080 )))
5081 } else {
5082 // Use UNNEST for Presto/Trino/DuckDB/etc.
5083 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
5084 this: *unnest_inner.clone(),
5085 expressions: Vec::new(),
5086 with_ordinality: false,
5087 alias: None,
5088 offset_alias: None,
5089 }))
5090 };
5091
5092 // Wrap in SELECT
5093 let mut inner_select = crate::expressions::Select::new();
5094 inner_select.expressions = vec![func_expr];
5095
5096 let subquery_expr = Expression::Select(Box::new(inner_select));
5097
5098 return Ok(Expression::In(Box::new(crate::expressions::In {
5099 this: in_expr.this.clone(),
5100 expressions: Vec::new(),
5101 query: Some(subquery_expr),
5102 not: in_expr.not,
5103 global: in_expr.global,
5104 unnest: None,
5105 is_field: false,
5106 })));
5107 }
5108 }
5109 }
5110
5111 // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
5112 // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
5113 if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
5114 if let Expression::Alias(ref a) = e {
5115 if let Expression::Function(ref f) = a.this {
5116 if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
5117 && !a.column_aliases.is_empty()
5118 {
5119 // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
5120 let col_alias = a.column_aliases[0].clone();
5121 let mut inner_select = crate::expressions::Select::new();
5122 inner_select.expressions =
5123 vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
5124 Expression::Identifier(Identifier::new("value".to_string())),
5125 col_alias,
5126 )))];
5127 inner_select.from = Some(crate::expressions::From {
5128 expressions: vec![a.this.clone()],
5129 });
5130 let subquery =
5131 Expression::Subquery(Box::new(crate::expressions::Subquery {
5132 this: Expression::Select(Box::new(inner_select)),
5133 alias: Some(a.alias.clone()),
5134 column_aliases: Vec::new(),
5135 order_by: None,
5136 limit: None,
5137 offset: None,
5138 lateral: false,
5139 modifiers_inside: false,
5140 trailing_comments: Vec::new(),
5141 distribute_by: None,
5142 sort_by: None,
5143 cluster_by: None,
5144 inferred_type: None,
5145 }));
5146 return Ok(subquery);
5147 }
5148 }
5149 }
5150 }
5151
5152 // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
5153 // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
5154 // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
5155 if matches!(source, DialectType::BigQuery) {
5156 if let Expression::Select(ref s) = e {
5157 if let Some(ref from) = s.from {
5158 if from.expressions.len() >= 2 {
5159 // Collect table names from first expression
5160 let first_tables: Vec<String> = from
5161 .expressions
5162 .iter()
5163 .take(1)
5164 .filter_map(|expr| {
5165 if let Expression::Table(t) = expr {
5166 Some(t.name.name.to_ascii_lowercase())
5167 } else {
5168 None
5169 }
5170 })
5171 .collect();
5172
5173 // Check if any subsequent FROM expressions are schema-qualified with a matching table name
5174 // or have a dotted name matching a table
5175 let mut needs_rewrite = false;
5176 for expr in from.expressions.iter().skip(1) {
5177 if let Expression::Table(t) = expr {
5178 if let Some(ref schema) = t.schema {
5179 if first_tables.contains(&schema.name.to_ascii_lowercase())
5180 {
5181 needs_rewrite = true;
5182 break;
5183 }
5184 }
5185 // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
5186 if t.schema.is_none() && t.name.name.contains('.') {
5187 let parts: Vec<&str> = t.name.name.split('.').collect();
5188 if parts.len() >= 2
5189 && first_tables.contains(&parts[0].to_ascii_lowercase())
5190 {
5191 needs_rewrite = true;
5192 break;
5193 }
5194 }
5195 }
5196 }
5197
5198 if needs_rewrite {
5199 let mut new_select = s.clone();
5200 let mut new_from_exprs = vec![from.expressions[0].clone()];
5201 let mut new_joins = s.joins.clone();
5202
5203 for expr in from.expressions.iter().skip(1) {
5204 if let Expression::Table(ref t) = expr {
5205 if let Some(ref schema) = t.schema {
5206 if first_tables
5207 .contains(&schema.name.to_ascii_lowercase())
5208 {
5209 // This is an array path reference, convert to CROSS JOIN UNNEST
5210 let col_expr = Expression::Column(Box::new(
5211 crate::expressions::Column {
5212 name: t.name.clone(),
5213 table: Some(schema.clone()),
5214 join_mark: false,
5215 trailing_comments: vec![],
5216 span: None,
5217 inferred_type: None,
5218 },
5219 ));
5220 let unnest_expr = Expression::Unnest(Box::new(
5221 crate::expressions::UnnestFunc {
5222 this: col_expr,
5223 expressions: Vec::new(),
5224 with_ordinality: false,
5225 alias: None,
5226 offset_alias: None,
5227 },
5228 ));
5229 let join_this = if let Some(ref alias) = t.alias {
5230 if matches!(
5231 target,
5232 DialectType::Presto
5233 | DialectType::Trino
5234 | DialectType::Athena
5235 ) {
5236 // Presto: UNNEST(x) AS _t0(results)
5237 Expression::Alias(Box::new(
5238 crate::expressions::Alias {
5239 this: unnest_expr,
5240 alias: Identifier::new("_t0"),
5241 column_aliases: vec![alias.clone()],
5242 pre_alias_comments: vec![],
5243 trailing_comments: vec![],
5244 inferred_type: None,
5245 },
5246 ))
5247 } else {
5248 // BigQuery: UNNEST(x) AS results
5249 Expression::Alias(Box::new(
5250 crate::expressions::Alias {
5251 this: unnest_expr,
5252 alias: alias.clone(),
5253 column_aliases: vec![],
5254 pre_alias_comments: vec![],
5255 trailing_comments: vec![],
5256 inferred_type: None,
5257 },
5258 ))
5259 }
5260 } else {
5261 unnest_expr
5262 };
5263 new_joins.push(crate::expressions::Join {
5264 kind: crate::expressions::JoinKind::Cross,
5265 this: join_this,
5266 on: None,
5267 using: Vec::new(),
5268 use_inner_keyword: false,
5269 use_outer_keyword: false,
5270 deferred_condition: false,
5271 join_hint: None,
5272 match_condition: None,
5273 pivots: Vec::new(),
5274 comments: Vec::new(),
5275 nesting_group: 0,
5276 directed: false,
5277 });
5278 } else {
5279 new_from_exprs.push(expr.clone());
5280 }
5281 } else if t.schema.is_none() && t.name.name.contains('.') {
5282 // Dotted name in quoted identifier: `Coordinates.position`
5283 let parts: Vec<&str> = t.name.name.split('.').collect();
5284 if parts.len() >= 2
5285 && first_tables
5286 .contains(&parts[0].to_ascii_lowercase())
5287 {
5288 let join_this =
5289 if matches!(target, DialectType::BigQuery) {
5290 // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
5291 Expression::Table(t.clone())
5292 } else {
5293 // Other targets: split into "schema"."name"
5294 let mut new_t = t.clone();
5295 new_t.schema =
5296 Some(Identifier::quoted(parts[0]));
5297 new_t.name = Identifier::quoted(parts[1]);
5298 Expression::Table(new_t)
5299 };
5300 new_joins.push(crate::expressions::Join {
5301 kind: crate::expressions::JoinKind::Cross,
5302 this: join_this,
5303 on: None,
5304 using: Vec::new(),
5305 use_inner_keyword: false,
5306 use_outer_keyword: false,
5307 deferred_condition: false,
5308 join_hint: None,
5309 match_condition: None,
5310 pivots: Vec::new(),
5311 comments: Vec::new(),
5312 nesting_group: 0,
5313 directed: false,
5314 });
5315 } else {
5316 new_from_exprs.push(expr.clone());
5317 }
5318 } else {
5319 new_from_exprs.push(expr.clone());
5320 }
5321 } else {
5322 new_from_exprs.push(expr.clone());
5323 }
5324 }
5325
5326 new_select.from = Some(crate::expressions::From {
5327 expressions: new_from_exprs,
5328 ..from.clone()
5329 });
5330 new_select.joins = new_joins;
5331 return Ok(Expression::Select(new_select));
5332 }
5333 }
5334 }
5335 }
5336 }
5337
5338 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
5339 if matches!(
5340 target,
5341 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5342 ) {
5343 if let Expression::Select(ref s) = e {
5344 // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
5345 let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
5346 matches!(expr, Expression::Unnest(_))
5347 || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
5348 };
5349 let has_unnest_join = s.joins.iter().any(|j| {
5350 j.kind == crate::expressions::JoinKind::Cross && (
5351 matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
5352 || is_unnest_or_explode_expr(&j.this)
5353 )
5354 });
5355 if has_unnest_join {
5356 let mut select = s.clone();
5357 let mut new_joins = Vec::new();
5358 for join in select.joins.drain(..) {
5359 if join.kind == crate::expressions::JoinKind::Cross {
5360 // Extract the UNNEST/EXPLODE from the join
5361 let (func_expr, table_alias, col_aliases) = match &join.this {
5362 Expression::Alias(a) => {
5363 let ta = if a.alias.is_empty() {
5364 None
5365 } else {
5366 Some(a.alias.clone())
5367 };
5368 let cas = a.column_aliases.clone();
5369 match &a.this {
5370 Expression::Unnest(u) => {
5371 // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
5372 if !u.expressions.is_empty() {
5373 let mut all_args = vec![u.this.clone()];
5374 all_args.extend(u.expressions.clone());
5375 let arrays_zip =
5376 Expression::Function(Box::new(
5377 crate::expressions::Function::new(
5378 "ARRAYS_ZIP".to_string(),
5379 all_args,
5380 ),
5381 ));
5382 let inline = Expression::Function(Box::new(
5383 crate::expressions::Function::new(
5384 "INLINE".to_string(),
5385 vec![arrays_zip],
5386 ),
5387 ));
5388 (Some(inline), ta, a.column_aliases.clone())
5389 } else {
5390 // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
5391 let func_name = if u.with_ordinality {
5392 "POSEXPLODE"
5393 } else {
5394 "EXPLODE"
5395 };
5396 let explode = Expression::Function(Box::new(
5397 crate::expressions::Function::new(
5398 func_name.to_string(),
5399 vec![u.this.clone()],
5400 ),
5401 ));
5402 // For POSEXPLODE, add 'pos' to column aliases
5403 let cas = if u.with_ordinality {
5404 let mut pos_aliases =
5405 vec![Identifier::new(
5406 "pos".to_string(),
5407 )];
5408 pos_aliases
5409 .extend(a.column_aliases.clone());
5410 pos_aliases
5411 } else {
5412 a.column_aliases.clone()
5413 };
5414 (Some(explode), ta, cas)
5415 }
5416 }
5417 Expression::Function(f)
5418 if f.name.eq_ignore_ascii_case("EXPLODE") =>
5419 {
5420 (Some(Expression::Function(f.clone())), ta, cas)
5421 }
5422 _ => (None, None, Vec::new()),
5423 }
5424 }
5425 Expression::Unnest(u) => {
5426 let func_name = if u.with_ordinality {
5427 "POSEXPLODE"
5428 } else {
5429 "EXPLODE"
5430 };
5431 let explode = Expression::Function(Box::new(
5432 crate::expressions::Function::new(
5433 func_name.to_string(),
5434 vec![u.this.clone()],
5435 ),
5436 ));
5437 let ta = u.alias.clone();
5438 let col_aliases = if u.with_ordinality {
5439 vec![Identifier::new("pos".to_string())]
5440 } else {
5441 Vec::new()
5442 };
5443 (Some(explode), ta, col_aliases)
5444 }
5445 _ => (None, None, Vec::new()),
5446 };
5447 if let Some(func) = func_expr {
5448 select.lateral_views.push(crate::expressions::LateralView {
5449 this: func,
5450 table_alias,
5451 column_aliases: col_aliases,
5452 outer: false,
5453 });
5454 } else {
5455 new_joins.push(join);
5456 }
5457 } else {
5458 new_joins.push(join);
5459 }
5460 }
5461 select.joins = new_joins;
5462 return Ok(Expression::Select(select));
5463 }
5464 }
5465 }
5466
5467 // UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
5468 // for BigQuery, Presto/Trino, Snowflake
5469 if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
5470 && matches!(
5471 target,
5472 DialectType::BigQuery
5473 | DialectType::Presto
5474 | DialectType::Trino
5475 | DialectType::Snowflake
5476 )
5477 {
5478 if let Expression::Select(ref s) = e {
5479 // Check if any SELECT expressions contain UNNEST
5480 // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
5481 let has_unnest_in_select = s.expressions.iter().any(|expr| {
5482 fn contains_unnest(e: &Expression) -> bool {
5483 match e {
5484 Expression::Unnest(_) => true,
5485 Expression::Function(f)
5486 if f.name.eq_ignore_ascii_case("UNNEST") =>
5487 {
5488 true
5489 }
5490 Expression::Alias(a) => contains_unnest(&a.this),
5491 Expression::Add(op)
5492 | Expression::Sub(op)
5493 | Expression::Mul(op)
5494 | Expression::Div(op) => {
5495 contains_unnest(&op.left) || contains_unnest(&op.right)
5496 }
5497 _ => false,
5498 }
5499 }
5500 contains_unnest(expr)
5501 });
5502
5503 if has_unnest_in_select {
5504 let rewritten = Self::rewrite_unnest_expansion(s, target);
5505 if let Some(new_select) = rewritten {
5506 return Ok(Expression::Select(Box::new(new_select)));
5507 }
5508 }
5509 }
5510 }
5511
5512 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
5513 // BigQuery '\n' -> PostgreSQL literal newline in string
5514 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
5515 {
5516 if let Expression::Literal(ref lit) = e {
5517 if let Literal::String(ref s) = lit.as_ref() {
5518 if s.contains("\\n")
5519 || s.contains("\\t")
5520 || s.contains("\\r")
5521 || s.contains("\\\\")
5522 {
5523 let converted = s
5524 .replace("\\n", "\n")
5525 .replace("\\t", "\t")
5526 .replace("\\r", "\r")
5527 .replace("\\\\", "\\");
5528 return Ok(Expression::Literal(Box::new(Literal::String(converted))));
5529 }
5530 }
5531 }
5532 }
5533
5534 // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
5535 // when source != target (identity tests keep the Literal::Timestamp for native handling)
5536 if source != target {
5537 if let Expression::Literal(ref lit) = e {
5538 if let Literal::Timestamp(ref s) = lit.as_ref() {
5539 let s = s.clone();
5540 // MySQL: TIMESTAMP handling depends on source dialect
5541 // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
5542 // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
5543 if matches!(target, DialectType::MySQL) {
5544 if matches!(source, DialectType::BigQuery) {
5545 // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
5546 return Ok(Expression::Function(Box::new(Function::new(
5547 "TIMESTAMP".to_string(),
5548 vec![Expression::Literal(Box::new(Literal::String(s)))],
5549 ))));
5550 } else {
5551 // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
5552 return Ok(Expression::Cast(Box::new(Cast {
5553 this: Expression::Literal(Box::new(Literal::String(s))),
5554 to: DataType::Custom {
5555 name: "DATETIME".to_string(),
5556 },
5557 trailing_comments: Vec::new(),
5558 double_colon_syntax: false,
5559 format: None,
5560 default: None,
5561 inferred_type: None,
5562 })));
5563 }
5564 }
5565 let dt = match target {
5566 DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
5567 name: "DATETIME".to_string(),
5568 },
5569 DialectType::Snowflake => {
5570 // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
5571 if matches!(source, DialectType::BigQuery) {
5572 DataType::Custom {
5573 name: "TIMESTAMPTZ".to_string(),
5574 }
5575 } else if matches!(
5576 source,
5577 DialectType::PostgreSQL
5578 | DialectType::Redshift
5579 | DialectType::Snowflake
5580 ) {
5581 DataType::Timestamp {
5582 precision: None,
5583 timezone: false,
5584 }
5585 } else {
5586 DataType::Custom {
5587 name: "TIMESTAMPNTZ".to_string(),
5588 }
5589 }
5590 }
5591 DialectType::Spark | DialectType::Databricks => {
5592 // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
5593 if matches!(source, DialectType::BigQuery) {
5594 DataType::Timestamp {
5595 precision: None,
5596 timezone: false,
5597 }
5598 } else {
5599 DataType::Custom {
5600 name: "TIMESTAMP_NTZ".to_string(),
5601 }
5602 }
5603 }
5604 DialectType::ClickHouse => DataType::Nullable {
5605 inner: Box::new(DataType::Custom {
5606 name: "DateTime".to_string(),
5607 }),
5608 },
5609 DialectType::TSQL | DialectType::Fabric => DataType::Custom {
5610 name: "DATETIME2".to_string(),
5611 },
5612 DialectType::DuckDB => {
5613 // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
5614 // or when the timestamp string explicitly has timezone info
5615 if matches!(source, DialectType::BigQuery)
5616 || Self::timestamp_string_has_timezone(&s)
5617 {
5618 DataType::Custom {
5619 name: "TIMESTAMPTZ".to_string(),
5620 }
5621 } else {
5622 DataType::Timestamp {
5623 precision: None,
5624 timezone: false,
5625 }
5626 }
5627 }
5628 _ => DataType::Timestamp {
5629 precision: None,
5630 timezone: false,
5631 },
5632 };
5633 return Ok(Expression::Cast(Box::new(Cast {
5634 this: Expression::Literal(Box::new(Literal::String(s))),
5635 to: dt,
5636 trailing_comments: vec![],
5637 double_colon_syntax: false,
5638 format: None,
5639 default: None,
5640 inferred_type: None,
5641 })));
5642 }
5643 }
5644 }
5645
5646 // PostgreSQL DELETE requires explicit AS for table aliases
5647 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
5648 if let Expression::Delete(ref del) = e {
5649 if del.alias.is_some() && !del.alias_explicit_as {
5650 let mut new_del = del.clone();
5651 new_del.alias_explicit_as = true;
5652 return Ok(Expression::Delete(new_del));
5653 }
5654 }
5655 }
5656
5657 // UNION/INTERSECT/EXCEPT DISTINCT handling:
5658 // Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
5659 // while others don't support it (Presto, Spark, DuckDB, etc.)
5660 {
5661 let needs_distinct =
5662 matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
5663 let drop_distinct = matches!(
5664 target,
5665 DialectType::Presto
5666 | DialectType::Trino
5667 | DialectType::Athena
5668 | DialectType::Spark
5669 | DialectType::Databricks
5670 | DialectType::DuckDB
5671 | DialectType::Hive
5672 | DialectType::MySQL
5673 | DialectType::PostgreSQL
5674 | DialectType::SQLite
5675 | DialectType::TSQL
5676 | DialectType::Redshift
5677 | DialectType::Snowflake
5678 | DialectType::Oracle
5679 | DialectType::Teradata
5680 | DialectType::Drill
5681 | DialectType::Doris
5682 | DialectType::StarRocks
5683 );
5684 match &e {
5685 Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
5686 let mut new_u = (**u).clone();
5687 new_u.distinct = true;
5688 return Ok(Expression::Union(Box::new(new_u)));
5689 }
5690 Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
5691 let mut new_i = (**i).clone();
5692 new_i.distinct = true;
5693 return Ok(Expression::Intersect(Box::new(new_i)));
5694 }
5695 Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
5696 let mut new_ex = (**ex).clone();
5697 new_ex.distinct = true;
5698 return Ok(Expression::Except(Box::new(new_ex)));
5699 }
5700 Expression::Union(u) if u.distinct && drop_distinct => {
5701 let mut new_u = (**u).clone();
5702 new_u.distinct = false;
5703 return Ok(Expression::Union(Box::new(new_u)));
5704 }
5705 Expression::Intersect(i) if i.distinct && drop_distinct => {
5706 let mut new_i = (**i).clone();
5707 new_i.distinct = false;
5708 return Ok(Expression::Intersect(Box::new(new_i)));
5709 }
5710 Expression::Except(ex) if ex.distinct && drop_distinct => {
5711 let mut new_ex = (**ex).clone();
5712 new_ex.distinct = false;
5713 return Ok(Expression::Except(Box::new(new_ex)));
5714 }
5715 _ => {}
5716 }
5717 }
5718
5719 // ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name)
5720 if matches!(target, DialectType::ClickHouse) {
5721 if let Expression::Function(ref f) = e {
5722 if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
5723 let mut new_f = f.as_ref().clone();
5724 new_f.name = "map".to_string();
5725 return Ok(Expression::Function(Box::new(new_f)));
5726 }
5727 }
5728 }
5729
5730 // ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
5731 if matches!(target, DialectType::ClickHouse) {
5732 if let Expression::Intersect(ref i) = e {
5733 if i.all {
5734 let mut new_i = (**i).clone();
5735 new_i.all = false;
5736 return Ok(Expression::Intersect(Box::new(new_i)));
5737 }
5738 }
5739 }
5740
5741 // Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
5742 // Only from Generic source, to prevent double-wrapping
5743 if matches!(source, DialectType::Generic) {
5744 if let Expression::Div(ref op) = e {
5745 let cast_type = match target {
5746 DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
5747 precision: None,
5748 scale: None,
5749 real_spelling: false,
5750 }),
5751 DialectType::Drill
5752 | DialectType::Trino
5753 | DialectType::Athena
5754 | DialectType::Presto => Some(DataType::Double {
5755 precision: None,
5756 scale: None,
5757 }),
5758 DialectType::PostgreSQL
5759 | DialectType::Redshift
5760 | DialectType::Materialize
5761 | DialectType::Teradata
5762 | DialectType::RisingWave => Some(DataType::Double {
5763 precision: None,
5764 scale: None,
5765 }),
5766 _ => None,
5767 };
5768 if let Some(dt) = cast_type {
5769 let cast_left = Expression::Cast(Box::new(Cast {
5770 this: op.left.clone(),
5771 to: dt,
5772 double_colon_syntax: false,
5773 trailing_comments: Vec::new(),
5774 format: None,
5775 default: None,
5776 inferred_type: None,
5777 }));
5778 let new_op = crate::expressions::BinaryOp {
5779 left: cast_left,
5780 right: op.right.clone(),
5781 left_comments: op.left_comments.clone(),
5782 operator_comments: op.operator_comments.clone(),
5783 trailing_comments: op.trailing_comments.clone(),
5784 inferred_type: None,
5785 };
5786 return Ok(Expression::Div(Box::new(new_op)));
5787 }
5788 }
5789 }
5790
5791 // CREATE DATABASE -> CREATE SCHEMA for DuckDB target
5792 if matches!(target, DialectType::DuckDB) {
5793 if let Expression::CreateDatabase(db) = e {
5794 let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
5795 schema.if_not_exists = db.if_not_exists;
5796 return Ok(Expression::CreateSchema(Box::new(schema)));
5797 }
5798 if let Expression::DropDatabase(db) = e {
5799 let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
5800 schema.if_exists = db.if_exists;
5801 return Ok(Expression::DropSchema(Box::new(schema)));
5802 }
5803 }
5804
5805 // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
5806 if matches!(source, DialectType::ClickHouse)
5807 && !matches!(target, DialectType::ClickHouse)
5808 {
5809 if let Expression::Cast(ref c) = e {
5810 if let DataType::Custom { ref name } = c.to {
5811 if name.len() >= 9
5812 && name[..9].eq_ignore_ascii_case("NULLABLE(")
5813 && name.ends_with(")")
5814 {
5815 let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
5816 let inner_upper = inner.to_ascii_uppercase();
5817 let new_dt = match inner_upper.as_str() {
5818 "DATETIME" | "DATETIME64" => DataType::Timestamp {
5819 precision: None,
5820 timezone: false,
5821 },
5822 "DATE" => DataType::Date,
5823 "INT64" | "BIGINT" => DataType::BigInt { length: None },
5824 "INT32" | "INT" | "INTEGER" => DataType::Int {
5825 length: None,
5826 integer_spelling: false,
5827 },
5828 "FLOAT64" | "DOUBLE" => DataType::Double {
5829 precision: None,
5830 scale: None,
5831 },
5832 "STRING" => DataType::Text,
5833 _ => DataType::Custom {
5834 name: inner.to_string(),
5835 },
5836 };
5837 let mut new_cast = c.clone();
5838 new_cast.to = new_dt;
5839 return Ok(Expression::Cast(new_cast));
5840 }
5841 }
5842 }
5843 }
5844
5845 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
5846 if matches!(target, DialectType::Snowflake) {
5847 if let Expression::ArrayConcatAgg(ref agg) = e {
5848 let mut agg_clone = agg.as_ref().clone();
5849 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
5850 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
5851 let flatten = Expression::Function(Box::new(Function::new(
5852 "ARRAY_FLATTEN".to_string(),
5853 vec![array_agg],
5854 )));
5855 return Ok(flatten);
5856 }
5857 }
5858
5859 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
5860 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
5861 if let Expression::ArrayConcatAgg(agg) = e {
5862 let arg = agg.this;
5863 return Ok(Expression::Function(Box::new(Function::new(
5864 "ARRAY_CONCAT_AGG".to_string(),
5865 vec![arg],
5866 ))));
5867 }
5868 }
5869
5870 // Determine what action to take by inspecting e immutably
5871 let action = {
5872 let source_propagates_nulls =
5873 matches!(source, DialectType::Snowflake | DialectType::BigQuery);
5874 let target_ignores_nulls =
5875 matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
5876
5877 match &e {
5878 Expression::Function(f) => {
5879 let name = f.name.to_ascii_uppercase();
5880 // DuckDB json(x) is a synonym for CAST(x AS JSON) — parses a string.
5881 // Map to JSON_PARSE(x) for Trino/Presto/Athena to preserve semantics.
5882 if name == "JSON"
5883 && f.args.len() == 1
5884 && matches!(source, DialectType::DuckDB)
5885 && matches!(
5886 target,
5887 DialectType::Presto | DialectType::Trino | DialectType::Athena
5888 )
5889 {
5890 Action::DuckDBJsonFuncToJsonParse
5891 // DuckDB json_valid(x) has no direct Trino equivalent; emit the
5892 // SQL:2016 `x IS JSON` predicate which has matching semantics.
5893 } else if name == "JSON_VALID"
5894 && f.args.len() == 1
5895 && matches!(source, DialectType::DuckDB)
5896 && matches!(
5897 target,
5898 DialectType::Presto | DialectType::Trino | DialectType::Athena
5899 )
5900 {
5901 Action::DuckDBJsonValidToIsJson
5902 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
5903 } else if (name == "DATE_PART" || name == "DATEPART")
5904 && f.args.len() == 2
5905 && matches!(target, DialectType::Snowflake)
5906 && !matches!(source, DialectType::Snowflake)
5907 && matches!(
5908 &f.args[0],
5909 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
5910 )
5911 {
5912 Action::DatePartUnquote
5913 } else if source_propagates_nulls
5914 && target_ignores_nulls
5915 && (name == "GREATEST" || name == "LEAST")
5916 && f.args.len() >= 2
5917 {
5918 Action::GreatestLeastNull
5919 } else if matches!(source, DialectType::Snowflake)
5920 && name == "ARRAY_GENERATE_RANGE"
5921 && f.args.len() >= 2
5922 {
5923 Action::ArrayGenerateRange
5924 } else if matches!(source, DialectType::Snowflake)
5925 && matches!(target, DialectType::DuckDB)
5926 && name == "DATE_TRUNC"
5927 && f.args.len() == 2
5928 {
5929 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
5930 // Logic based on Python sqlglot's input_type_preserved flag:
5931 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
5932 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
5933 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
5934 let unit_str = match &f.args[0] {
5935 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
5936 let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
5937 Some(s.to_ascii_uppercase())
5938 }
5939 _ => None,
5940 };
5941 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
5942 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
5943 });
5944 match &f.args[1] {
5945 Expression::Cast(c) => match &c.to {
5946 DataType::Time { .. } => Action::DateTruncWrapCast,
5947 DataType::Custom { name }
5948 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
5949 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
5950 {
5951 Action::DateTruncWrapCast
5952 }
5953 DataType::Timestamp { timezone: true, .. } => {
5954 Action::DateTruncWrapCast
5955 }
5956 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
5957 DataType::Timestamp {
5958 timezone: false, ..
5959 } if is_date_unit => Action::DateTruncWrapCast,
5960 _ => Action::None,
5961 },
5962 _ => Action::None,
5963 }
5964 } else if matches!(source, DialectType::Snowflake)
5965 && matches!(target, DialectType::DuckDB)
5966 && name == "TO_DATE"
5967 && f.args.len() == 1
5968 && !matches!(
5969 &f.args[0],
5970 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
5971 )
5972 {
5973 Action::ToDateToCast
5974 } else if !matches!(source, DialectType::Redshift)
5975 && matches!(target, DialectType::Redshift)
5976 && name == "CONVERT_TIMEZONE"
5977 && (f.args.len() == 2 || f.args.len() == 3)
5978 {
5979 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
5980 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
5981 // The Redshift parser adds 'UTC' as default source_tz, but when
5982 // transpiling from other dialects, we should preserve the original form.
5983 Action::ConvertTimezoneToExpr
5984 } else if matches!(source, DialectType::Snowflake)
5985 && matches!(target, DialectType::DuckDB)
5986 && name == "REGEXP_REPLACE"
5987 && f.args.len() == 4
5988 && !matches!(
5989 &f.args[3],
5990 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
5991 )
5992 {
5993 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
5994 Action::RegexpReplaceSnowflakeToDuckDB
5995 } else if matches!(source, DialectType::Snowflake)
5996 && matches!(target, DialectType::DuckDB)
5997 && name == "REGEXP_REPLACE"
5998 && f.args.len() == 5
5999 {
6000 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB
6001 Action::RegexpReplacePositionSnowflakeToDuckDB
6002 } else if matches!(source, DialectType::Snowflake)
6003 && matches!(target, DialectType::DuckDB)
6004 && name == "REGEXP_SUBSTR"
6005 {
6006 // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
6007 Action::RegexpSubstrSnowflakeToDuckDB
6008 } else if matches!(source, DialectType::Snowflake)
6009 && matches!(target, DialectType::Snowflake)
6010 && (name == "REGEXP_SUBSTR" || name == "REGEXP_SUBSTR_ALL")
6011 && f.args.len() == 6
6012 {
6013 // Snowflake identity: strip trailing group=0
6014 Action::RegexpSubstrSnowflakeIdentity
6015 } else if matches!(source, DialectType::Snowflake)
6016 && matches!(target, DialectType::DuckDB)
6017 && name == "REGEXP_SUBSTR_ALL"
6018 {
6019 // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
6020 Action::RegexpSubstrAllSnowflakeToDuckDB
6021 } else if matches!(source, DialectType::Snowflake)
6022 && matches!(target, DialectType::DuckDB)
6023 && name == "REGEXP_COUNT"
6024 {
6025 // Snowflake REGEXP_COUNT -> DuckDB LENGTH(REGEXP_EXTRACT_ALL(...))
6026 Action::RegexpCountSnowflakeToDuckDB
6027 } else if matches!(source, DialectType::Snowflake)
6028 && matches!(target, DialectType::DuckDB)
6029 && name == "REGEXP_INSTR"
6030 {
6031 // Snowflake REGEXP_INSTR -> DuckDB complex CASE expression
6032 Action::RegexpInstrSnowflakeToDuckDB
6033 } else if matches!(source, DialectType::BigQuery)
6034 && matches!(target, DialectType::Snowflake)
6035 && name == "REGEXP_EXTRACT_ALL"
6036 {
6037 // BigQuery REGEXP_EXTRACT_ALL -> Snowflake REGEXP_SUBSTR_ALL
6038 Action::RegexpExtractAllToSnowflake
6039 } else if name == "_BQ_TO_HEX" {
6040 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
6041 Action::BigQueryToHexBare
6042 } else if matches!(source, DialectType::BigQuery)
6043 && !matches!(target, DialectType::BigQuery)
6044 {
6045 // BigQuery-specific functions that need to be converted to standard forms
6046 match name.as_str() {
6047 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
6048 | "DATE_DIFF"
6049 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
6050 | "DATETIME_ADD" | "DATETIME_SUB"
6051 | "TIME_ADD" | "TIME_SUB"
6052 | "DATE_ADD" | "DATE_SUB"
6053 | "SAFE_DIVIDE"
6054 | "GENERATE_UUID"
6055 | "COUNTIF"
6056 | "EDIT_DISTANCE"
6057 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
6058 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
6059 | "TO_HEX"
6060 | "TO_JSON_STRING"
6061 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
6062 | "DIV"
6063 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
6064 | "LAST_DAY"
6065 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
6066 | "REGEXP_CONTAINS"
6067 | "CONTAINS_SUBSTR"
6068 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
6069 | "SAFE_CAST"
6070 | "GENERATE_DATE_ARRAY"
6071 | "PARSE_DATE" | "PARSE_TIMESTAMP"
6072 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
6073 | "ARRAY_CONCAT"
6074 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
6075 | "INSTR"
6076 | "MD5" | "SHA1" | "SHA256" | "SHA512"
6077 | "GENERATE_UUID()" // just in case
6078 | "REGEXP_EXTRACT_ALL"
6079 | "REGEXP_EXTRACT"
6080 | "INT64"
6081 | "ARRAY_CONCAT_AGG"
6082 | "DATE_DIFF(" // just in case
6083 | "TO_HEX_MD5" // internal
6084 | "MOD"
6085 | "CONCAT"
6086 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
6087 | "STRUCT"
6088 | "ROUND"
6089 | "MAKE_INTERVAL"
6090 | "ARRAY_TO_STRING"
6091 | "PERCENTILE_CONT"
6092 => Action::BigQueryFunctionNormalize,
6093 "ARRAY" if matches!(target, DialectType::Snowflake)
6094 && f.args.len() == 1
6095 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
6096 => Action::BigQueryArraySelectAsStructToSnowflake,
6097 _ => Action::None,
6098 }
6099 } else if matches!(source, DialectType::BigQuery)
6100 && matches!(target, DialectType::BigQuery)
6101 {
6102 // BigQuery -> BigQuery normalizations
6103 match name.as_str() {
6104 "TIMESTAMP_DIFF"
6105 | "DATETIME_DIFF"
6106 | "TIME_DIFF"
6107 | "DATE_DIFF"
6108 | "DATE_ADD"
6109 | "TO_HEX"
6110 | "CURRENT_TIMESTAMP"
6111 | "CURRENT_DATE"
6112 | "CURRENT_TIME"
6113 | "CURRENT_DATETIME"
6114 | "GENERATE_DATE_ARRAY"
6115 | "INSTR"
6116 | "FORMAT_DATETIME"
6117 | "DATETIME"
6118 | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
6119 _ => Action::None,
6120 }
6121 } else {
6122 // Generic function normalization for non-BigQuery sources
6123 match name.as_str() {
6124 "ARBITRARY" | "AGGREGATE"
6125 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
6126 | "STRUCT_EXTRACT"
6127 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
6128 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
6129 | "SUBSTRINGINDEX"
6130 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
6131 | "UNICODE"
6132 | "XOR"
6133 | "ARRAY_REVERSE_SORT"
6134 | "ENCODE" | "DECODE"
6135 | "QUANTILE"
6136 | "EPOCH" | "EPOCH_MS"
6137 | "HASHBYTES"
6138 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
6139 | "APPROX_DISTINCT"
6140 | "DATE_PARSE" | "FORMAT_DATETIME"
6141 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
6142 | "RLIKE"
6143 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
6144 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
6145 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
6146 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
6147 | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
6148 | "MAP" | "MAP_FROM_ENTRIES"
6149 | "COLLECT_LIST" | "COLLECT_SET"
6150 | "ISNAN" | "IS_NAN"
6151 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
6152 | "FORMAT_NUMBER"
6153 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
6154 | "ELEMENT_AT"
6155 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
6156 | "SPLIT_PART"
6157 // GENERATE_SERIES: handled separately below
6158 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
6159 | "JSON_QUERY" | "JSON_VALUE"
6160 | "JSON_SEARCH"
6161 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
6162 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
6163 | "CURDATE" | "CURTIME"
6164 | "ARRAY_TO_STRING"
6165 | "ARRAY_SORT" | "SORT_ARRAY"
6166 | "LEFT" | "RIGHT"
6167 | "MAP_FROM_ARRAYS"
6168 | "LIKE" | "ILIKE"
6169 | "ARRAY_CONCAT" | "LIST_CONCAT"
6170 | "QUANTILE_CONT" | "QUANTILE_DISC"
6171 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
6172 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
6173 | "LOCATE" | "STRPOS" | "INSTR"
6174 | "CHAR"
6175 // CONCAT: handled separately for COALESCE wrapping
6176 | "ARRAY_JOIN"
6177 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
6178 | "ISNULL"
6179 | "MONTHNAME"
6180 | "TO_TIMESTAMP"
6181 | "TO_DATE"
6182 | "TO_JSON"
6183 | "REGEXP_SPLIT"
6184 | "SPLIT"
6185 | "FORMATDATETIME"
6186 | "ARRAYJOIN"
6187 | "SPLITBYSTRING" | "SPLITBYREGEXP"
6188 | "NVL"
6189 | "TO_CHAR"
6190 | "DBMS_RANDOM.VALUE"
6191 | "REGEXP_LIKE"
6192 | "REPLICATE"
6193 | "LEN"
6194 | "COUNT_BIG"
6195 | "DATEFROMPARTS"
6196 | "DATETIMEFROMPARTS"
6197 | "CONVERT" | "TRY_CONVERT"
6198 | "STRFTIME" | "STRPTIME"
6199 | "DATE_FORMAT" | "FORMAT_DATE"
6200 | "PARSE_TIMESTAMP" | "PARSE_DATE"
6201 | "FROM_BASE64" | "TO_BASE64"
6202 | "GETDATE"
6203 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
6204 | "TO_UTF8" | "FROM_UTF8"
6205 | "STARTS_WITH" | "STARTSWITH"
6206 | "APPROX_COUNT_DISTINCT"
6207 | "JSON_FORMAT"
6208 | "SYSDATE"
6209 | "LOGICAL_OR" | "LOGICAL_AND"
6210 | "MONTHS_ADD"
6211 | "SCHEMA_NAME"
6212 | "STRTOL"
6213 | "EDITDIST3"
6214 | "FORMAT"
6215 | "LIST_CONTAINS" | "LIST_HAS"
6216 | "VARIANCE" | "STDDEV"
6217 | "ISINF"
6218 | "TO_UNIXTIME"
6219 | "FROM_UNIXTIME"
6220 | "DATEPART" | "DATE_PART"
6221 | "DATENAME"
6222 | "STRING_AGG"
6223 | "JSON_ARRAYAGG"
6224 | "APPROX_QUANTILE"
6225 | "MAKE_DATE"
6226 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
6227 | "RANGE"
6228 | "TRY_ELEMENT_AT"
6229 | "STR_TO_MAP"
6230 | "STRING"
6231 | "STR_TO_TIME"
6232 | "CURRENT_SCHEMA"
6233 | "LTRIM" | "RTRIM"
6234 | "UUID"
6235 | "FARM_FINGERPRINT"
6236 | "JSON_KEYS"
6237 | "WEEKOFYEAR"
6238 | "CONCAT_WS"
6239 | "ARRAY_SLICE"
6240 | "ARRAY_PREPEND"
6241 | "ARRAY_REMOVE"
6242 | "GENERATE_DATE_ARRAY"
6243 | "PARSE_JSON"
6244 | "JSON_REMOVE"
6245 | "JSON_SET"
6246 | "LEVENSHTEIN"
6247 | "CURRENT_VERSION"
6248 | "ARRAY_MAX"
6249 | "ARRAY_MIN"
6250 | "JAROWINKLER_SIMILARITY"
6251 | "CURRENT_SCHEMAS"
6252 | "TO_VARIANT"
6253 | "JSON_GROUP_ARRAY" | "JSON_GROUP_OBJECT"
6254 | "ARRAYS_OVERLAP" | "ARRAY_INTERSECTION"
6255 => Action::GenericFunctionNormalize,
6256 // Canonical date functions -> dialect-specific
6257 "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
6258 "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
6259 "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
6260 "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
6261 "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
6262 "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
6263 "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
6264 "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
6265 "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
6266 "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
6267 "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
6268 "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
6269 "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
6270 "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
6271 "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
6272 "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
6273 "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
6274 // STR_TO_DATE(x, fmt) -> dialect-specific
6275 "STR_TO_DATE" if f.args.len() == 2
6276 && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
6277 "STR_TO_DATE" => Action::GenericFunctionNormalize,
6278 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
6279 "TS_OR_DS_ADD" if f.args.len() == 3
6280 && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
6281 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
6282 "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
6283 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
6284 "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
6285 // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
6286 "IFNULL" if f.args.len() == 2 => Action::IfnullToCoalesce,
6287 // IS_ASCII(x) -> dialect-specific
6288 "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
6289 // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
6290 "STR_POSITION" => Action::StrPositionConvert,
6291 // ARRAY_SUM -> dialect-specific
6292 "ARRAY_SUM" => Action::ArraySumConvert,
6293 // ARRAY_SIZE -> dialect-specific (Drill only)
6294 "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
6295 // ARRAY_ANY -> dialect-specific
6296 "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
6297 // Functions needing specific cross-dialect transforms
6298 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
6299 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
6300 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
6301 "ARRAY" if matches!(source, DialectType::BigQuery)
6302 && matches!(target, DialectType::Snowflake)
6303 && f.args.len() == 1
6304 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
6305 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::Snowflake | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
6306 "TRUNC" if f.args.len() == 2 && matches!(&f.args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))) && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
6307 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 && !f.args.get(1).map_or(false, |a| matches!(a, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) => Action::GenericFunctionNormalize,
6308 // DATE_TRUNC('unit', x) from Generic source -> arg swap for BigQuery/Doris/Spark/MySQL
6309 "DATE_TRUNC" if f.args.len() == 2
6310 && matches!(source, DialectType::Generic)
6311 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
6312 | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
6313 // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
6314 "TIMESTAMP_TRUNC" if f.args.len() >= 2
6315 && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
6316 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
6317 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
6318 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
6319 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
6320 // GENERATE_SERIES with interval normalization for PG target
6321 "GENERATE_SERIES" if f.args.len() >= 3
6322 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
6323 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
6324 "GENERATE_SERIES" => Action::None, // passthrough for other cases
6325 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
6326 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
6327 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
6328 "CONCAT" => Action::GenericFunctionNormalize,
6329 // DIV(a, b) -> target-specific integer division
6330 "DIV" if f.args.len() == 2
6331 && matches!(source, DialectType::PostgreSQL)
6332 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
6333 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
6334 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
6335 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
6336 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
6337 "JSONB_EXISTS" if f.args.len() == 2
6338 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
6339 // DATE_BIN -> TIME_BUCKET for DuckDB
6340 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
6341 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
6342 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
6343 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
6344 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
6345 // ClickHouse any -> ANY_VALUE for other dialects
6346 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
6347 _ => Action::None,
6348 }
6349 }
6350 }
6351 Expression::AggregateFunction(af) => {
6352 let name = af.name.to_ascii_uppercase();
6353 match name.as_str() {
6354 "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
6355 "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
6356 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
6357 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
6358 if matches!(target, DialectType::DuckDB) =>
6359 {
6360 Action::JsonObjectAggConvert
6361 }
6362 "ARRAY_AGG"
6363 if matches!(
6364 target,
6365 DialectType::Hive
6366 | DialectType::Spark
6367 | DialectType::Databricks
6368 ) =>
6369 {
6370 Action::ArrayAggToCollectList
6371 }
6372 "MAX_BY" | "MIN_BY"
6373 if matches!(
6374 target,
6375 DialectType::ClickHouse
6376 | DialectType::Spark
6377 | DialectType::Databricks
6378 | DialectType::DuckDB
6379 ) =>
6380 {
6381 Action::MaxByMinByConvert
6382 }
6383 "COLLECT_LIST"
6384 if matches!(
6385 target,
6386 DialectType::Presto | DialectType::Trino | DialectType::DuckDB
6387 ) =>
6388 {
6389 Action::CollectListToArrayAgg
6390 }
6391 "COLLECT_SET"
6392 if matches!(
6393 target,
6394 DialectType::Presto
6395 | DialectType::Trino
6396 | DialectType::Snowflake
6397 | DialectType::DuckDB
6398 ) =>
6399 {
6400 Action::CollectSetConvert
6401 }
6402 "PERCENTILE"
6403 if matches!(
6404 target,
6405 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
6406 ) =>
6407 {
6408 Action::PercentileConvert
6409 }
6410 // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
6411 "CORR"
6412 if matches!(target, DialectType::DuckDB)
6413 && matches!(source, DialectType::Snowflake) =>
6414 {
6415 Action::CorrIsnanWrap
6416 }
6417 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
6418 "APPROX_QUANTILES"
6419 if matches!(source, DialectType::BigQuery)
6420 && matches!(target, DialectType::DuckDB) =>
6421 {
6422 Action::BigQueryApproxQuantiles
6423 }
6424 // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
6425 "PERCENTILE_CONT"
6426 if matches!(source, DialectType::BigQuery)
6427 && matches!(target, DialectType::DuckDB)
6428 && af.args.len() >= 2 =>
6429 {
6430 Action::BigQueryPercentileContToDuckDB
6431 }
6432 _ => Action::None,
6433 }
6434 }
6435 Expression::JSONArrayAgg(_) => match target {
6436 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
6437 _ => Action::None,
6438 },
6439 Expression::ToNumber(tn) => {
6440 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
6441 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
6442 match target {
6443 DialectType::Oracle
6444 | DialectType::Snowflake
6445 | DialectType::Teradata => Action::None,
6446 _ => Action::GenericFunctionNormalize,
6447 }
6448 } else {
6449 Action::None
6450 }
6451 }
6452 Expression::Nvl2(_) => {
6453 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
6454 // Keep as NVL2 for dialects that support it natively
6455 match target {
6456 DialectType::Oracle
6457 | DialectType::Snowflake
6458 | DialectType::Teradata
6459 | DialectType::Spark
6460 | DialectType::Databricks
6461 | DialectType::Redshift => Action::None,
6462 _ => Action::Nvl2Expand,
6463 }
6464 }
6465 Expression::Decode(_) | Expression::DecodeCase(_) => {
6466 // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
6467 // Keep as DECODE for Oracle/Snowflake
6468 match target {
6469 DialectType::Oracle | DialectType::Snowflake => Action::None,
6470 _ => Action::DecodeSimplify,
6471 }
6472 }
6473 Expression::Coalesce(ref cf) => {
6474 // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect
6475 // BigQuery keeps IFNULL natively when source is also BigQuery
6476 if cf.original_name.as_deref() == Some("IFNULL")
6477 && !(matches!(source, DialectType::BigQuery)
6478 && matches!(target, DialectType::BigQuery))
6479 {
6480 Action::IfnullToCoalesce
6481 } else {
6482 Action::None
6483 }
6484 }
6485 Expression::IfFunc(if_func) => {
6486 if matches!(source, DialectType::Snowflake)
6487 && matches!(
6488 target,
6489 DialectType::Presto | DialectType::Trino | DialectType::SQLite
6490 )
6491 && matches!(if_func.false_value, Some(Expression::Div(_)))
6492 {
6493 Action::Div0TypedDivision
6494 } else {
6495 Action::None
6496 }
6497 }
6498 Expression::ToJson(_) => match target {
6499 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
6500 DialectType::BigQuery => Action::ToJsonConvert,
6501 DialectType::DuckDB => Action::ToJsonConvert,
6502 _ => Action::None,
6503 },
6504 Expression::ArrayAgg(ref agg) => {
6505 if matches!(target, DialectType::MySQL | DialectType::SingleStore) {
6506 Action::ArrayAggToGroupConcat
6507 } else if matches!(
6508 target,
6509 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6510 ) {
6511 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
6512 Action::ArrayAggToCollectList
6513 } else if matches!(
6514 source,
6515 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6516 ) && matches!(target, DialectType::DuckDB)
6517 && agg.filter.is_some()
6518 {
6519 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
6520 // Need to add NOT x IS NULL to existing filter
6521 Action::ArrayAggNullFilter
6522 } else if matches!(target, DialectType::DuckDB)
6523 && agg.ignore_nulls == Some(true)
6524 && !agg.order_by.is_empty()
6525 {
6526 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
6527 Action::ArrayAggIgnoreNullsDuckDB
6528 } else if !matches!(source, DialectType::Snowflake) {
6529 Action::None
6530 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
6531 let is_array_agg = agg.name.as_deref().map_or(false, |n| n.eq_ignore_ascii_case("ARRAY_AGG"))
6532 || agg.name.is_none();
6533 if is_array_agg {
6534 Action::ArrayAggCollectList
6535 } else {
6536 Action::None
6537 }
6538 } else if matches!(
6539 target,
6540 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
6541 ) && agg.filter.is_none()
6542 {
6543 Action::ArrayAggFilter
6544 } else {
6545 Action::None
6546 }
6547 }
6548 Expression::WithinGroup(wg) => {
6549 if matches!(source, DialectType::Snowflake)
6550 && matches!(
6551 target,
6552 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
6553 )
6554 && matches!(wg.this, Expression::ArrayAgg(_))
6555 {
6556 Action::ArrayAggWithinGroupFilter
6557 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
6558 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
6559 || matches!(&wg.this, Expression::StringAgg(_))
6560 {
6561 Action::StringAggConvert
6562 } else if matches!(
6563 target,
6564 DialectType::Presto
6565 | DialectType::Trino
6566 | DialectType::Athena
6567 | DialectType::Spark
6568 | DialectType::Databricks
6569 ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
6570 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
6571 || matches!(&wg.this, Expression::PercentileCont(_)))
6572 {
6573 Action::PercentileContConvert
6574 } else {
6575 Action::None
6576 }
6577 }
6578 // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
6579 // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
6580 // DATETIME is the timezone-unaware type
6581 Expression::Cast(ref c) => {
6582 if c.format.is_some()
6583 && (matches!(source, DialectType::BigQuery)
6584 || matches!(source, DialectType::Teradata))
6585 {
6586 Action::BigQueryCastFormat
6587 } else if matches!(target, DialectType::BigQuery)
6588 && !matches!(source, DialectType::BigQuery)
6589 && matches!(
6590 c.to,
6591 DataType::Timestamp {
6592 timezone: false,
6593 ..
6594 }
6595 )
6596 {
6597 Action::CastTimestampToDatetime
6598 } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
6599 && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
6600 && matches!(
6601 c.to,
6602 DataType::Timestamp {
6603 timezone: false,
6604 ..
6605 }
6606 )
6607 {
6608 // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
6609 // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
6610 Action::CastTimestampToDatetime
6611 } else if matches!(
6612 source,
6613 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6614 ) && matches!(
6615 target,
6616 DialectType::Presto
6617 | DialectType::Trino
6618 | DialectType::Athena
6619 | DialectType::DuckDB
6620 | DialectType::Snowflake
6621 | DialectType::BigQuery
6622 | DialectType::Databricks
6623 | DialectType::TSQL
6624 ) {
6625 Action::HiveCastToTryCast
6626 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
6627 && matches!(target, DialectType::MySQL | DialectType::StarRocks)
6628 {
6629 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
6630 Action::CastTimestamptzToFunc
6631 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
6632 && matches!(
6633 target,
6634 DialectType::Hive
6635 | DialectType::Spark
6636 | DialectType::Databricks
6637 | DialectType::BigQuery
6638 )
6639 {
6640 // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
6641 Action::CastTimestampStripTz
6642 } else if matches!(&c.to, DataType::Json)
6643 && matches!(&c.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
6644 && matches!(
6645 target,
6646 DialectType::Presto
6647 | DialectType::Trino
6648 | DialectType::Athena
6649 | DialectType::Snowflake
6650 )
6651 {
6652 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
6653 // Only when the input is a string literal (JSON 'value' syntax)
6654 Action::JsonLiteralToJsonParse
6655 } else if matches!(&c.to, DataType::Json)
6656 && matches!(source, DialectType::DuckDB)
6657 && matches!(
6658 target,
6659 DialectType::Presto | DialectType::Trino | DialectType::Athena
6660 )
6661 {
6662 // DuckDB's CAST(x AS JSON) parses the string value into a JSON value.
6663 // Trino/Presto/Athena's CAST(x AS JSON) instead wraps the value as a
6664 // JSON string (no parsing) — different semantics. Use JSON_PARSE(x)
6665 // in the target to preserve DuckDB's parse semantics.
6666 Action::JsonLiteralToJsonParse
6667 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
6668 && matches!(target, DialectType::Spark | DialectType::Databricks)
6669 {
6670 // CAST(x AS JSON) -> TO_JSON(x) for Spark
6671 Action::CastToJsonForSpark
6672 } else if (matches!(
6673 &c.to,
6674 DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
6675 )) && matches!(
6676 target,
6677 DialectType::Spark | DialectType::Databricks
6678 ) && (matches!(&c.this, Expression::ParseJson(_))
6679 || matches!(
6680 &c.this,
6681 Expression::Function(f)
6682 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
6683 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
6684 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
6685 ))
6686 {
6687 // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
6688 // -> FROM_JSON(..., type_string) for Spark
6689 Action::CastJsonToFromJson
6690 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6691 && matches!(
6692 c.to,
6693 DataType::Timestamp {
6694 timezone: false,
6695 ..
6696 }
6697 )
6698 && matches!(source, DialectType::DuckDB)
6699 {
6700 Action::StrftimeCastTimestamp
6701 } else if matches!(source, DialectType::DuckDB)
6702 && matches!(
6703 c.to,
6704 DataType::Decimal {
6705 precision: None,
6706 ..
6707 }
6708 )
6709 {
6710 Action::DecimalDefaultPrecision
6711 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
6712 && matches!(c.to, DataType::Char { length: None })
6713 && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
6714 {
6715 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
6716 Action::MysqlCastCharToText
6717 } else if matches!(
6718 source,
6719 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6720 ) && matches!(
6721 target,
6722 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6723 ) && Self::has_varchar_char_type(&c.to)
6724 {
6725 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
6726 Action::SparkCastVarcharToString
6727 } else {
6728 Action::None
6729 }
6730 }
6731 Expression::SafeCast(ref c) => {
6732 if c.format.is_some()
6733 && matches!(source, DialectType::BigQuery)
6734 && !matches!(target, DialectType::BigQuery)
6735 {
6736 Action::BigQueryCastFormat
6737 } else {
6738 Action::None
6739 }
6740 }
6741 Expression::TryCast(ref c) => {
6742 if matches!(&c.to, DataType::Json)
6743 && matches!(source, DialectType::DuckDB)
6744 && matches!(
6745 target,
6746 DialectType::Presto | DialectType::Trino | DialectType::Athena
6747 )
6748 {
6749 // DuckDB's TRY_CAST(x AS JSON) tries to parse x as JSON, returning
6750 // NULL on parse failure. Trino/Presto/Athena's TRY_CAST(x AS JSON)
6751 // wraps the value as a JSON string (no parse). Emit TRY(JSON_PARSE(x))
6752 // to preserve DuckDB's parse-or-null semantics.
6753 Action::DuckDBTryCastJsonToTryJsonParse
6754 } else {
6755 Action::None
6756 }
6757 }
6758 // For DuckDB: DATE_TRUNC should preserve the input type
6759 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
6760 if matches!(source, DialectType::Snowflake)
6761 && matches!(target, DialectType::DuckDB)
6762 {
6763 Action::DateTruncWrapCast
6764 } else {
6765 Action::None
6766 }
6767 }
6768 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
6769 Expression::SetStatement(s) => {
6770 if matches!(target, DialectType::DuckDB)
6771 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
6772 && s.items.iter().any(|item| item.kind.is_none())
6773 {
6774 Action::SetToVariable
6775 } else {
6776 Action::None
6777 }
6778 }
6779 // Cross-dialect NULL ordering normalization.
6780 // When nulls_first is not specified, fill in the source dialect's implied
6781 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
6782 Expression::Ordered(o) => {
6783 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
6784 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
6785 Action::MysqlNullsOrdering
6786 } else {
6787 // Skip targets that don't support NULLS FIRST/LAST syntax
6788 let target_supports_nulls = !matches!(
6789 target,
6790 DialectType::MySQL
6791 | DialectType::TSQL
6792 | DialectType::StarRocks
6793 | DialectType::Doris
6794 );
6795 if o.nulls_first.is_none() && source != target && target_supports_nulls
6796 {
6797 Action::NullsOrdering
6798 } else {
6799 Action::None
6800 }
6801 }
6802 }
6803 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
6804 Expression::DataType(dt) => {
6805 if matches!(source, DialectType::BigQuery)
6806 && !matches!(target, DialectType::BigQuery)
6807 {
6808 match dt {
6809 DataType::Custom { ref name }
6810 if name.eq_ignore_ascii_case("INT64")
6811 || name.eq_ignore_ascii_case("FLOAT64")
6812 || name.eq_ignore_ascii_case("BOOL")
6813 || name.eq_ignore_ascii_case("BYTES")
6814 || name.eq_ignore_ascii_case("NUMERIC")
6815 || name.eq_ignore_ascii_case("STRING")
6816 || name.eq_ignore_ascii_case("DATETIME") =>
6817 {
6818 Action::BigQueryCastType
6819 }
6820 _ => Action::None,
6821 }
6822 } else if matches!(source, DialectType::TSQL) {
6823 // For TSQL source -> any target (including TSQL itself for REAL)
6824 match dt {
6825 // REAL -> FLOAT even for TSQL->TSQL
6826 DataType::Custom { ref name }
6827 if name.eq_ignore_ascii_case("REAL") =>
6828 {
6829 Action::TSQLTypeNormalize
6830 }
6831 DataType::Float {
6832 real_spelling: true,
6833 ..
6834 } => Action::TSQLTypeNormalize,
6835 // Other TSQL type normalizations only for non-TSQL targets
6836 DataType::Custom { ref name }
6837 if !matches!(target, DialectType::TSQL)
6838 && (name.eq_ignore_ascii_case("MONEY")
6839 || name.eq_ignore_ascii_case("SMALLMONEY")
6840 || name.eq_ignore_ascii_case("DATETIME2")
6841 || name.eq_ignore_ascii_case("IMAGE")
6842 || name.eq_ignore_ascii_case("BIT")
6843 || name.eq_ignore_ascii_case("ROWVERSION")
6844 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
6845 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
6846 || (name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC"))
6847 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("DATETIME2("))
6848 || (name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME("))) =>
6849 {
6850 Action::TSQLTypeNormalize
6851 }
6852 DataType::Float {
6853 precision: Some(_), ..
6854 } if !matches!(target, DialectType::TSQL) => {
6855 Action::TSQLTypeNormalize
6856 }
6857 DataType::TinyInt { .. }
6858 if !matches!(target, DialectType::TSQL) =>
6859 {
6860 Action::TSQLTypeNormalize
6861 }
6862 // INTEGER -> INT for Databricks/Spark targets
6863 DataType::Int {
6864 integer_spelling: true,
6865 ..
6866 } if matches!(
6867 target,
6868 DialectType::Databricks | DialectType::Spark
6869 ) =>
6870 {
6871 Action::TSQLTypeNormalize
6872 }
6873 _ => Action::None,
6874 }
6875 } else if (matches!(source, DialectType::Oracle)
6876 || matches!(source, DialectType::Generic))
6877 && !matches!(target, DialectType::Oracle)
6878 {
6879 match dt {
6880 DataType::Custom { ref name }
6881 if (name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2("))
6882 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2("))
6883 || name.eq_ignore_ascii_case("VARCHAR2")
6884 || name.eq_ignore_ascii_case("NVARCHAR2") =>
6885 {
6886 Action::OracleVarchar2ToVarchar
6887 }
6888 _ => Action::None,
6889 }
6890 } else if matches!(target, DialectType::Snowflake)
6891 && !matches!(source, DialectType::Snowflake)
6892 {
6893 // When target is Snowflake but source is NOT Snowflake,
6894 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
6895 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
6896 // should keep their FLOAT spelling.
6897 match dt {
6898 DataType::Float { .. } => Action::SnowflakeFloatProtect,
6899 _ => Action::None,
6900 }
6901 } else {
6902 Action::None
6903 }
6904 }
6905 // LOWER patterns from BigQuery TO_HEX conversions:
6906 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
6907 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
6908 Expression::Lower(uf) => {
6909 if matches!(source, DialectType::BigQuery) {
6910 match &uf.this {
6911 Expression::Lower(_) => Action::BigQueryToHexLower,
6912 Expression::Function(f)
6913 if f.name == "TO_HEX"
6914 && matches!(target, DialectType::BigQuery) =>
6915 {
6916 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
6917 Action::BigQueryToHexLower
6918 }
6919 _ => Action::None,
6920 }
6921 } else {
6922 Action::None
6923 }
6924 }
6925 // UPPER patterns from BigQuery TO_HEX conversions:
6926 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
6927 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
6928 Expression::Upper(uf) => {
6929 if matches!(source, DialectType::BigQuery) {
6930 match &uf.this {
6931 Expression::Lower(_) => Action::BigQueryToHexUpper,
6932 _ => Action::None,
6933 }
6934 } else {
6935 Action::None
6936 }
6937 }
6938 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
6939 // Snowflake supports LAST_DAY with unit, so keep it there
6940 Expression::LastDay(ld) => {
6941 if matches!(source, DialectType::BigQuery)
6942 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
6943 && ld.unit.is_some()
6944 {
6945 Action::BigQueryLastDayStripUnit
6946 } else {
6947 Action::None
6948 }
6949 }
6950 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
6951 Expression::SafeDivide(_) => {
6952 if matches!(source, DialectType::BigQuery)
6953 && !matches!(target, DialectType::BigQuery)
6954 {
6955 Action::BigQuerySafeDivide
6956 } else {
6957 Action::None
6958 }
6959 }
6960 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
6961 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
6962 Expression::AnyValue(ref agg) => {
6963 if matches!(source, DialectType::BigQuery)
6964 && matches!(target, DialectType::DuckDB)
6965 && agg.having_max.is_some()
6966 {
6967 Action::BigQueryAnyValueHaving
6968 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6969 && !matches!(source, DialectType::Spark | DialectType::Databricks)
6970 && agg.ignore_nulls.is_none()
6971 {
6972 Action::AnyValueIgnoreNulls
6973 } else {
6974 Action::None
6975 }
6976 }
6977 Expression::Any(ref q) => {
6978 if matches!(source, DialectType::PostgreSQL)
6979 && matches!(
6980 target,
6981 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6982 )
6983 && q.op.is_some()
6984 && !matches!(
6985 q.subquery,
6986 Expression::Select(_) | Expression::Subquery(_)
6987 )
6988 {
6989 Action::AnyToExists
6990 } else {
6991 Action::None
6992 }
6993 }
6994 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
6995 // Snowflake RLIKE does full-string match; DuckDB REGEXP_FULL_MATCH also does full-string match
6996 Expression::RegexpLike(_)
6997 if matches!(source, DialectType::Snowflake)
6998 && matches!(target, DialectType::DuckDB) =>
6999 {
7000 Action::RlikeSnowflakeToDuckDB
7001 }
7002 // RegexpLike from non-DuckDB/non-Snowflake sources -> REGEXP_MATCHES for DuckDB target
7003 Expression::RegexpLike(_)
7004 if !matches!(source, DialectType::DuckDB)
7005 && matches!(target, DialectType::DuckDB) =>
7006 {
7007 Action::RegexpLikeToDuckDB
7008 }
7009 // RegexpLike -> Exasol: anchor pattern with .*...*
7010 Expression::RegexpLike(_)
7011 if matches!(target, DialectType::Exasol) =>
7012 {
7013 Action::RegexpLikeExasolAnchor
7014 }
7015 // Safe-division source -> non-safe target: NULLIF wrapping and/or CAST
7016 // Safe-division dialects: MySQL, DuckDB, SingleStore, TiDB, ClickHouse, Doris
7017 Expression::Div(ref op)
7018 if matches!(
7019 source,
7020 DialectType::MySQL
7021 | DialectType::DuckDB
7022 | DialectType::SingleStore
7023 | DialectType::TiDB
7024 | DialectType::ClickHouse
7025 | DialectType::Doris
7026 ) && matches!(
7027 target,
7028 DialectType::PostgreSQL
7029 | DialectType::Redshift
7030 | DialectType::Drill
7031 | DialectType::Trino
7032 | DialectType::Presto
7033 | DialectType::Athena
7034 | DialectType::TSQL
7035 | DialectType::Teradata
7036 | DialectType::SQLite
7037 | DialectType::BigQuery
7038 | DialectType::Snowflake
7039 | DialectType::Databricks
7040 | DialectType::Oracle
7041 | DialectType::Materialize
7042 | DialectType::RisingWave
7043 ) =>
7044 {
7045 // Only wrap if RHS is not already NULLIF
7046 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
7047 {
7048 Action::MySQLSafeDivide
7049 } else {
7050 Action::None
7051 }
7052 }
7053 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
7054 // For TSQL/Fabric, convert to sp_rename instead
7055 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
7056 if let Some(crate::expressions::AlterTableAction::RenameTable(
7057 ref new_tbl,
7058 )) = at.actions.first()
7059 {
7060 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
7061 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
7062 Action::AlterTableToSpRename
7063 } else if new_tbl.schema.is_some()
7064 && matches!(
7065 target,
7066 DialectType::BigQuery
7067 | DialectType::Doris
7068 | DialectType::StarRocks
7069 | DialectType::DuckDB
7070 | DialectType::PostgreSQL
7071 | DialectType::Redshift
7072 )
7073 {
7074 Action::AlterTableRenameStripSchema
7075 } else {
7076 Action::None
7077 }
7078 } else {
7079 Action::None
7080 }
7081 }
7082 // EPOCH(x) expression -> target-specific epoch conversion
7083 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
7084 Action::EpochConvert
7085 }
7086 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
7087 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
7088 Action::EpochMsConvert
7089 }
7090 // STRING_AGG -> GROUP_CONCAT for MySQL/SQLite
7091 Expression::StringAgg(_) => {
7092 if matches!(
7093 target,
7094 DialectType::MySQL
7095 | DialectType::SingleStore
7096 | DialectType::Doris
7097 | DialectType::StarRocks
7098 | DialectType::SQLite
7099 ) {
7100 Action::StringAggConvert
7101 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
7102 Action::StringAggConvert
7103 } else {
7104 Action::None
7105 }
7106 }
7107 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
7108 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
7109 Expression::GroupConcat(_) => Action::GroupConcatConvert,
7110 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
7111 // DuckDB CARDINALITY -> keep as CARDINALITY for DuckDB target (used for maps)
7112 Expression::Cardinality(_)
7113 if matches!(source, DialectType::DuckDB)
7114 && matches!(target, DialectType::DuckDB) =>
7115 {
7116 Action::None
7117 }
7118 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
7119 Action::ArrayLengthConvert
7120 }
7121 Expression::ArraySize(_) => {
7122 if matches!(target, DialectType::Drill) {
7123 Action::ArraySizeDrill
7124 } else {
7125 Action::ArrayLengthConvert
7126 }
7127 }
7128 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
7129 Expression::ArrayRemove(_) => match target {
7130 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
7131 Action::ArrayRemoveConvert
7132 }
7133 _ => Action::None,
7134 },
7135 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
7136 Expression::ArrayReverse(_) => match target {
7137 DialectType::ClickHouse => Action::ArrayReverseConvert,
7138 _ => Action::None,
7139 },
7140 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
7141 Expression::JsonKeys(_) => match target {
7142 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
7143 Action::JsonKeysConvert
7144 }
7145 _ => Action::None,
7146 },
7147 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
7148 Expression::ParseJson(_) => match target {
7149 DialectType::SQLite
7150 | DialectType::Doris
7151 | DialectType::MySQL
7152 | DialectType::StarRocks => Action::ParseJsonStrip,
7153 _ => Action::None,
7154 },
7155 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
7156 Expression::WeekOfYear(_)
7157 if matches!(target, DialectType::Snowflake)
7158 && !matches!(source, DialectType::Snowflake) =>
7159 {
7160 Action::WeekOfYearToWeekIso
7161 }
7162 // NVL: clear original_name so generator uses dialect-specific function names
7163 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
7164 // XOR: expand for dialects that don't support the XOR keyword
7165 Expression::Xor(_) => {
7166 let target_supports_xor = matches!(
7167 target,
7168 DialectType::MySQL
7169 | DialectType::SingleStore
7170 | DialectType::Doris
7171 | DialectType::StarRocks
7172 );
7173 if !target_supports_xor {
7174 Action::XorExpand
7175 } else {
7176 Action::None
7177 }
7178 }
7179 // TSQL #table -> temp table normalization (CREATE TABLE)
7180 Expression::CreateTable(ct)
7181 if matches!(source, DialectType::TSQL | DialectType::Fabric)
7182 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
7183 && ct.name.name.name.starts_with('#') =>
7184 {
7185 Action::TempTableHash
7186 }
7187 // TSQL #table -> strip # from table references in SELECT/etc.
7188 Expression::Table(tr)
7189 if matches!(source, DialectType::TSQL | DialectType::Fabric)
7190 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
7191 && tr.name.name.starts_with('#') =>
7192 {
7193 Action::TempTableHash
7194 }
7195 // TSQL #table -> strip # from DROP TABLE names
7196 Expression::DropTable(ref dt)
7197 if matches!(source, DialectType::TSQL | DialectType::Fabric)
7198 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
7199 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
7200 {
7201 Action::TempTableHash
7202 }
7203 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
7204 Expression::JsonExtract(_)
7205 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
7206 {
7207 Action::JsonExtractToTsql
7208 }
7209 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
7210 Expression::JsonExtractScalar(_)
7211 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
7212 {
7213 Action::JsonExtractToTsql
7214 }
7215 // JSON_EXTRACT -> JSONExtractString for ClickHouse
7216 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
7217 Action::JsonExtractToClickHouse
7218 }
7219 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
7220 Expression::JsonExtractScalar(_)
7221 if matches!(target, DialectType::ClickHouse) =>
7222 {
7223 Action::JsonExtractToClickHouse
7224 }
7225 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
7226 Expression::JsonExtract(ref f)
7227 if !f.arrow_syntax
7228 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
7229 {
7230 Action::JsonExtractToArrow
7231 }
7232 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
7233 Expression::JsonExtract(ref f)
7234 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
7235 && !matches!(
7236 source,
7237 DialectType::PostgreSQL
7238 | DialectType::Redshift
7239 | DialectType::Materialize
7240 )
7241 && matches!(&f.path, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$'))) =>
7242 {
7243 Action::JsonExtractToGetJsonObject
7244 }
7245 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
7246 Expression::JsonExtract(_)
7247 if matches!(
7248 target,
7249 DialectType::Hive | DialectType::Spark | DialectType::Databricks
7250 ) =>
7251 {
7252 Action::JsonExtractToGetJsonObject
7253 }
7254 // JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Snowflake, SQLite
7255 // Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
7256 Expression::JsonExtractScalar(ref f)
7257 if !f.arrow_syntax
7258 && !f.hash_arrow_syntax
7259 && matches!(
7260 target,
7261 DialectType::PostgreSQL
7262 | DialectType::Redshift
7263 | DialectType::Snowflake
7264 | DialectType::SQLite
7265 | DialectType::DuckDB
7266 ) =>
7267 {
7268 Action::JsonExtractScalarConvert
7269 }
7270 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
7271 Expression::JsonExtractScalar(_)
7272 if matches!(
7273 target,
7274 DialectType::Hive | DialectType::Spark | DialectType::Databricks
7275 ) =>
7276 {
7277 Action::JsonExtractScalarToGetJsonObject
7278 }
7279 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
7280 Expression::JsonExtract(ref f)
7281 if !f.arrow_syntax
7282 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
7283 {
7284 Action::JsonPathNormalize
7285 }
7286 // JsonQuery (parsed JSON_QUERY) -> target-specific
7287 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
7288 // JsonValue (parsed JSON_VALUE) -> target-specific
7289 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
7290 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
7291 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
7292 Expression::AtTimeZone(_)
7293 if matches!(
7294 target,
7295 DialectType::Presto
7296 | DialectType::Trino
7297 | DialectType::Athena
7298 | DialectType::Spark
7299 | DialectType::Databricks
7300 | DialectType::BigQuery
7301 | DialectType::Snowflake
7302 ) =>
7303 {
7304 Action::AtTimeZoneConvert
7305 }
7306 // DAY_OF_WEEK -> dialect-specific
7307 Expression::DayOfWeek(_)
7308 if matches!(
7309 target,
7310 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
7311 ) =>
7312 {
7313 Action::DayOfWeekConvert
7314 }
7315 // CURRENT_USER -> CURRENT_USER() for Snowflake
7316 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
7317 Action::CurrentUserParens
7318 }
7319 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
7320 Expression::ElementAt(_)
7321 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
7322 {
7323 Action::ElementAtConvert
7324 }
7325 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
7326 Expression::ArrayFunc(ref arr)
7327 if !arr.bracket_notation
7328 && matches!(
7329 target,
7330 DialectType::Spark
7331 | DialectType::Databricks
7332 | DialectType::Hive
7333 | DialectType::BigQuery
7334 | DialectType::DuckDB
7335 | DialectType::Snowflake
7336 | DialectType::Presto
7337 | DialectType::Trino
7338 | DialectType::Athena
7339 | DialectType::ClickHouse
7340 | DialectType::StarRocks
7341 ) =>
7342 {
7343 Action::ArraySyntaxConvert
7344 }
7345 // VARIANCE expression -> varSamp for ClickHouse
7346 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
7347 Action::VarianceToClickHouse
7348 }
7349 // STDDEV expression -> stddevSamp for ClickHouse
7350 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
7351 Action::StddevToClickHouse
7352 }
7353 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
7354 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
7355 Action::ApproxQuantileConvert
7356 }
7357 // MonthsBetween -> target-specific
7358 Expression::MonthsBetween(_)
7359 if !matches!(
7360 target,
7361 DialectType::Spark | DialectType::Databricks | DialectType::Hive
7362 ) =>
7363 {
7364 Action::MonthsBetweenConvert
7365 }
7366 // AddMonths -> target-specific DATEADD/DATE_ADD
7367 Expression::AddMonths(_) => Action::AddMonthsConvert,
7368 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
7369 Expression::MapFromArrays(_)
7370 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
7371 {
7372 Action::MapFromArraysConvert
7373 }
7374 // CURRENT_USER -> CURRENT_USER() for Spark
7375 Expression::CurrentUser(_)
7376 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
7377 {
7378 Action::CurrentUserSparkParens
7379 }
7380 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
7381 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
7382 if matches!(
7383 source,
7384 DialectType::Spark | DialectType::Databricks | DialectType::Hive
7385 ) && matches!(&f.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
7386 && matches!(
7387 target,
7388 DialectType::DuckDB
7389 | DialectType::Presto
7390 | DialectType::Trino
7391 | DialectType::Athena
7392 | DialectType::PostgreSQL
7393 | DialectType::Redshift
7394 ) =>
7395 {
7396 Action::SparkDateFuncCast
7397 }
7398 // $parameter -> @parameter for BigQuery
7399 Expression::Parameter(ref p)
7400 if matches!(target, DialectType::BigQuery)
7401 && matches!(source, DialectType::DuckDB)
7402 && (p.style == crate::expressions::ParameterStyle::Dollar
7403 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
7404 {
7405 Action::DollarParamConvert
7406 }
7407 // EscapeString literal: normalize literal newlines to \n
7408 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::EscapeString(ref s) if s.contains('\n') || s.contains('\r') || s.contains('\t'))
7409 =>
7410 {
7411 Action::EscapeStringNormalize
7412 }
7413 // straight_join: keep lowercase for DuckDB, quote for MySQL
7414 Expression::Column(ref col)
7415 if col.name.name == "STRAIGHT_JOIN"
7416 && col.table.is_none()
7417 && matches!(source, DialectType::DuckDB)
7418 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
7419 {
7420 Action::StraightJoinCase
7421 }
7422 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
7423 // Snowflake INTERVAL format: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
7424 Expression::Interval(ref iv)
7425 if matches!(
7426 target,
7427 DialectType::Snowflake
7428 | DialectType::PostgreSQL
7429 | DialectType::Redshift
7430 ) && iv.unit.is_some()
7431 && iv.this.as_ref().map_or(false, |t| matches!(t, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) =>
7432 {
7433 Action::SnowflakeIntervalFormat
7434 }
7435 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
7436 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
7437 if let Some(ref sample) = ts.sample {
7438 if !sample.explicit_method {
7439 Action::TablesampleReservoir
7440 } else {
7441 Action::None
7442 }
7443 } else {
7444 Action::None
7445 }
7446 }
7447 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
7448 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
7449 Expression::TableSample(ref ts)
7450 if matches!(target, DialectType::Snowflake)
7451 && !matches!(source, DialectType::Snowflake)
7452 && ts.sample.is_some() =>
7453 {
7454 if let Some(ref sample) = ts.sample {
7455 if !sample.explicit_method {
7456 Action::TablesampleSnowflakeStrip
7457 } else {
7458 Action::None
7459 }
7460 } else {
7461 Action::None
7462 }
7463 }
7464 Expression::Table(ref t)
7465 if matches!(target, DialectType::Snowflake)
7466 && !matches!(source, DialectType::Snowflake)
7467 && t.table_sample.is_some() =>
7468 {
7469 if let Some(ref sample) = t.table_sample {
7470 if !sample.explicit_method {
7471 Action::TablesampleSnowflakeStrip
7472 } else {
7473 Action::None
7474 }
7475 } else {
7476 Action::None
7477 }
7478 }
7479 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
7480 Expression::AlterTable(ref at)
7481 if matches!(target, DialectType::TSQL | DialectType::Fabric)
7482 && !at.actions.is_empty()
7483 && matches!(
7484 at.actions.first(),
7485 Some(crate::expressions::AlterTableAction::RenameTable(_))
7486 ) =>
7487 {
7488 Action::AlterTableToSpRename
7489 }
7490 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
7491 Expression::Subscript(ref sub)
7492 if matches!(
7493 target,
7494 DialectType::BigQuery
7495 | DialectType::Hive
7496 | DialectType::Spark
7497 | DialectType::Databricks
7498 ) && matches!(
7499 source,
7500 DialectType::DuckDB
7501 | DialectType::PostgreSQL
7502 | DialectType::Presto
7503 | DialectType::Trino
7504 | DialectType::Redshift
7505 | DialectType::ClickHouse
7506 ) && matches!(&sub.index, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(ref n) if n.parse::<i64>().unwrap_or(0) > 0)) =>
7507 {
7508 Action::ArrayIndexConvert
7509 }
7510 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
7511 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
7512 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
7513 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
7514 Expression::WindowFunction(ref wf) => {
7515 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
7516 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
7517 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
7518 if matches!(target, DialectType::BigQuery)
7519 && !is_row_number
7520 && !wf.over.order_by.is_empty()
7521 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
7522 {
7523 Action::BigQueryNullsOrdering
7524 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
7525 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
7526 } else {
7527 let source_nulls_last = matches!(source, DialectType::DuckDB);
7528 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
7529 matches!(
7530 f.kind,
7531 crate::expressions::WindowFrameKind::Range
7532 | crate::expressions::WindowFrameKind::Groups
7533 )
7534 });
7535 if source_nulls_last
7536 && matches!(target, DialectType::MySQL)
7537 && !wf.over.order_by.is_empty()
7538 && wf.over.order_by.iter().any(|o| !o.desc)
7539 && !has_range_frame
7540 {
7541 Action::MysqlNullsLastRewrite
7542 } else {
7543 // Check for Snowflake window frame handling for FIRST_VALUE/LAST_VALUE/NTH_VALUE
7544 let is_ranking_window_func = matches!(
7545 &wf.this,
7546 Expression::FirstValue(_)
7547 | Expression::LastValue(_)
7548 | Expression::NthValue(_)
7549 );
7550 let has_full_unbounded_frame = wf.over.frame.as_ref().map_or(false, |f| {
7551 matches!(f.kind, crate::expressions::WindowFrameKind::Rows)
7552 && matches!(f.start, crate::expressions::WindowFrameBound::UnboundedPreceding)
7553 && matches!(f.end, Some(crate::expressions::WindowFrameBound::UnboundedFollowing))
7554 && f.exclude.is_none()
7555 });
7556 if is_ranking_window_func && matches!(source, DialectType::Snowflake) {
7557 if has_full_unbounded_frame && matches!(target, DialectType::Snowflake) {
7558 // Strip the default frame for Snowflake target
7559 Action::SnowflakeWindowFrameStrip
7560 } else if !has_full_unbounded_frame && wf.over.frame.is_none() && !matches!(target, DialectType::Snowflake) {
7561 // Add default frame for non-Snowflake target
7562 Action::SnowflakeWindowFrameAdd
7563 } else {
7564 match &wf.this {
7565 Expression::FirstValue(ref vf)
7566 | Expression::LastValue(ref vf)
7567 if vf.ignore_nulls == Some(false) =>
7568 {
7569 match target {
7570 DialectType::SQLite => Action::RespectNullsConvert,
7571 _ => Action::None,
7572 }
7573 }
7574 _ => Action::None,
7575 }
7576 }
7577 } else {
7578 match &wf.this {
7579 Expression::FirstValue(ref vf)
7580 | Expression::LastValue(ref vf)
7581 if vf.ignore_nulls == Some(false) =>
7582 {
7583 // RESPECT NULLS
7584 match target {
7585 DialectType::SQLite | DialectType::PostgreSQL => {
7586 Action::RespectNullsConvert
7587 }
7588 _ => Action::None,
7589 }
7590 }
7591 _ => Action::None,
7592 }
7593 }
7594 }
7595 }
7596 }
7597 // CREATE TABLE a LIKE b -> dialect-specific transformations
7598 Expression::CreateTable(ref ct)
7599 if ct.columns.is_empty()
7600 && ct.constraints.iter().any(|c| {
7601 matches!(c, crate::expressions::TableConstraint::Like { .. })
7602 })
7603 && matches!(
7604 target,
7605 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
7606 ) =>
7607 {
7608 Action::CreateTableLikeToCtas
7609 }
7610 Expression::CreateTable(ref ct)
7611 if ct.columns.is_empty()
7612 && ct.constraints.iter().any(|c| {
7613 matches!(c, crate::expressions::TableConstraint::Like { .. })
7614 })
7615 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
7616 {
7617 Action::CreateTableLikeToSelectInto
7618 }
7619 Expression::CreateTable(ref ct)
7620 if ct.columns.is_empty()
7621 && ct.constraints.iter().any(|c| {
7622 matches!(c, crate::expressions::TableConstraint::Like { .. })
7623 })
7624 && matches!(target, DialectType::ClickHouse) =>
7625 {
7626 Action::CreateTableLikeToAs
7627 }
7628 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
7629 Expression::CreateTable(ref ct)
7630 if matches!(target, DialectType::DuckDB)
7631 && matches!(
7632 source,
7633 DialectType::DuckDB
7634 | DialectType::Spark
7635 | DialectType::Databricks
7636 | DialectType::Hive
7637 ) =>
7638 {
7639 let has_comment = ct.columns.iter().any(|c| {
7640 c.comment.is_some()
7641 || c.constraints.iter().any(|con| {
7642 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
7643 })
7644 });
7645 let has_props = !ct.properties.is_empty();
7646 if has_comment || has_props {
7647 Action::CreateTableStripComment
7648 } else {
7649 Action::None
7650 }
7651 }
7652 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
7653 Expression::Array(_)
7654 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
7655 {
7656 Action::ArrayConcatBracketConvert
7657 }
7658 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
7659 Expression::ArrayFunc(ref arr)
7660 if arr.bracket_notation
7661 && matches!(source, DialectType::BigQuery)
7662 && matches!(target, DialectType::Redshift) =>
7663 {
7664 Action::ArrayConcatBracketConvert
7665 }
7666 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
7667 Expression::BitwiseOrAgg(ref f)
7668 | Expression::BitwiseAndAgg(ref f)
7669 | Expression::BitwiseXorAgg(ref f) => {
7670 if matches!(target, DialectType::DuckDB) {
7671 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
7672 if let Expression::Cast(ref c) = f.this {
7673 match &c.to {
7674 DataType::Float { .. }
7675 | DataType::Double { .. }
7676 | DataType::Decimal { .. } => Action::BitAggFloatCast,
7677 DataType::Custom { ref name }
7678 if name.eq_ignore_ascii_case("REAL") =>
7679 {
7680 Action::BitAggFloatCast
7681 }
7682 _ => Action::None,
7683 }
7684 } else {
7685 Action::None
7686 }
7687 } else if matches!(target, DialectType::Snowflake) {
7688 Action::BitAggSnowflakeRename
7689 } else {
7690 Action::None
7691 }
7692 }
7693 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
7694 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
7695 Action::FilterToIff
7696 }
7697 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
7698 Expression::Avg(ref f)
7699 | Expression::Sum(ref f)
7700 | Expression::Min(ref f)
7701 | Expression::Max(ref f)
7702 | Expression::CountIf(ref f)
7703 | Expression::Stddev(ref f)
7704 | Expression::StddevPop(ref f)
7705 | Expression::StddevSamp(ref f)
7706 | Expression::Variance(ref f)
7707 | Expression::VarPop(ref f)
7708 | Expression::VarSamp(ref f)
7709 | Expression::Median(ref f)
7710 | Expression::Mode(ref f)
7711 | Expression::First(ref f)
7712 | Expression::Last(ref f)
7713 | Expression::ApproxDistinct(ref f)
7714 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
7715 {
7716 Action::AggFilterToIff
7717 }
7718 Expression::Count(ref c)
7719 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
7720 {
7721 Action::AggFilterToIff
7722 }
7723 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
7724 Expression::Count(ref c)
7725 if c.distinct
7726 && matches!(&c.this, Some(Expression::Tuple(_)))
7727 && matches!(
7728 target,
7729 DialectType::Presto
7730 | DialectType::Trino
7731 | DialectType::DuckDB
7732 | DialectType::PostgreSQL
7733 ) =>
7734 {
7735 Action::CountDistinctMultiArg
7736 }
7737 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
7738 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
7739 Action::JsonToGetPath
7740 }
7741 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
7742 Expression::Struct(_)
7743 if matches!(
7744 target,
7745 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7746 ) && matches!(source, DialectType::DuckDB) =>
7747 {
7748 Action::StructToRow
7749 }
7750 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
7751 Expression::MapFunc(ref m)
7752 if m.curly_brace_syntax
7753 && matches!(
7754 target,
7755 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7756 )
7757 && matches!(source, DialectType::DuckDB) =>
7758 {
7759 Action::StructToRow
7760 }
7761 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
7762 Expression::ApproxCountDistinct(_)
7763 if matches!(
7764 target,
7765 DialectType::Presto | DialectType::Trino | DialectType::Athena
7766 ) =>
7767 {
7768 Action::ApproxCountDistinctToApproxDistinct
7769 }
7770 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
7771 Expression::ArrayContains(_)
7772 if matches!(
7773 target,
7774 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
7775 ) && !(matches!(source, DialectType::Snowflake) && matches!(target, DialectType::Snowflake)) =>
7776 {
7777 Action::ArrayContainsConvert
7778 }
7779 // ARRAY_CONTAINS -> DuckDB NULL-aware CASE (from Snowflake source with check_null semantics)
7780 Expression::ArrayContains(_)
7781 if matches!(target, DialectType::DuckDB)
7782 && matches!(source, DialectType::Snowflake) =>
7783 {
7784 Action::ArrayContainsDuckDBConvert
7785 }
7786 // ARRAY_EXCEPT -> target-specific conversion
7787 Expression::ArrayExcept(_)
7788 if matches!(
7789 target,
7790 DialectType::DuckDB | DialectType::Snowflake | DialectType::Presto | DialectType::Trino | DialectType::Athena
7791 ) =>
7792 {
7793 Action::ArrayExceptConvert
7794 }
7795 // ARRAY_POSITION -> swap args for Snowflake target (only when source is not Snowflake)
7796 Expression::ArrayPosition(_)
7797 if matches!(target, DialectType::Snowflake)
7798 && !matches!(source, DialectType::Snowflake) =>
7799 {
7800 Action::ArrayPositionSnowflakeSwap
7801 }
7802 // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB from Snowflake source
7803 Expression::ArrayPosition(_)
7804 if matches!(target, DialectType::DuckDB)
7805 && matches!(source, DialectType::Snowflake) =>
7806 {
7807 Action::SnowflakeArrayPositionToDuckDB
7808 }
7809 // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
7810 Expression::ArrayDistinct(_)
7811 if matches!(target, DialectType::ClickHouse) =>
7812 {
7813 Action::ArrayDistinctClickHouse
7814 }
7815 // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
7816 Expression::ArrayDistinct(_)
7817 if matches!(target, DialectType::DuckDB)
7818 && matches!(source, DialectType::Snowflake) =>
7819 {
7820 Action::ArrayDistinctConvert
7821 }
7822 // StrPosition with position -> complex expansion for Presto/DuckDB
7823 // STRPOS doesn't support a position arg in these dialects
7824 Expression::StrPosition(ref sp)
7825 if sp.position.is_some()
7826 && matches!(
7827 target,
7828 DialectType::Presto
7829 | DialectType::Trino
7830 | DialectType::Athena
7831 | DialectType::DuckDB
7832 ) =>
7833 {
7834 Action::StrPositionExpand
7835 }
7836 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
7837 Expression::First(ref f)
7838 if f.ignore_nulls == Some(true)
7839 && matches!(target, DialectType::DuckDB) =>
7840 {
7841 Action::FirstToAnyValue
7842 }
7843 // BEGIN -> START TRANSACTION for Presto/Trino
7844 Expression::Command(ref cmd)
7845 if cmd.this.eq_ignore_ascii_case("BEGIN")
7846 && matches!(
7847 target,
7848 DialectType::Presto | DialectType::Trino | DialectType::Athena
7849 ) =>
7850 {
7851 // Handled inline below
7852 Action::None // We'll handle it directly
7853 }
7854 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
7855 // PostgreSQL # is parsed as BitwiseXor (which is correct).
7856 // a || b (Concat operator) -> CONCAT function for Presto/Trino
7857 Expression::Concat(ref _op)
7858 if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7859 && matches!(target, DialectType::Presto | DialectType::Trino) =>
7860 {
7861 Action::PipeConcatToConcat
7862 }
7863 _ => Action::None,
7864 }
7865 };
7866
7867 match action {
7868 Action::None => {
7869 // Handle inline transforms that don't need a dedicated action
7870
7871 // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
7872 if let Expression::Between(ref b) = e {
7873 if let Some(sym) = b.symmetric {
7874 let keeps_symmetric =
7875 matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
7876 if !keeps_symmetric {
7877 if sym {
7878 // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
7879 let b = if let Expression::Between(b) = e {
7880 *b
7881 } else {
7882 unreachable!()
7883 };
7884 let between1 = Expression::Between(Box::new(
7885 crate::expressions::Between {
7886 this: b.this.clone(),
7887 low: b.low.clone(),
7888 high: b.high.clone(),
7889 not: b.not,
7890 symmetric: None,
7891 },
7892 ));
7893 let between2 = Expression::Between(Box::new(
7894 crate::expressions::Between {
7895 this: b.this,
7896 low: b.high,
7897 high: b.low,
7898 not: b.not,
7899 symmetric: None,
7900 },
7901 ));
7902 return Ok(Expression::Paren(Box::new(
7903 crate::expressions::Paren {
7904 this: Expression::Or(Box::new(
7905 crate::expressions::BinaryOp::new(
7906 between1, between2,
7907 ),
7908 )),
7909 trailing_comments: vec![],
7910 },
7911 )));
7912 } else {
7913 // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
7914 let b = if let Expression::Between(b) = e {
7915 *b
7916 } else {
7917 unreachable!()
7918 };
7919 return Ok(Expression::Between(Box::new(
7920 crate::expressions::Between {
7921 this: b.this,
7922 low: b.low,
7923 high: b.high,
7924 not: b.not,
7925 symmetric: None,
7926 },
7927 )));
7928 }
7929 }
7930 }
7931 }
7932
7933 // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
7934 if let Expression::ILike(ref _like) = e {
7935 if matches!(target, DialectType::StarRocks | DialectType::Doris) {
7936 let like = if let Expression::ILike(l) = e {
7937 *l
7938 } else {
7939 unreachable!()
7940 };
7941 let lower_left = Expression::Function(Box::new(Function::new(
7942 "LOWER".to_string(),
7943 vec![like.left],
7944 )));
7945 let lower_right = Expression::Function(Box::new(Function::new(
7946 "LOWER".to_string(),
7947 vec![like.right],
7948 )));
7949 return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
7950 left: lower_left,
7951 right: lower_right,
7952 escape: like.escape,
7953 quantifier: like.quantifier,
7954 inferred_type: None,
7955 })));
7956 }
7957 }
7958
7959 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
7960 if let Expression::MethodCall(ref mc) = e {
7961 if matches!(source, DialectType::Oracle)
7962 && mc.method.name.eq_ignore_ascii_case("VALUE")
7963 && mc.args.is_empty()
7964 {
7965 let is_dbms_random = match &mc.this {
7966 Expression::Identifier(id) => {
7967 id.name.eq_ignore_ascii_case("DBMS_RANDOM")
7968 }
7969 Expression::Column(col) => {
7970 col.table.is_none()
7971 && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
7972 }
7973 _ => false,
7974 };
7975 if is_dbms_random {
7976 let func_name = match target {
7977 DialectType::PostgreSQL
7978 | DialectType::Redshift
7979 | DialectType::DuckDB
7980 | DialectType::SQLite => "RANDOM",
7981 DialectType::Oracle => "DBMS_RANDOM.VALUE",
7982 _ => "RAND",
7983 };
7984 return Ok(Expression::Function(Box::new(Function::new(
7985 func_name.to_string(),
7986 vec![],
7987 ))));
7988 }
7989 }
7990 }
7991 // TRIM without explicit position -> add BOTH for ClickHouse
7992 if let Expression::Trim(ref trim) = e {
7993 if matches!(target, DialectType::ClickHouse)
7994 && trim.sql_standard_syntax
7995 && trim.characters.is_some()
7996 && !trim.position_explicit
7997 {
7998 let mut new_trim = (**trim).clone();
7999 new_trim.position_explicit = true;
8000 return Ok(Expression::Trim(Box::new(new_trim)));
8001 }
8002 }
8003 // BEGIN -> START TRANSACTION for Presto/Trino
8004 if let Expression::Transaction(ref txn) = e {
8005 if matches!(
8006 target,
8007 DialectType::Presto | DialectType::Trino | DialectType::Athena
8008 ) {
8009 // Convert BEGIN to START TRANSACTION by setting mark to "START"
8010 let mut txn = txn.clone();
8011 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
8012 "START".to_string(),
8013 ))));
8014 return Ok(Expression::Transaction(Box::new(*txn)));
8015 }
8016 }
8017 // IS TRUE/FALSE -> simplified forms for Presto/Trino
8018 if matches!(
8019 target,
8020 DialectType::Presto | DialectType::Trino | DialectType::Athena
8021 ) {
8022 match &e {
8023 Expression::IsTrue(itf) if !itf.not => {
8024 // x IS TRUE -> x
8025 return Ok(itf.this.clone());
8026 }
8027 Expression::IsTrue(itf) if itf.not => {
8028 // x IS NOT TRUE -> NOT x
8029 return Ok(Expression::Not(Box::new(
8030 crate::expressions::UnaryOp {
8031 this: itf.this.clone(),
8032 inferred_type: None,
8033 },
8034 )));
8035 }
8036 Expression::IsFalse(itf) if !itf.not => {
8037 // x IS FALSE -> NOT x
8038 return Ok(Expression::Not(Box::new(
8039 crate::expressions::UnaryOp {
8040 this: itf.this.clone(),
8041 inferred_type: None,
8042 },
8043 )));
8044 }
8045 Expression::IsFalse(itf) if itf.not => {
8046 // x IS NOT FALSE -> NOT NOT x
8047 let not_x =
8048 Expression::Not(Box::new(crate::expressions::UnaryOp {
8049 this: itf.this.clone(),
8050 inferred_type: None,
8051 }));
8052 return Ok(Expression::Not(Box::new(
8053 crate::expressions::UnaryOp {
8054 this: not_x,
8055 inferred_type: None,
8056 },
8057 )));
8058 }
8059 _ => {}
8060 }
8061 }
8062 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
8063 if matches!(target, DialectType::Redshift) {
8064 if let Expression::IsFalse(ref itf) = e {
8065 if itf.not {
8066 return Ok(Expression::Not(Box::new(
8067 crate::expressions::UnaryOp {
8068 this: Expression::IsFalse(Box::new(
8069 crate::expressions::IsTrueFalse {
8070 this: itf.this.clone(),
8071 not: false,
8072 },
8073 )),
8074 inferred_type: None,
8075 },
8076 )));
8077 }
8078 }
8079 }
8080 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
8081 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
8082 if let Expression::Function(ref f) = e {
8083 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
8084 && matches!(source, DialectType::Snowflake)
8085 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
8086 {
8087 if f.args.len() == 3 {
8088 let mut args = f.args.clone();
8089 args.push(Expression::string("g"));
8090 return Ok(Expression::Function(Box::new(Function::new(
8091 "REGEXP_REPLACE".to_string(),
8092 args,
8093 ))));
8094 } else if f.args.len() == 4 {
8095 // 4th arg might be position, add 'g' as 5th
8096 let mut args = f.args.clone();
8097 args.push(Expression::string("g"));
8098 return Ok(Expression::Function(Box::new(Function::new(
8099 "REGEXP_REPLACE".to_string(),
8100 args,
8101 ))));
8102 }
8103 }
8104 }
8105 Ok(e)
8106 }
8107
8108 Action::GreatestLeastNull => {
8109 let f = if let Expression::Function(f) = e {
8110 *f
8111 } else {
8112 unreachable!("action only triggered for Function expressions")
8113 };
8114 let mut null_checks: Vec<Expression> = f
8115 .args
8116 .iter()
8117 .map(|a| {
8118 Expression::IsNull(Box::new(IsNull {
8119 this: a.clone(),
8120 not: false,
8121 postfix_form: false,
8122 }))
8123 })
8124 .collect();
8125 let condition = if null_checks.len() == 1 {
8126 null_checks.remove(0)
8127 } else {
8128 let first = null_checks.remove(0);
8129 null_checks.into_iter().fold(first, |acc, check| {
8130 Expression::Or(Box::new(BinaryOp::new(acc, check)))
8131 })
8132 };
8133 Ok(Expression::Case(Box::new(Case {
8134 operand: None,
8135 whens: vec![(condition, Expression::Null(Null))],
8136 else_: Some(Expression::Function(Box::new(Function::new(
8137 f.name, f.args,
8138 )))),
8139 comments: Vec::new(),
8140 inferred_type: None,
8141 })))
8142 }
8143
8144 Action::ArrayGenerateRange => {
8145 let f = if let Expression::Function(f) = e {
8146 *f
8147 } else {
8148 unreachable!("action only triggered for Function expressions")
8149 };
8150 let start = f.args[0].clone();
8151 let end = f.args[1].clone();
8152 let step = f.args.get(2).cloned();
8153
8154 // Helper: compute end - 1 for converting exclusive→inclusive end.
8155 // When end is a literal number, simplify to a computed literal.
8156 fn exclusive_to_inclusive_end(end: &Expression) -> Expression {
8157 // Try to simplify literal numbers
8158 match end {
8159 Expression::Literal(lit)
8160 if matches!(lit.as_ref(), Literal::Number(_)) =>
8161 {
8162 let Literal::Number(n) = lit.as_ref() else {
8163 unreachable!()
8164 };
8165 if let Ok(val) = n.parse::<i64>() {
8166 return Expression::number(val - 1);
8167 }
8168 }
8169 Expression::Neg(u) => {
8170 if let Expression::Literal(lit) = &u.this {
8171 if let Literal::Number(n) = lit.as_ref() {
8172 if let Ok(val) = n.parse::<i64>() {
8173 return Expression::number(-val - 1);
8174 }
8175 }
8176 }
8177 }
8178 _ => {}
8179 }
8180 // Non-literal: produce end - 1 expression
8181 Expression::Sub(Box::new(BinaryOp::new(end.clone(), Expression::number(1))))
8182 }
8183
8184 match target {
8185 // Snowflake ARRAY_GENERATE_RANGE and DuckDB RANGE both use exclusive end,
8186 // so no adjustment needed — just rename the function.
8187 DialectType::Snowflake => {
8188 let mut args = vec![start, end];
8189 if let Some(s) = step {
8190 args.push(s);
8191 }
8192 Ok(Expression::Function(Box::new(Function::new(
8193 "ARRAY_GENERATE_RANGE".to_string(),
8194 args,
8195 ))))
8196 }
8197 DialectType::DuckDB => {
8198 let mut args = vec![start, end];
8199 if let Some(s) = step {
8200 args.push(s);
8201 }
8202 Ok(Expression::Function(Box::new(Function::new(
8203 "RANGE".to_string(),
8204 args,
8205 ))))
8206 }
8207 // These dialects use inclusive end, so convert exclusive→inclusive.
8208 // Presto/Trino: simplify literal numbers (3 → 2).
8209 DialectType::Presto | DialectType::Trino => {
8210 let end_inclusive = exclusive_to_inclusive_end(&end);
8211 let mut args = vec![start, end_inclusive];
8212 if let Some(s) = step {
8213 args.push(s);
8214 }
8215 Ok(Expression::Function(Box::new(Function::new(
8216 "SEQUENCE".to_string(),
8217 args,
8218 ))))
8219 }
8220 // PostgreSQL, Redshift, BigQuery: keep as end - 1 expression form.
8221 DialectType::PostgreSQL | DialectType::Redshift => {
8222 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
8223 end.clone(),
8224 Expression::number(1),
8225 )));
8226 let mut args = vec![start, end_minus_1];
8227 if let Some(s) = step {
8228 args.push(s);
8229 }
8230 Ok(Expression::Function(Box::new(Function::new(
8231 "GENERATE_SERIES".to_string(),
8232 args,
8233 ))))
8234 }
8235 DialectType::BigQuery => {
8236 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
8237 end.clone(),
8238 Expression::number(1),
8239 )));
8240 let mut args = vec![start, end_minus_1];
8241 if let Some(s) = step {
8242 args.push(s);
8243 }
8244 Ok(Expression::Function(Box::new(Function::new(
8245 "GENERATE_ARRAY".to_string(),
8246 args,
8247 ))))
8248 }
8249 _ => Ok(Expression::Function(Box::new(Function::new(
8250 f.name, f.args,
8251 )))),
8252 }
8253 }
8254
8255 Action::Div0TypedDivision => {
8256 let if_func = if let Expression::IfFunc(f) = e {
8257 *f
8258 } else {
8259 unreachable!("action only triggered for IfFunc expressions")
8260 };
8261 if let Some(Expression::Div(div)) = if_func.false_value {
8262 let cast_type = if matches!(target, DialectType::SQLite) {
8263 DataType::Float {
8264 precision: None,
8265 scale: None,
8266 real_spelling: true,
8267 }
8268 } else {
8269 DataType::Double {
8270 precision: None,
8271 scale: None,
8272 }
8273 };
8274 let casted_left = Expression::Cast(Box::new(Cast {
8275 this: div.left,
8276 to: cast_type,
8277 trailing_comments: vec![],
8278 double_colon_syntax: false,
8279 format: None,
8280 default: None,
8281 inferred_type: None,
8282 }));
8283 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
8284 condition: if_func.condition,
8285 true_value: if_func.true_value,
8286 false_value: Some(Expression::Div(Box::new(BinaryOp::new(
8287 casted_left,
8288 div.right,
8289 )))),
8290 original_name: if_func.original_name,
8291 inferred_type: None,
8292 })))
8293 } else {
8294 // Not actually a Div, reconstruct
8295 Ok(Expression::IfFunc(Box::new(if_func)))
8296 }
8297 }
8298
8299 Action::ArrayAggCollectList => {
8300 let agg = if let Expression::ArrayAgg(a) = e {
8301 *a
8302 } else {
8303 unreachable!("action only triggered for ArrayAgg expressions")
8304 };
8305 Ok(Expression::ArrayAgg(Box::new(AggFunc {
8306 name: Some("COLLECT_LIST".to_string()),
8307 ..agg
8308 })))
8309 }
8310
8311 Action::ArrayAggToGroupConcat => {
8312 let agg = if let Expression::ArrayAgg(a) = e {
8313 *a
8314 } else {
8315 unreachable!("action only triggered for ArrayAgg expressions")
8316 };
8317 Ok(Expression::ArrayAgg(Box::new(AggFunc {
8318 name: Some("GROUP_CONCAT".to_string()),
8319 ..agg
8320 })))
8321 }
8322
            Action::ArrayAggWithinGroupFilter => {
                // ARRAY_AGG(x) WITHIN GROUP (ORDER BY ...) ->
                // ARRAY_AGG(x ORDER BY ...) FILTER (WHERE x IS NOT NULL)
                // Folds the WITHIN GROUP ordering into the aggregate itself and
                // attaches a NULL-excluding FILTER to match source semantics.
                let wg = if let Expression::WithinGroup(w) = e {
                    *w
                } else {
                    unreachable!("action only triggered for WithinGroup expressions")
                };
                if let Expression::ArrayAgg(inner_agg) = wg.this {
                    let col = inner_agg.this.clone();
                    // FILTER (WHERE col IS NOT NULL)
                    let filter = Expression::IsNull(Box::new(IsNull {
                        this: col,
                        not: true,
                        postfix_form: false,
                    }));
                    // For DuckDB, add explicit NULLS FIRST for DESC ordering
                    // (DuckDB's default NULL placement differs from the source).
                    let order_by = if matches!(target, DialectType::DuckDB) {
                        wg.order_by
                            .into_iter()
                            .map(|mut o| {
                                if o.desc && o.nulls_first.is_none() {
                                    o.nulls_first = Some(true);
                                }
                                o
                            })
                            .collect()
                    } else {
                        wg.order_by
                    };
                    // NOTE(review): any FILTER already present on the inner
                    // aggregate is replaced here rather than AND-ed — confirm inner
                    // ARRAY_AGGs never carry their own filter at this point.
                    Ok(Expression::ArrayAgg(Box::new(AggFunc {
                        this: inner_agg.this,
                        distinct: inner_agg.distinct,
                        filter: Some(filter),
                        order_by,
                        name: inner_agg.name,
                        ignore_nulls: inner_agg.ignore_nulls,
                        having_max: inner_agg.having_max,
                        limit: inner_agg.limit,
                        inferred_type: None,
                    })))
                } else {
                    // WITHIN GROUP over something other than ARRAY_AGG: untouched.
                    Ok(Expression::WithinGroup(Box::new(wg)))
                }
            }
8365
8366 Action::ArrayAggFilter => {
8367 let agg = if let Expression::ArrayAgg(a) = e {
8368 *a
8369 } else {
8370 unreachable!("action only triggered for ArrayAgg expressions")
8371 };
8372 let col = agg.this.clone();
8373 let filter = Expression::IsNull(Box::new(IsNull {
8374 this: col,
8375 not: true,
8376 postfix_form: false,
8377 }));
8378 Ok(Expression::ArrayAgg(Box::new(AggFunc {
8379 filter: Some(filter),
8380 ..agg
8381 })))
8382 }
8383
8384 Action::ArrayAggNullFilter => {
8385 // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
8386 // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
8387 let agg = if let Expression::ArrayAgg(a) = e {
8388 *a
8389 } else {
8390 unreachable!("action only triggered for ArrayAgg expressions")
8391 };
8392 let col = agg.this.clone();
8393 let not_null = Expression::IsNull(Box::new(IsNull {
8394 this: col,
8395 not: true,
8396 postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
8397 }));
8398 let new_filter = if let Some(existing_filter) = agg.filter {
8399 // AND the NOT IS NULL with existing filter
8400 Expression::And(Box::new(crate::expressions::BinaryOp::new(
8401 existing_filter,
8402 not_null,
8403 )))
8404 } else {
8405 not_null
8406 };
8407 Ok(Expression::ArrayAgg(Box::new(AggFunc {
8408 filter: Some(new_filter),
8409 ..agg
8410 })))
8411 }
8412
8413 Action::BigQueryArraySelectAsStructToSnowflake => {
8414 // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
8415 // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
8416 if let Expression::Function(mut f) = e {
8417 let is_match = f.args.len() == 1
8418 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
8419 if is_match {
8420 let inner_select = match f.args.remove(0) {
8421 Expression::Select(s) => *s,
8422 _ => unreachable!(
8423 "argument already verified to be a Select expression"
8424 ),
8425 };
8426 // Build OBJECT_CONSTRUCT args from SELECT expressions
8427 let mut oc_args = Vec::new();
8428 for expr in &inner_select.expressions {
8429 match expr {
8430 Expression::Alias(a) => {
8431 let key = Expression::Literal(Box::new(Literal::String(
8432 a.alias.name.clone(),
8433 )));
8434 let value = a.this.clone();
8435 oc_args.push(key);
8436 oc_args.push(value);
8437 }
8438 Expression::Column(c) => {
8439 let key = Expression::Literal(Box::new(Literal::String(
8440 c.name.name.clone(),
8441 )));
8442 oc_args.push(key);
8443 oc_args.push(expr.clone());
8444 }
8445 _ => {
8446 oc_args.push(expr.clone());
8447 }
8448 }
8449 }
8450 let object_construct = Expression::Function(Box::new(Function::new(
8451 "OBJECT_CONSTRUCT".to_string(),
8452 oc_args,
8453 )));
8454 let array_agg = Expression::Function(Box::new(Function::new(
8455 "ARRAY_AGG".to_string(),
8456 vec![object_construct],
8457 )));
8458 let mut new_select = crate::expressions::Select::new();
8459 new_select.expressions = vec![array_agg];
8460 new_select.from = inner_select.from.clone();
8461 new_select.where_clause = inner_select.where_clause.clone();
8462 new_select.group_by = inner_select.group_by.clone();
8463 new_select.having = inner_select.having.clone();
8464 new_select.joins = inner_select.joins.clone();
8465 Ok(Expression::Subquery(Box::new(
8466 crate::expressions::Subquery {
8467 this: Expression::Select(Box::new(new_select)),
8468 alias: None,
8469 column_aliases: Vec::new(),
8470 order_by: None,
8471 limit: None,
8472 offset: None,
8473 distribute_by: None,
8474 sort_by: None,
8475 cluster_by: None,
8476 lateral: false,
8477 modifiers_inside: false,
8478 trailing_comments: Vec::new(),
8479 inferred_type: None,
8480 },
8481 )))
8482 } else {
8483 Ok(Expression::Function(f))
8484 }
8485 } else {
8486 Ok(e)
8487 }
8488 }
8489
8490 Action::BigQueryPercentileContToDuckDB => {
8491 // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
8492 if let Expression::AggregateFunction(mut af) = e {
8493 af.name = "QUANTILE_CONT".to_string();
8494 af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
8495 // Keep only first 2 args
8496 if af.args.len() > 2 {
8497 af.args.truncate(2);
8498 }
8499 Ok(Expression::AggregateFunction(af))
8500 } else {
8501 Ok(e)
8502 }
8503 }
8504
8505 Action::ArrayAggIgnoreNullsDuckDB => {
8506 // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
8507 // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
8508 let mut agg = if let Expression::ArrayAgg(a) = e {
8509 *a
8510 } else {
8511 unreachable!("action only triggered for ArrayAgg expressions")
8512 };
8513 agg.ignore_nulls = None; // Strip IGNORE NULLS
8514 if !agg.order_by.is_empty() {
8515 agg.order_by[0].nulls_first = Some(true);
8516 }
8517 Ok(Expression::ArrayAgg(Box::new(agg)))
8518 }
8519
8520 Action::CountDistinctMultiArg => {
8521 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
8522 if let Expression::Count(c) = e {
8523 if let Some(Expression::Tuple(t)) = c.this {
8524 let args = t.expressions;
8525 // Build CASE expression:
8526 // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
8527 let mut whens = Vec::new();
8528 for arg in &args {
8529 whens.push((
8530 Expression::IsNull(Box::new(IsNull {
8531 this: arg.clone(),
8532 not: false,
8533 postfix_form: false,
8534 })),
8535 Expression::Null(crate::expressions::Null),
8536 ));
8537 }
8538 // Build the tuple for ELSE
8539 let tuple_expr =
8540 Expression::Tuple(Box::new(crate::expressions::Tuple {
8541 expressions: args,
8542 }));
8543 let case_expr = Expression::Case(Box::new(crate::expressions::Case {
8544 operand: None,
8545 whens,
8546 else_: Some(tuple_expr),
8547 comments: Vec::new(),
8548 inferred_type: None,
8549 }));
8550 Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
8551 this: Some(case_expr),
8552 star: false,
8553 distinct: true,
8554 filter: c.filter,
8555 ignore_nulls: c.ignore_nulls,
8556 original_name: c.original_name,
8557 inferred_type: None,
8558 })))
8559 } else {
8560 Ok(Expression::Count(c))
8561 }
8562 } else {
8563 Ok(e)
8564 }
8565 }
8566
8567 Action::CastTimestampToDatetime => {
8568 let c = if let Expression::Cast(c) = e {
8569 *c
8570 } else {
8571 unreachable!("action only triggered for Cast expressions")
8572 };
8573 Ok(Expression::Cast(Box::new(Cast {
8574 to: DataType::Custom {
8575 name: "DATETIME".to_string(),
8576 },
8577 ..c
8578 })))
8579 }
8580
8581 Action::CastTimestampStripTz => {
8582 // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
8583 let c = if let Expression::Cast(c) = e {
8584 *c
8585 } else {
8586 unreachable!("action only triggered for Cast expressions")
8587 };
8588 Ok(Expression::Cast(Box::new(Cast {
8589 to: DataType::Timestamp {
8590 precision: None,
8591 timezone: false,
8592 },
8593 ..c
8594 })))
8595 }
8596
8597 Action::CastTimestamptzToFunc => {
8598 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
8599 let c = if let Expression::Cast(c) = e {
8600 *c
8601 } else {
8602 unreachable!("action only triggered for Cast expressions")
8603 };
8604 Ok(Expression::Function(Box::new(Function::new(
8605 "TIMESTAMP".to_string(),
8606 vec![c.this],
8607 ))))
8608 }
8609
8610 Action::ToDateToCast => {
8611 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
8612 if let Expression::Function(f) = e {
8613 let arg = f.args.into_iter().next().unwrap();
8614 Ok(Expression::Cast(Box::new(Cast {
8615 this: arg,
8616 to: DataType::Date,
8617 double_colon_syntax: false,
8618 trailing_comments: vec![],
8619 format: None,
8620 default: None,
8621 inferred_type: None,
8622 })))
8623 } else {
8624 Ok(e)
8625 }
8626 }
            Action::DateTruncWrapCast => {
                // Handle both Expression::DateTrunc/TimestampTrunc and
                // Expression::Function("DATE_TRUNC", [unit, expr])
                //
                // When the truncation input is itself CAST(x AS T), re-apply that
                // cast around the truncation result so the output type is kept.
                // TIME inputs need extra help: DATE_TRUNC wants a date-bearing
                // operand, so the time is anchored onto the epoch date 1970-01-01,
                // truncated, then cast back to TIME to strip the anchor date.
                match e {
                    Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
                        // Capture the cast target type, if the operand is a cast.
                        let input_type = match &d.this {
                            Expression::Cast(c) => Some(c.to.clone()),
                            _ => None,
                        };
                        if let Some(cast_type) = input_type {
                            let is_time = matches!(cast_type, DataType::Time { .. });
                            if is_time {
                                // Anchor the TIME value onto a fixed date so the
                                // truncation has a full timestamp to operate on.
                                let date_expr = Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(Box::new(
                                        crate::expressions::Literal::String(
                                            "1970-01-01".to_string(),
                                        ),
                                    )),
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                let add_expr =
                                    Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
                                let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
                                    this: add_expr,
                                    unit: d.unit,
                                }));
                                // Cast back to TIME to discard the anchor date.
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: inner,
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            } else {
                                // Non-TIME cast: just wrap the truncation in the
                                // same cast so the result type is unchanged.
                                let inner = Expression::DateTrunc(Box::new(*d));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: inner,
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                        } else {
                            // Operand is not a cast: nothing to preserve.
                            Ok(Expression::DateTrunc(d))
                        }
                    }
                    Expression::Function(f) if f.args.len() == 2 => {
                        // Function-based DATE_TRUNC(unit, expr)
                        let input_type = match &f.args[1] {
                            Expression::Cast(c) => Some(c.to.clone()),
                            _ => None,
                        };
                        if let Some(cast_type) = input_type {
                            let is_time = matches!(cast_type, DataType::Time { .. });
                            if is_time {
                                // Same epoch-anchoring trick as the node-based arm.
                                let date_expr = Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(Box::new(
                                        crate::expressions::Literal::String(
                                            "1970-01-01".to_string(),
                                        ),
                                    )),
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                let mut args = f.args;
                                let unit_arg = args.remove(0);
                                let time_expr = args.remove(0);
                                let add_expr = Expression::Add(Box::new(BinaryOp::new(
                                    date_expr, time_expr,
                                )));
                                let inner = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![unit_arg, add_expr],
                                )));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: inner,
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            } else {
                                // Wrap the function in CAST
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: Expression::Function(f),
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                        } else {
                            Ok(Expression::Function(f))
                        }
                    }
                    // Any other shape (e.g. wrong arity) passes through untouched.
                    other => Ok(other),
                }
            }
8743
8744 Action::RegexpReplaceSnowflakeToDuckDB => {
8745 // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
8746 if let Expression::Function(f) = e {
8747 let mut args = f.args;
8748 let subject = args.remove(0);
8749 let pattern = args.remove(0);
8750 let replacement = args.remove(0);
8751 Ok(Expression::Function(Box::new(Function::new(
8752 "REGEXP_REPLACE".to_string(),
8753 vec![
8754 subject,
8755 pattern,
8756 replacement,
8757 Expression::Literal(Box::new(crate::expressions::Literal::String(
8758 "g".to_string(),
8759 ))),
8760 ],
8761 ))))
8762 } else {
8763 Ok(e)
8764 }
8765 }
8766
            Action::RegexpReplacePositionSnowflakeToDuckDB => {
                // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
                // pos=1, occ=1 -> REGEXP_REPLACE(s, p, r) (single replace, no 'g')
                // pos>1, occ=0 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r, 'g')
                // pos>1, occ=1 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r)
                // pos=1, occ=0 -> REGEXP_REPLACE(s, p, r, 'g') (replace all)
                if let Expression::Function(f) = e {
                    // Assumes exactly five args (subject, pattern, replacement,
                    // position, occurrence) — remove(0) panics otherwise; TODO
                    // confirm the action is only matched for that arity.
                    let mut args = f.args;
                    let subject = args.remove(0);
                    let pattern = args.remove(0);
                    let replacement = args.remove(0);
                    let position = args.remove(0);
                    let occurrence = args.remove(0);

                    // Literal pos/occ values select the simpler output forms below.
                    let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                    let is_occ_0 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
                    let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));

                    if is_pos_1 && is_occ_1 {
                        // REGEXP_REPLACE(s, p, r) - single replace, no flags
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_REPLACE".to_string(),
                            vec![subject, pattern, replacement],
                        ))))
                    } else if is_pos_1 && is_occ_0 {
                        // REGEXP_REPLACE(s, p, r, 'g') - global replace
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_REPLACE".to_string(),
                            vec![
                                subject,
                                pattern,
                                replacement,
                                Expression::Literal(Box::new(Literal::String("g".to_string()))),
                            ],
                        ))))
                    } else {
                        // pos>1: SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r[, 'g'])
                        // Pre-compute pos-1 when position is a numeric literal
                        let pos_minus_1 = if let Expression::Literal(ref lit) = position {
                            if let Literal::Number(ref n) = lit.as_ref() {
                                if let Ok(val) = n.parse::<i64>() {
                                    Expression::number(val - 1)
                                } else {
                                    Expression::Sub(Box::new(BinaryOp::new(
                                        position.clone(),
                                        Expression::number(1),
                                    )))
                                }
                            } else {
                                // NOTE(review): a non-numeric literal position falls
                                // through as `position` itself rather than
                                // `position - 1`, unlike the non-literal branch
                                // below — confirm this is intentional.
                                position.clone()
                            }
                        } else {
                            Expression::Sub(Box::new(BinaryOp::new(
                                position.clone(),
                                Expression::number(1),
                            )))
                        };
                        // Prefix: the untouched first pos-1 characters.
                        let prefix = Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![subject.clone(), Expression::number(1), pos_minus_1],
                        )));
                        // Suffix: the replacement applied from `position` onward.
                        let suffix_subject = Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![subject, position],
                        )));
                        let mut replace_args = vec![suffix_subject, pattern, replacement];
                        if is_occ_0 {
                            // occ=0 means replace every match: add the 'g' flag.
                            replace_args.push(Expression::Literal(Box::new(Literal::String(
                                "g".to_string(),
                            ))));
                        }
                        let replace_expr = Expression::Function(Box::new(Function::new(
                            "REGEXP_REPLACE".to_string(),
                            replace_args,
                        )));
                        // Concatenate the prefix with the rewritten suffix.
                        Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
                            this: Box::new(prefix),
                            expression: Box::new(replace_expr),
                            safe: None,
                        })))
                    }
                } else {
                    Ok(e)
                }
            }
8852
            Action::RegexpSubstrSnowflakeToDuckDB => {
                // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
                // Dispatch on arity: (s, p[, pos[, occ[, flags[, group]]]]).
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    let arg_count = args.len();
                    match arg_count {
                        // REGEXP_SUBSTR(s, p) -> REGEXP_EXTRACT(s, p)
                        0..=2 => Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT".to_string(),
                            args,
                        )))),
                        // REGEXP_SUBSTR(s, p, pos) -> REGEXP_EXTRACT(NULLIF(SUBSTRING(s, pos), ''), p)
                        3 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let position = args.remove(0);
                            // pos=1 is the Snowflake default: skip the SUBSTRING wrap.
                            let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                            if is_pos_1 {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![subject, pattern],
                                ))))
                            } else {
                                // NULLIF maps an out-of-range SUBSTRING ('') to NULL
                                // so the result matches Snowflake's NULL behavior.
                                let substring_expr =
                                    Expression::Function(Box::new(Function::new(
                                        "SUBSTRING".to_string(),
                                        vec![subject, position],
                                    )));
                                let nullif_expr =
                                    Expression::Function(Box::new(Function::new(
                                        "NULLIF".to_string(),
                                        vec![
                                            substring_expr,
                                            Expression::Literal(Box::new(Literal::String(
                                                String::new(),
                                            ))),
                                        ],
                                    )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![nullif_expr, pattern],
                                ))))
                            }
                        }
                        // REGEXP_SUBSTR(s, p, pos, occ) -> depends on pos and occ
                        4 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let position = args.remove(0);
                            let occurrence = args.remove(0);
                            let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                            let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));

                            // Non-default position: search from `pos` onward, with
                            // the same NULLIF('') guard as the 3-arg case.
                            let effective_subject = if is_pos_1 {
                                subject
                            } else {
                                let substring_expr =
                                    Expression::Function(Box::new(Function::new(
                                        "SUBSTRING".to_string(),
                                        vec![subject, position],
                                    )));
                                Expression::Function(Box::new(Function::new(
                                    "NULLIF".to_string(),
                                    vec![
                                        substring_expr,
                                        Expression::Literal(Box::new(Literal::String(
                                            String::new(),
                                        ))),
                                    ],
                                )))
                            };

                            if is_occ_1 {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![effective_subject, pattern],
                                ))))
                            } else {
                                // ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, p), occ)
                                // picks the occ-th match out of all matches.
                                let extract_all =
                                    Expression::Function(Box::new(Function::new(
                                        "REGEXP_EXTRACT_ALL".to_string(),
                                        vec![effective_subject, pattern],
                                    )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "ARRAY_EXTRACT".to_string(),
                                    vec![extract_all, occurrence],
                                ))))
                            }
                        }
                        // REGEXP_SUBSTR(s, p, 1, 1, 'e') -> REGEXP_EXTRACT(s, p)
                        5 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let _position = args.remove(0);
                            let _occurrence = args.remove(0);
                            let _flags = args.remove(0);
                            // Strip 'e' flag, convert to REGEXP_EXTRACT
                            // NOTE(review): position/occurrence/flags are dropped
                            // without inspection here — confirm this arm only fires
                            // for the default (1, 1, 'e') shape.
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT".to_string(),
                                vec![subject, pattern],
                            ))))
                        }
                        // REGEXP_SUBSTR(s, p, 1, 1, 'e', group) -> REGEXP_EXTRACT(s, p[, group])
                        _ => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let _position = args.remove(0);
                            let _occurrence = args.remove(0);
                            let _flags = args.remove(0);
                            let group = args.remove(0);
                            // group=0 (whole match) is the default: strip it.
                            let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
                            if is_group_0 {
                                // Strip group=0 (default)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![subject, pattern],
                                ))))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![subject, pattern, group],
                                ))))
                            }
                        }
                    }
                } else {
                    Ok(e)
                }
            }
8983
8984 Action::RegexpSubstrSnowflakeIdentity => {
8985 // Snowflake→Snowflake: REGEXP_SUBSTR/REGEXP_SUBSTR_ALL with 6 args
8986 // Strip trailing group=0
8987 if let Expression::Function(f) = e {
8988 let func_name = f.name.clone();
8989 let mut args = f.args;
8990 if args.len() == 6 {
8991 let is_group_0 = matches!(&args[5], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
8992 if is_group_0 {
8993 args.truncate(5);
8994 }
8995 }
8996 Ok(Expression::Function(Box::new(Function::new(
8997 func_name, args,
8998 ))))
8999 } else {
9000 Ok(e)
9001 }
9002 }
9003
            Action::RegexpSubstrAllSnowflakeToDuckDB => {
                // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
                // Dispatch on arity: (s, p[, pos[, occ[, flags[, group]]]]).
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    let arg_count = args.len();
                    match arg_count {
                        // REGEXP_SUBSTR_ALL(s, p) -> REGEXP_EXTRACT_ALL(s, p)
                        0..=2 => Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT_ALL".to_string(),
                            args,
                        )))),
                        // REGEXP_SUBSTR_ALL(s, p, pos) -> REGEXP_EXTRACT_ALL(SUBSTRING(s, pos), p)
                        3 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let position = args.remove(0);
                            // pos=1 is the default: no SUBSTRING needed.
                            let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                            if is_pos_1 {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![subject, pattern],
                                ))))
                            } else {
                                let substring_expr =
                                    Expression::Function(Box::new(Function::new(
                                        "SUBSTRING".to_string(),
                                        vec![subject, position],
                                    )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![substring_expr, pattern],
                                ))))
                            }
                        }
                        // REGEXP_SUBSTR_ALL(s, p, 1, occ) -> REGEXP_EXTRACT_ALL(s, p)[occ:]
                        4 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let position = args.remove(0);
                            let occurrence = args.remove(0);
                            let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                            let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));

                            // Non-default position: search from `pos` onward.
                            let effective_subject = if is_pos_1 {
                                subject
                            } else {
                                Expression::Function(Box::new(Function::new(
                                    "SUBSTRING".to_string(),
                                    vec![subject, position],
                                )))
                            };

                            if is_occ_1 {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![effective_subject, pattern],
                                ))))
                            } else {
                                // REGEXP_EXTRACT_ALL(s, p)[occ:]
                                // Slicing from occ drops the first occ-1 matches.
                                let extract_all =
                                    Expression::Function(Box::new(Function::new(
                                        "REGEXP_EXTRACT_ALL".to_string(),
                                        vec![effective_subject, pattern],
                                    )));
                                Ok(Expression::ArraySlice(Box::new(
                                    crate::expressions::ArraySlice {
                                        this: extract_all,
                                        start: Some(occurrence),
                                        end: None,
                                    },
                                )))
                            }
                        }
                        // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e') -> REGEXP_EXTRACT_ALL(s, p)
                        5 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let _position = args.remove(0);
                            let _occurrence = args.remove(0);
                            let _flags = args.remove(0);
                            // NOTE(review): pos/occ/flags are discarded unchecked —
                            // confirm this arm only fires for the default shape.
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![subject, pattern],
                            ))))
                        }
                        // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e', 0) -> REGEXP_EXTRACT_ALL(s, p)
                        _ => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let _position = args.remove(0);
                            let _occurrence = args.remove(0);
                            let _flags = args.remove(0);
                            let group = args.remove(0);
                            // group=0 (whole match) is the default: strip it.
                            let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
                            if is_group_0 {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![subject, pattern],
                                ))))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![subject, pattern, group],
                                ))))
                            }
                        }
                    }
                } else {
                    Ok(e)
                }
            }
9115
            Action::RegexpCountSnowflakeToDuckDB => {
                // Snowflake REGEXP_COUNT(s, p[, pos[, flags]]) ->
                // DuckDB: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    let arg_count = args.len();
                    let subject = args.remove(0);
                    let pattern = args.remove(0);

                    // Handle position arg
                    // pos > 1 means counting starts mid-string: model it by
                    // substringing the subject first.
                    let effective_subject = if arg_count >= 3 {
                        let position = args.remove(0);
                        Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![subject, position],
                        )))
                    } else {
                        subject
                    };

                    // Handle flags arg -> embed as (?flags) prefix in pattern
                    let effective_pattern = if arg_count >= 4 {
                        let flags = args.remove(0);
                        match &flags {
                            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(f_str) if !f_str.is_empty()) =>
                            {
                                let Literal::String(f_str) = lit.as_ref() else {
                                    unreachable!()
                                };
                                // Always use concatenation: '(?flags)' || pattern
                                let prefix = Expression::Literal(Box::new(Literal::String(
                                    format!("(?{})", f_str),
                                )));
                                Expression::DPipe(Box::new(crate::expressions::DPipe {
                                    this: Box::new(prefix),
                                    expression: Box::new(pattern.clone()),
                                    safe: None,
                                }))
                            }
                            // Non-literal or empty flags: keep the pattern as-is.
                            _ => pattern.clone(),
                        }
                    } else {
                        pattern.clone()
                    };

                    // Build: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
                    // NOTE(review): with flags present the guard compares the
                    // flag-prefixed pattern ('(?i)' || p) to '', which can never be
                    // true — confirm an empty pattern with flags is intended to fall
                    // through to the ELSE branch.
                    let extract_all = Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![effective_subject, effective_pattern.clone()],
                    )));
                    let length_expr =
                        Expression::Length(Box::new(crate::expressions::UnaryFunc {
                            this: extract_all,
                            original_name: None,
                            inferred_type: None,
                        }));
                    let condition = Expression::Eq(Box::new(BinaryOp::new(
                        effective_pattern,
                        Expression::Literal(Box::new(Literal::String(String::new()))),
                    )));
                    Ok(Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![(condition, Expression::number(0))],
                        else_: Some(length_expr),
                        comments: vec![],
                        inferred_type: None,
                    })))
                } else {
                    Ok(e)
                }
            }
9187
            Action::RegexpInstrSnowflakeToDuckDB => {
                // Snowflake REGEXP_INSTR(s, p[, pos[, occ[, option[, flags[, group]]]]]) ->
                // DuckDB: CASE WHEN s IS NULL OR p IS NULL [OR ...] THEN NULL
                //              WHEN p = '' THEN 0
                //              WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
                //              ELSE 1 + COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(eff_s, eff_p)[1:occ], x -> LENGTH(x))), 0)
                //                     + COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(eff_s, eff_p)[1:occ - 1], x -> LENGTH(x))), 0)
                //                     + pos_offset
                //              END
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    // NOTE(review): assumes the call has at least one argument;
                    // `args.remove(0)` panics on an empty arg list — confirm the
                    // parser never produces a zero-arg REGEXP_INSTR.
                    let subject = args.remove(0);
                    // A missing pattern defaults to '' which the
                    // `pattern = ''` WHEN branch below collapses to a constant 0.
                    let pattern = if !args.is_empty() {
                        args.remove(0)
                    } else {
                        Expression::Literal(Box::new(Literal::String(String::new())))
                    };

                    // Collect all original args for NULL checks
                    let position = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        None
                    };
                    let occurrence = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        None
                    };
                    let option = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        None
                    };
                    let flags = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        None
                    };
                    // The capture-group argument is consumed but intentionally
                    // unused: this rewrite only locates the match start position.
                    let _group = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        None
                    };

                    // Position is treated as the default 1 (no offset) when it is
                    // absent or the literal number 1.
                    let is_pos_1 = position.as_ref().map_or(true, |p| matches!(p, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1")));
                    // Occurrence defaults to the first match.
                    let occurrence_expr = occurrence.clone().unwrap_or(Expression::number(1));

                    // Build NULL check: subject IS NULL OR pattern IS NULL [OR pos IS NULL ...]
                    let mut null_checks: Vec<Expression> = vec![
                        Expression::Is(Box::new(BinaryOp::new(
                            subject.clone(),
                            Expression::Null(Null),
                        ))),
                        Expression::Is(Box::new(BinaryOp::new(
                            pattern.clone(),
                            Expression::Null(Null),
                        ))),
                    ];
                    // Add NULL checks for all provided optional args
                    for opt_arg in [&position, &occurrence, &option, &flags].iter() {
                        if let Some(arg) = opt_arg {
                            null_checks.push(Expression::Is(Box::new(BinaryOp::new(
                                (*arg).clone(),
                                Expression::Null(Null),
                            ))));
                        }
                    }
                    // Chain with OR. The vec always holds >= 2 entries (subject
                    // and pattern checks), so `reduce` cannot return None.
                    let null_condition = null_checks
                        .into_iter()
                        .reduce(|a, b| Expression::Or(Box::new(BinaryOp::new(a, b))))
                        .unwrap();

                    // Effective subject (apply position offset via SUBSTRING)
                    let effective_subject = if is_pos_1 {
                        subject.clone()
                    } else {
                        let pos = position.clone().unwrap_or(Expression::number(1));
                        Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![subject.clone(), pos],
                        )))
                    };

                    // Effective pattern: a non-empty string-literal flags value is
                    // folded into the pattern as an inline '(?flags)' prefix,
                    // since the DuckDB regex functions used here take no separate
                    // flags parameter.
                    let effective_pattern = if let Some(ref fl) = flags {
                        if let Expression::Literal(lit) = fl {
                            if let Literal::String(f_str) = lit.as_ref() {
                                if !f_str.is_empty() {
                                    let prefix = Expression::Literal(Box::new(
                                        Literal::String(format!("(?{})", f_str)),
                                    ));
                                    Expression::DPipe(Box::new(crate::expressions::DPipe {
                                        this: Box::new(prefix),
                                        expression: Box::new(pattern.clone()),
                                        safe: None,
                                    }))
                                } else {
                                    pattern.clone()
                                }
                            } else {
                                // NOTE(review): a non-string literal flags value
                                // becomes the effective pattern itself — this looks
                                // like it should fall back to `pattern.clone()`;
                                // confirm intended behavior.
                                fl.clone()
                            }
                        } else {
                            pattern.clone()
                        }
                    } else {
                        pattern.clone()
                    };

                    // WHEN pattern = '' THEN 0
                    let empty_pattern_check = Expression::Eq(Box::new(BinaryOp::new(
                        effective_pattern.clone(),
                        Expression::Literal(Box::new(Literal::String(String::new()))),
                    )));

                    // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
                    // (fewer total matches than the requested occurrence -> 0)
                    let match_count_check = Expression::Lt(Box::new(BinaryOp::new(
                        Expression::Length(Box::new(crate::expressions::UnaryFunc {
                            this: Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![effective_subject.clone(), effective_pattern.clone()],
                            ))),
                            original_name: None,
                            inferred_type: None,
                        })),
                        occurrence_expr.clone(),
                    )));

                    // Helper: build the `x -> LENGTH(x)` lambda for LIST_TRANSFORM
                    let make_len_lambda = || {
                        Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                            parameters: vec![crate::expressions::Identifier::new("x")],
                            body: Expression::Length(Box::new(crate::expressions::UnaryFunc {
                                this: Expression::Identifier(
                                    crate::expressions::Identifier::new("x"),
                                ),
                                original_name: None,
                                inferred_type: None,
                            })),
                            colon: false,
                            parameter_types: vec![],
                        }))
                    };

                    // COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(s, p)[1:occ], x -> LENGTH(x))), 0)
                    // Total length of the text *between* matches, up to the
                    // requested occurrence.
                    let split_sliced =
                        Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
                            this: Expression::Function(Box::new(Function::new(
                                "STRING_SPLIT_REGEX".to_string(),
                                vec![effective_subject.clone(), effective_pattern.clone()],
                            ))),
                            start: Some(Expression::number(1)),
                            end: Some(occurrence_expr.clone()),
                        }));
                    let split_sum = Expression::Function(Box::new(Function::new(
                        "COALESCE".to_string(),
                        vec![
                            Expression::Function(Box::new(Function::new(
                                "LIST_SUM".to_string(),
                                vec![Expression::Function(Box::new(Function::new(
                                    "LIST_TRANSFORM".to_string(),
                                    vec![split_sliced, make_len_lambda()],
                                )))],
                            ))),
                            Expression::number(0),
                        ],
                    )));

                    // COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(s, p)[1:occ - 1], x -> LENGTH(x))), 0)
                    // Total length of the matches *before* the requested occurrence.
                    let extract_sliced =
                        Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
                            this: Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![effective_subject.clone(), effective_pattern.clone()],
                            ))),
                            start: Some(Expression::number(1)),
                            end: Some(Expression::Sub(Box::new(BinaryOp::new(
                                occurrence_expr.clone(),
                                Expression::number(1),
                            )))),
                        }));
                    let extract_sum = Expression::Function(Box::new(Function::new(
                        "COALESCE".to_string(),
                        vec![
                            Expression::Function(Box::new(Function::new(
                                "LIST_SUM".to_string(),
                                vec![Expression::Function(Box::new(Function::new(
                                    "LIST_TRANSFORM".to_string(),
                                    vec![extract_sliced, make_len_lambda()],
                                )))],
                            ))),
                            Expression::number(0),
                        ],
                    )));

                    // Position offset: pos - 1 when pos > 1, else 0
                    let pos_offset: Expression = if !is_pos_1 {
                        let pos = position.clone().unwrap_or(Expression::number(1));
                        Expression::Sub(Box::new(BinaryOp::new(pos, Expression::number(1))))
                    } else {
                        Expression::number(0)
                    };

                    // ELSE: 1 + split_sum + extract_sum + pos_offset
                    // (the 1-based index of the first character of the occurrence)
                    let else_expr = Expression::Add(Box::new(BinaryOp::new(
                        Expression::Add(Box::new(BinaryOp::new(
                            Expression::Add(Box::new(BinaryOp::new(
                                Expression::number(1),
                                split_sum,
                            ))),
                            extract_sum,
                        ))),
                        pos_offset,
                    )));

                    Ok(Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![
                            (null_condition, Expression::Null(Null)),
                            (empty_pattern_check, Expression::number(0)),
                            (match_count_check, Expression::number(0)),
                        ],
                        else_: Some(else_expr),
                        comments: vec![],
                        inferred_type: None,
                    })))
                } else {
                    Ok(e)
                }
            }
9420
9421 Action::RlikeSnowflakeToDuckDB => {
9422 // Snowflake RLIKE(a, b[, flags]) -> DuckDB REGEXP_FULL_MATCH(a, b[, flags])
9423 // Both do full-string matching, so no anchoring needed
9424 let (subject, pattern, flags) = match e {
9425 Expression::RegexpLike(ref rl) => {
9426 (rl.this.clone(), rl.pattern.clone(), rl.flags.clone())
9427 }
9428 Expression::Function(ref f) if f.args.len() >= 2 => {
9429 let s = f.args[0].clone();
9430 let p = f.args[1].clone();
9431 let fl = f.args.get(2).cloned();
9432 (s, p, fl)
9433 }
9434 _ => return Ok(e),
9435 };
9436
9437 let mut result_args = vec![subject, pattern];
9438 if let Some(fl) = flags {
9439 result_args.push(fl);
9440 }
9441 Ok(Expression::Function(Box::new(Function::new(
9442 "REGEXP_FULL_MATCH".to_string(),
9443 result_args,
9444 ))))
9445 }
9446
9447 Action::RegexpExtractAllToSnowflake => {
9448 // BigQuery REGEXP_EXTRACT_ALL(s, p) -> Snowflake REGEXP_SUBSTR_ALL(s, p)
9449 // With capture group: REGEXP_SUBSTR_ALL(s, p, 1, 1, 'c', 1)
9450 if let Expression::Function(f) = e {
9451 let mut args = f.args;
9452 if args.len() >= 2 {
9453 let str_expr = args.remove(0);
9454 let pattern = args.remove(0);
9455
9456 let has_groups = match &pattern {
9457 Expression::Literal(lit)
9458 if matches!(lit.as_ref(), Literal::String(_)) =>
9459 {
9460 let Literal::String(s) = lit.as_ref() else {
9461 unreachable!()
9462 };
9463 s.contains('(') && s.contains(')')
9464 }
9465 _ => false,
9466 };
9467
9468 if has_groups {
9469 Ok(Expression::Function(Box::new(Function::new(
9470 "REGEXP_SUBSTR_ALL".to_string(),
9471 vec![
9472 str_expr,
9473 pattern,
9474 Expression::number(1),
9475 Expression::number(1),
9476 Expression::Literal(Box::new(Literal::String(
9477 "c".to_string(),
9478 ))),
9479 Expression::number(1),
9480 ],
9481 ))))
9482 } else {
9483 Ok(Expression::Function(Box::new(Function::new(
9484 "REGEXP_SUBSTR_ALL".to_string(),
9485 vec![str_expr, pattern],
9486 ))))
9487 }
9488 } else {
9489 Ok(Expression::Function(Box::new(Function::new(
9490 "REGEXP_SUBSTR_ALL".to_string(),
9491 args,
9492 ))))
9493 }
9494 } else {
9495 Ok(e)
9496 }
9497 }
9498
9499 Action::SetToVariable => {
9500 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
9501 if let Expression::SetStatement(mut s) = e {
9502 for item in &mut s.items {
9503 if item.kind.is_none() {
9504 // Check if name already has VARIABLE prefix (from DuckDB source parsing)
9505 let already_variable = match &item.name {
9506 Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
9507 _ => false,
9508 };
9509 if already_variable {
9510 // Extract the actual name and set kind
9511 if let Expression::Identifier(ref mut id) = item.name {
9512 let actual_name = id.name["VARIABLE ".len()..].to_string();
9513 id.name = actual_name;
9514 }
9515 }
9516 item.kind = Some("VARIABLE".to_string());
9517 }
9518 }
9519 Ok(Expression::SetStatement(s))
9520 } else {
9521 Ok(e)
9522 }
9523 }
9524
9525 Action::ConvertTimezoneToExpr => {
9526 // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
9527 // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
9528 if let Expression::Function(f) = e {
9529 if f.args.len() == 2 {
9530 let mut args = f.args;
9531 let target_tz = args.remove(0);
9532 let timestamp = args.remove(0);
9533 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
9534 source_tz: None,
9535 target_tz: Some(Box::new(target_tz)),
9536 timestamp: Some(Box::new(timestamp)),
9537 options: vec![],
9538 })))
9539 } else if f.args.len() == 3 {
9540 let mut args = f.args;
9541 let source_tz = args.remove(0);
9542 let target_tz = args.remove(0);
9543 let timestamp = args.remove(0);
9544 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
9545 source_tz: Some(Box::new(source_tz)),
9546 target_tz: Some(Box::new(target_tz)),
9547 timestamp: Some(Box::new(timestamp)),
9548 options: vec![],
9549 })))
9550 } else {
9551 Ok(Expression::Function(f))
9552 }
9553 } else {
9554 Ok(e)
9555 }
9556 }
9557
9558 Action::BigQueryCastType => {
9559 // Convert BigQuery types to standard SQL types
9560 if let Expression::DataType(dt) = e {
9561 match dt {
9562 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
9563 Ok(Expression::DataType(DataType::BigInt { length: None }))
9564 }
9565 DataType::Custom { ref name }
9566 if name.eq_ignore_ascii_case("FLOAT64") =>
9567 {
9568 Ok(Expression::DataType(DataType::Double {
9569 precision: None,
9570 scale: None,
9571 }))
9572 }
9573 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
9574 Ok(Expression::DataType(DataType::Boolean))
9575 }
9576 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
9577 Ok(Expression::DataType(DataType::VarBinary { length: None }))
9578 }
9579 DataType::Custom { ref name }
9580 if name.eq_ignore_ascii_case("NUMERIC") =>
9581 {
9582 // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
9583 // default precision (18, 3) being added to bare DECIMAL
9584 if matches!(target, DialectType::DuckDB) {
9585 Ok(Expression::DataType(DataType::Custom {
9586 name: "DECIMAL".to_string(),
9587 }))
9588 } else {
9589 Ok(Expression::DataType(DataType::Decimal {
9590 precision: None,
9591 scale: None,
9592 }))
9593 }
9594 }
9595 DataType::Custom { ref name }
9596 if name.eq_ignore_ascii_case("STRING") =>
9597 {
9598 Ok(Expression::DataType(DataType::String { length: None }))
9599 }
9600 DataType::Custom { ref name }
9601 if name.eq_ignore_ascii_case("DATETIME") =>
9602 {
9603 Ok(Expression::DataType(DataType::Timestamp {
9604 precision: None,
9605 timezone: false,
9606 }))
9607 }
9608 _ => Ok(Expression::DataType(dt)),
9609 }
9610 } else {
9611 Ok(e)
9612 }
9613 }
9614
9615 Action::BigQuerySafeDivide => {
9616 // Convert SafeDivide expression to IF/CASE form for most targets
9617 if let Expression::SafeDivide(sd) = e {
9618 let x = *sd.this;
9619 let y = *sd.expression;
9620 // Wrap x and y in parens if they're complex expressions
9621 let y_ref = match &y {
9622 Expression::Column(_)
9623 | Expression::Literal(_)
9624 | Expression::Identifier(_) => y.clone(),
9625 _ => Expression::Paren(Box::new(Paren {
9626 this: y.clone(),
9627 trailing_comments: vec![],
9628 })),
9629 };
9630 let x_ref = match &x {
9631 Expression::Column(_)
9632 | Expression::Literal(_)
9633 | Expression::Identifier(_) => x.clone(),
9634 _ => Expression::Paren(Box::new(Paren {
9635 this: x.clone(),
9636 trailing_comments: vec![],
9637 })),
9638 };
9639 let condition = Expression::Neq(Box::new(BinaryOp::new(
9640 y_ref.clone(),
9641 Expression::number(0),
9642 )));
9643 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
9644
9645 if matches!(target, DialectType::Presto | DialectType::Trino) {
9646 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
9647 let cast_x = Expression::Cast(Box::new(Cast {
9648 this: match &x {
9649 Expression::Column(_)
9650 | Expression::Literal(_)
9651 | Expression::Identifier(_) => x,
9652 _ => Expression::Paren(Box::new(Paren {
9653 this: x,
9654 trailing_comments: vec![],
9655 })),
9656 },
9657 to: DataType::Double {
9658 precision: None,
9659 scale: None,
9660 },
9661 trailing_comments: vec![],
9662 double_colon_syntax: false,
9663 format: None,
9664 default: None,
9665 inferred_type: None,
9666 }));
9667 let cast_div = Expression::Div(Box::new(BinaryOp::new(
9668 cast_x,
9669 match &y {
9670 Expression::Column(_)
9671 | Expression::Literal(_)
9672 | Expression::Identifier(_) => y,
9673 _ => Expression::Paren(Box::new(Paren {
9674 this: y,
9675 trailing_comments: vec![],
9676 })),
9677 },
9678 )));
9679 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9680 condition,
9681 true_value: cast_div,
9682 false_value: Some(Expression::Null(Null)),
9683 original_name: None,
9684 inferred_type: None,
9685 })))
9686 } else if matches!(target, DialectType::PostgreSQL) {
9687 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
9688 let cast_x = Expression::Cast(Box::new(Cast {
9689 this: match &x {
9690 Expression::Column(_)
9691 | Expression::Literal(_)
9692 | Expression::Identifier(_) => x,
9693 _ => Expression::Paren(Box::new(Paren {
9694 this: x,
9695 trailing_comments: vec![],
9696 })),
9697 },
9698 to: DataType::Custom {
9699 name: "DOUBLE PRECISION".to_string(),
9700 },
9701 trailing_comments: vec![],
9702 double_colon_syntax: false,
9703 format: None,
9704 default: None,
9705 inferred_type: None,
9706 }));
9707 let y_paren = match &y {
9708 Expression::Column(_)
9709 | Expression::Literal(_)
9710 | Expression::Identifier(_) => y,
9711 _ => Expression::Paren(Box::new(Paren {
9712 this: y,
9713 trailing_comments: vec![],
9714 })),
9715 };
9716 let cast_div =
9717 Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
9718 Ok(Expression::Case(Box::new(Case {
9719 operand: None,
9720 whens: vec![(condition, cast_div)],
9721 else_: Some(Expression::Null(Null)),
9722 comments: Vec::new(),
9723 inferred_type: None,
9724 })))
9725 } else if matches!(target, DialectType::DuckDB) {
9726 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
9727 Ok(Expression::Case(Box::new(Case {
9728 operand: None,
9729 whens: vec![(condition, div_expr)],
9730 else_: Some(Expression::Null(Null)),
9731 comments: Vec::new(),
9732 inferred_type: None,
9733 })))
9734 } else if matches!(target, DialectType::Snowflake) {
9735 // Snowflake: IFF(y <> 0, x / y, NULL)
9736 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9737 condition,
9738 true_value: div_expr,
9739 false_value: Some(Expression::Null(Null)),
9740 original_name: Some("IFF".to_string()),
9741 inferred_type: None,
9742 })))
9743 } else {
9744 // All others: IF(y <> 0, x / y, NULL)
9745 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9746 condition,
9747 true_value: div_expr,
9748 false_value: Some(Expression::Null(Null)),
9749 original_name: None,
9750 inferred_type: None,
9751 })))
9752 }
9753 } else {
9754 Ok(e)
9755 }
9756 }
9757
            Action::BigQueryLastDayStripUnit => {
                // BigQuery allows LAST_DAY(date, MONTH); most targets only take
                // the unit-less form, so drop the unit and then apply per-target
                // rewrites where LAST_DAY itself is unavailable.
                if let Expression::LastDay(mut ld) = e {
                    ld.unit = None; // Strip the unit (MONTH is default)
                    match target {
                        DialectType::PostgreSQL => {
                            // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
                            let date_trunc = Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![
                                    Expression::Literal(Box::new(
                                        crate::expressions::Literal::String(
                                            "MONTH".to_string(),
                                        ),
                                    )),
                                    ld.this.clone(),
                                ],
                            )));
                            // Advance to the first instant of the next month...
                            let plus_month =
                                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                                    date_trunc,
                                    Expression::Interval(Box::new(
                                        crate::expressions::Interval {
                                            this: Some(Expression::Literal(Box::new(
                                                crate::expressions::Literal::String(
                                                    "1 MONTH".to_string(),
                                                ),
                                            ))),
                                            unit: None,
                                        },
                                    )),
                                )));
                            // ...then step back one day to land on the last day
                            // of the original month.
                            let minus_day =
                                Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                                    plus_month,
                                    Expression::Interval(Box::new(
                                        crate::expressions::Interval {
                                            this: Some(Expression::Literal(Box::new(
                                                crate::expressions::Literal::String(
                                                    "1 DAY".to_string(),
                                                ),
                                            ))),
                                            unit: None,
                                        },
                                    )),
                                )));
                            // Interval arithmetic yields a timestamp-like value;
                            // cast the result back to DATE.
                            Ok(Expression::Cast(Box::new(Cast {
                                this: minus_day,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::Presto => {
                            // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
                            Ok(Expression::Function(Box::new(Function::new(
                                "LAST_DAY_OF_MONTH".to_string(),
                                vec![ld.this],
                            ))))
                        }
                        DialectType::ClickHouse => {
                            // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
                            // Need to wrap the DATE type in Nullable
                            let nullable_date = match ld.this {
                                Expression::Cast(mut c) => {
                                    c.to = DataType::Nullable {
                                        inner: Box::new(DataType::Date),
                                    };
                                    Expression::Cast(c)
                                }
                                // Operand is not a cast: pass it through as-is.
                                other => other,
                            };
                            ld.this = nullable_date;
                            Ok(Expression::LastDay(ld))
                        }
                        // Remaining targets keep LAST_DAY with the unit stripped.
                        _ => Ok(Expression::LastDay(ld)),
                    }
                } else {
                    Ok(e)
                }
            }
9841
            Action::BigQueryCastFormat => {
                // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
                // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
                // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
                // Only CAST/SAFE_CAST nodes that actually carry a FORMAT clause
                // are rewritten; everything else passes through untouched.
                let (this, to, format_expr, is_safe) = match e {
                    Expression::Cast(ref c) if c.format.is_some() => (
                        c.this.clone(),
                        c.to.clone(),
                        c.format.as_ref().unwrap().as_ref().clone(),
                        false,
                    ),
                    Expression::SafeCast(ref c) if c.format.is_some() => (
                        c.this.clone(),
                        c.to.clone(),
                        c.format.as_ref().unwrap().as_ref().clone(),
                        true,
                    ),
                    _ => return Ok(e),
                };
                // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
                if matches!(target, DialectType::BigQuery) {
                    match &to {
                        DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
                            // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
                            return Ok(e);
                        }
                        _ => {}
                    }
                }
                // Extract timezone from format if AT TIME ZONE is present
                let (actual_format_expr, timezone) = match &format_expr {
                    Expression::AtTimeZone(ref atz) => {
                        (atz.this.clone(), Some(atz.zone.clone()))
                    }
                    _ => (format_expr.clone(), None),
                };
                // Translate the SQL FORMAT model into a strftime-style pattern
                // (e.g. 'MM/DD/YYYY' -> '%m/%d/%Y').
                let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
                match target {
                    DialectType::BigQuery => {
                        // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
                        // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
                        let func_name = match &to {
                            DataType::Date => "PARSE_DATE",
                            DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
                            // NOTE(review): TIME maps to PARSE_TIMESTAMP rather
                            // than PARSE_TIME — confirm this is intentional.
                            DataType::Time { .. } => "PARSE_TIMESTAMP",
                            _ => "PARSE_TIMESTAMP",
                        };
                        let mut func_args = vec![strftime_fmt, this];
                        if let Some(tz) = timezone {
                            func_args.push(tz);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            func_args,
                        ))))
                    }
                    DialectType::DuckDB => {
                        // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
                        // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
                        let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
                        // TRY_STRPTIME yields NULL on parse failure, mirroring
                        // SAFE_CAST; plain STRPTIME errors, mirroring CAST.
                        let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
                        let parse_call = Expression::Function(Box::new(Function::new(
                            parse_fn_name.to_string(),
                            vec![this, duck_fmt],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: parse_call,
                            to,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    // Other targets keep the original cast expression untouched.
                    _ => Ok(e),
                }
            }
9920
            Action::BigQueryFunctionNormalize => {
                // Delegates to the shared helper that normalizes BigQuery
                // function names/arguments for the given (source, target) pair.
                Self::normalize_bigquery_function(e, source, target)
            }
9924
            Action::BigQueryToHexBare => {
                // Not used anymore - handled directly in normalize_bigquery_function.
                // Kept as an identity transform so the action variant remains
                // dispatchable without special-casing at the call site.
                Ok(e)
            }
9929
            Action::BigQueryToHexLower => {
                // Normalizes LOWER(...) wrappers that appear around hex
                // conversions during BigQuery transpilation.
                if let Expression::Lower(uf) = e {
                    match uf.this {
                        // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
                        Expression::Function(f)
                            if matches!(target, DialectType::BigQuery)
                                && f.name == "TO_HEX" =>
                        {
                            Ok(Expression::Function(f))
                        }
                        // LOWER(LOWER(HEX/TO_HEX(x))) patterns
                        Expression::Lower(inner_uf) => {
                            if matches!(target, DialectType::BigQuery) {
                                // BQ->BQ: extract TO_HEX
                                // NOTE(review): any inner function is renamed to
                                // TO_HEX here, not just HEX/TO_HEX — confirm the
                                // pipeline only produces hex calls in this slot.
                                if let Expression::Function(f) = inner_uf.this {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TO_HEX".to_string(),
                                        f.args,
                                    ))))
                                } else {
                                    Ok(Expression::Lower(inner_uf))
                                }
                            } else {
                                // Flatten: LOWER(LOWER(x)) -> LOWER(x)
                                Ok(Expression::Lower(inner_uf))
                            }
                        }
                        // Any other operand: rebuild the LOWER wrapper. Note the
                        // outer node's original_name/inferred_type are reset to
                        // None here rather than carried over.
                        other => {
                            Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
                                this: other,
                                original_name: None,
                                inferred_type: None,
                            })))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
9969
9970 Action::BigQueryToHexUpper => {
9971 // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
9972 // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
9973 if let Expression::Upper(uf) = e {
9974 if let Expression::Lower(inner_uf) = uf.this {
9975 // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
9976 if matches!(target, DialectType::BigQuery) {
9977 // Restore TO_HEX name in inner function
9978 if let Expression::Function(f) = inner_uf.this {
9979 let restored = Expression::Function(Box::new(Function::new(
9980 "TO_HEX".to_string(),
9981 f.args,
9982 )));
9983 Ok(Expression::Upper(Box::new(
9984 crate::expressions::UnaryFunc::new(restored),
9985 )))
9986 } else {
9987 Ok(Expression::Upper(inner_uf))
9988 }
9989 } else {
9990 // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
9991 Ok(inner_uf.this)
9992 }
9993 } else {
9994 Ok(Expression::Upper(uf))
9995 }
9996 } else {
9997 Ok(e)
9998 }
9999 }
10000
10001 Action::BigQueryAnyValueHaving => {
10002 // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
10003 // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
10004 if let Expression::AnyValue(agg) = e {
10005 if let Some((having_expr, is_max)) = agg.having_max {
10006 let func_name = if is_max {
10007 "ARG_MAX_NULL"
10008 } else {
10009 "ARG_MIN_NULL"
10010 };
10011 Ok(Expression::Function(Box::new(Function::new(
10012 func_name.to_string(),
10013 vec![agg.this, *having_expr],
10014 ))))
10015 } else {
10016 Ok(Expression::AnyValue(agg))
10017 }
10018 } else {
10019 Ok(e)
10020 }
10021 }
10022
10023 Action::BigQueryApproxQuantiles => {
10024 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
10025 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
10026 if let Expression::AggregateFunction(agg) = e {
10027 if agg.args.len() >= 2 {
10028 let x_expr = agg.args[0].clone();
10029 let n_expr = &agg.args[1];
10030
10031 // Extract the numeric value from n_expr
10032 let n = match n_expr {
10033 Expression::Literal(lit)
10034 if matches!(
10035 lit.as_ref(),
10036 crate::expressions::Literal::Number(_)
10037 ) =>
10038 {
10039 let crate::expressions::Literal::Number(s) = lit.as_ref()
10040 else {
10041 unreachable!()
10042 };
10043 s.parse::<usize>().unwrap_or(2)
10044 }
10045 _ => 2,
10046 };
10047
10048 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
10049 let mut quantiles = Vec::new();
10050 for i in 0..=n {
10051 let q = i as f64 / n as f64;
10052 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
10053 if q == 0.0 {
10054 quantiles.push(Expression::number(0));
10055 } else if q == 1.0 {
10056 quantiles.push(Expression::number(1));
10057 } else {
10058 quantiles.push(Expression::Literal(Box::new(
10059 crate::expressions::Literal::Number(format!("{}", q)),
10060 )));
10061 }
10062 }
10063
10064 let array_expr =
10065 Expression::Array(Box::new(crate::expressions::Array {
10066 expressions: quantiles,
10067 }));
10068
10069 // Preserve DISTINCT modifier
10070 let mut new_func = Function::new(
10071 "APPROX_QUANTILE".to_string(),
10072 vec![x_expr, array_expr],
10073 );
10074 new_func.distinct = agg.distinct;
10075 Ok(Expression::Function(Box::new(new_func)))
10076 } else {
10077 Ok(Expression::AggregateFunction(agg))
10078 }
10079 } else {
10080 Ok(e)
10081 }
10082 }
10083
10084 Action::GenericFunctionNormalize => {
10085 // Helper closure to convert ARBITRARY to target-specific function
10086 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
10087 let name = match target {
10088 DialectType::ClickHouse => "any",
10089 DialectType::TSQL | DialectType::SQLite => "MAX",
10090 DialectType::Hive => "FIRST",
10091 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
10092 "ARBITRARY"
10093 }
10094 _ => "ANY_VALUE",
10095 };
10096 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
10097 }
10098
10099 if let Expression::Function(f) = e {
10100 let name = f.name.to_ascii_uppercase();
10101 match name.as_str() {
10102 "ARBITRARY" if f.args.len() == 1 => {
10103 let arg = f.args.into_iter().next().unwrap();
10104 Ok(convert_arbitrary(arg, target))
10105 }
10106 "TO_NUMBER" if f.args.len() == 1 => {
10107 let arg = f.args.into_iter().next().unwrap();
10108 match target {
10109 DialectType::Oracle | DialectType::Snowflake => {
10110 Ok(Expression::Function(Box::new(Function::new(
10111 "TO_NUMBER".to_string(),
10112 vec![arg],
10113 ))))
10114 }
10115 _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
10116 this: arg,
10117 to: crate::expressions::DataType::Double {
10118 precision: None,
10119 scale: None,
10120 },
10121 double_colon_syntax: false,
10122 trailing_comments: Vec::new(),
10123 format: None,
10124 default: None,
10125 inferred_type: None,
10126 }))),
10127 }
10128 }
10129 "AGGREGATE" if f.args.len() >= 3 => match target {
10130 DialectType::DuckDB
10131 | DialectType::Hive
10132 | DialectType::Presto
10133 | DialectType::Trino => Ok(Expression::Function(Box::new(
10134 Function::new("REDUCE".to_string(), f.args),
10135 ))),
10136 _ => Ok(Expression::Function(f)),
10137 },
10138 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep as-is for DuckDB
10139 "REGEXP_MATCHES" if f.args.len() >= 2 => {
10140 if matches!(target, DialectType::DuckDB) {
10141 Ok(Expression::Function(f))
10142 } else {
10143 let mut args = f.args;
10144 let this = args.remove(0);
10145 let pattern = args.remove(0);
10146 let flags = if args.is_empty() {
10147 None
10148 } else {
10149 Some(args.remove(0))
10150 };
10151 Ok(Expression::RegexpLike(Box::new(
10152 crate::expressions::RegexpFunc {
10153 this,
10154 pattern,
10155 flags,
10156 },
10157 )))
10158 }
10159 }
10160 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
10161 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
10162 if matches!(target, DialectType::DuckDB) {
10163 Ok(Expression::Function(f))
10164 } else {
10165 let mut args = f.args;
10166 let this = args.remove(0);
10167 let pattern = args.remove(0);
10168 let flags = if args.is_empty() {
10169 None
10170 } else {
10171 Some(args.remove(0))
10172 };
10173 Ok(Expression::RegexpLike(Box::new(
10174 crate::expressions::RegexpFunc {
10175 this,
10176 pattern,
10177 flags,
10178 },
10179 )))
10180 }
10181 }
10182 // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
10183 "STRUCT_EXTRACT" if f.args.len() == 2 => {
10184 let mut args = f.args;
10185 let this = args.remove(0);
10186 let field_expr = args.remove(0);
10187 // Extract string literal to get field name
10188 let field_name = match &field_expr {
10189 Expression::Literal(lit)
10190 if matches!(
10191 lit.as_ref(),
10192 crate::expressions::Literal::String(_)
10193 ) =>
10194 {
10195 let crate::expressions::Literal::String(s) = lit.as_ref()
10196 else {
10197 unreachable!()
10198 };
10199 s.clone()
10200 }
10201 Expression::Identifier(id) => id.name.clone(),
10202 _ => {
10203 return Ok(Expression::Function(Box::new(Function::new(
10204 "STRUCT_EXTRACT".to_string(),
10205 vec![this, field_expr],
10206 ))))
10207 }
10208 };
10209 Ok(Expression::StructExtract(Box::new(
10210 crate::expressions::StructExtractFunc {
10211 this,
10212 field: crate::expressions::Identifier::new(field_name),
10213 },
10214 )))
10215 }
// LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
// DuckDB keeps its native LIST_* spelling; all other targets use the
// Presto/Spark-style name. Arguments pass through untouched.
"LIST_FILTER" if f.args.len() == 2 => {
    let name = match target {
        DialectType::DuckDB => "LIST_FILTER",
        _ => "FILTER",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
"LIST_TRANSFORM" if f.args.len() == 2 => {
    let name = match target {
        DialectType::DuckDB => "LIST_TRANSFORM",
        _ => "TRANSFORM",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// LIST_SORT(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, SORT_ARRAY(x) for others
"LIST_SORT" if f.args.len() >= 1 => {
    let name = match target {
        DialectType::DuckDB => "LIST_SORT",
        DialectType::Presto | DialectType::Trino => "ARRAY_SORT",
        _ => "SORT_ARRAY",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
"LIST_REVERSE_SORT" if f.args.len() >= 1 => {
    match target {
        // DuckDB: normalize to the ARRAY_REVERSE_SORT alias.
        DialectType::DuckDB => Ok(Expression::Function(Box::new(
            Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
        ))),
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            // SORT_ARRAY's second argument FALSE requests descending order.
            let mut args = f.args;
            args.push(Expression::Identifier(
                crate::expressions::Identifier::new("FALSE"),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "SORT_ARRAY".to_string(),
                args,
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
            // Descending order is expressed via an inverted comparator lambda.
            // NOTE(review): any arguments beyond the first are dropped on
            // this path — confirm that is intended.
            let arr = f.args.into_iter().next().unwrap();
            let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                parameters: vec![
                    crate::expressions::Identifier::new("a"),
                    crate::expressions::Identifier::new("b"),
                ],
                body: Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![
                        (
                            Expression::Lt(Box::new(BinaryOp::new(
                                Expression::Identifier(crate::expressions::Identifier::new("a")),
                                Expression::Identifier(crate::expressions::Identifier::new("b")),
                            ))),
                            Expression::number(1),
                        ),
                        (
                            Expression::Gt(Box::new(BinaryOp::new(
                                Expression::Identifier(crate::expressions::Identifier::new("a")),
                                Expression::Identifier(crate::expressions::Identifier::new("b")),
                            ))),
                            // -1 built as a literal here; the sibling
                            // ARRAY_REVERSE_SORT arm builds it as Neg(1) —
                            // NOTE(review): both presumably render "-1";
                            // consider unifying.
                            Expression::Literal(Box::new(Literal::Number("-1".to_string()))),
                        ),
                    ],
                    else_: Some(Expression::number(0)),
                    comments: Vec::new(),
                    inferred_type: None,
                })),
                colon: false,
                parameter_types: Vec::new(),
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_SORT".to_string(),
                vec![arr, lambda],
            ))))
        }
        // Unhandled targets keep the DuckDB-style name as-is.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "LIST_REVERSE_SORT".to_string(),
            f.args,
        )))),
    }
}
// SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
// (Redshift's SPLIT_TO_ARRAY defaults the delimiter to ','; make it explicit
// so targets with a mandatory separator argument still work.)
"SPLIT_TO_ARRAY" if f.args.len() == 1 => {
    let mut args = f.args;
    args.push(Expression::string(","));
    let name = match target {
        DialectType::DuckDB => "STR_SPLIT",
        DialectType::Presto | DialectType::Trino => "SPLIT",
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "SPLIT",
        DialectType::PostgreSQL => "STRING_TO_ARRAY",
        DialectType::Redshift => "SPLIT_TO_ARRAY",
        _ => "SPLIT",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        args,
    ))))
}
// SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
// (Same name table as the 1-arg arm; arguments pass through unchanged.)
"SPLIT_TO_ARRAY" if f.args.len() == 2 => {
    let name = match target {
        DialectType::DuckDB => "STR_SPLIT",
        DialectType::Presto | DialectType::Trino => "SPLIT",
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "SPLIT",
        DialectType::PostgreSQL => "STRING_TO_ARRAY",
        DialectType::Redshift => "SPLIT_TO_ARRAY",
        _ => "SPLIT",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
"STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
    let name = match target {
        DialectType::DuckDB => "STR_SPLIT",
        DialectType::Presto | DialectType::Trino => "SPLIT",
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "SPLIT",
        DialectType::Doris | DialectType::StarRocks => {
            "SPLIT_BY_STRING"
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            "STRING_TO_ARRAY"
        }
        _ => "SPLIT",
    };
    // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
    if matches!(
        target,
        DialectType::Spark
            | DialectType::Databricks
            | DialectType::Hive
    ) {
        let mut args = f.args;
        let x = args.remove(0);
        let sep = args.remove(0);
        // Wrap separator in CONCAT('\\Q', sep, '\\E')
        // (\Q..\E quoting makes Java-regex SPLIT treat the separator
        // literally, even if it contains regex metacharacters.)
        // NOTE(review): a third argument, if present, is dropped on this
        // branch — confirm intended.
        let escaped_sep =
            Expression::Function(Box::new(Function::new(
                "CONCAT".to_string(),
                vec![
                    Expression::string("\\Q"),
                    sep,
                    Expression::string("\\E"),
                ],
            )));
        Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            vec![x, escaped_sep],
        ))))
    } else {
        Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            f.args,
        ))))
    }
}
// STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
// The pattern is already a regex here, so no \Q..\E escaping is applied.
"STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
    let name = match target {
        DialectType::DuckDB => "STR_SPLIT_REGEX",
        DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "SPLIT",
        _ => "REGEXP_SPLIT",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// SPLIT(str, delim) from Snowflake -> DuckDB with CASE wrapper
// Snowflake SPLIT returns NULL for a NULL delimiter and [str] for an empty
// delimiter; DuckDB STR_SPLIT differs, so emulate via a CASE expression.
// NOTE(review): str_arg is cloned into both the base call and the
// single-element array; if it has side effects it will appear twice in the
// generated SQL — presumably acceptable for expressions.
"SPLIT"
    if f.args.len() == 2
        && matches!(source, DialectType::Snowflake)
        && matches!(target, DialectType::DuckDB) =>
{
    let mut args = f.args;
    let str_arg = args.remove(0);
    let delim_arg = args.remove(0);

    // STR_SPLIT(str, delim) as the base
    let base_func = Expression::Function(Box::new(Function::new(
        "STR_SPLIT".to_string(),
        vec![str_arg.clone(), delim_arg.clone()],
    )));

    // [str] - array with single element
    let array_with_input =
        Expression::Array(Box::new(crate::expressions::Array {
            expressions: vec![str_arg],
        }));

    // CASE
    //   WHEN delim IS NULL THEN NULL
    //   WHEN delim = '' THEN [str]
    //   ELSE STR_SPLIT(str, delim)
    // END
    Ok(Expression::Case(Box::new(Case {
        operand: None,
        whens: vec![
            (
                // delim IS NULL -> NULL
                Expression::Is(Box::new(BinaryOp {
                    left: delim_arg.clone(),
                    right: Expression::Null(Null),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
                Expression::Null(Null),
            ),
            (
                // delim = '' -> [str]
                Expression::Eq(Box::new(BinaryOp {
                    left: delim_arg,
                    right: Expression::string(""),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
                array_with_input,
            ),
        ],
        else_: Some(base_func),
        comments: vec![],
        inferred_type: None,
    })))
}
// SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark
"SPLIT"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
                | DialectType::StarRocks
                | DialectType::Doris
        )
        && matches!(
            target,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex
    // so the separator is \Q..\E-quoted to keep it literal.
    let mut args = f.args;
    let x = args.remove(0);
    let sep = args.remove(0);
    let escaped_sep = Expression::Function(Box::new(Function::new(
        "CONCAT".to_string(),
        vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
    )));
    Ok(Expression::Function(Box::new(Function::new(
        "SPLIT".to_string(),
        vec![x, escaped_sep],
    ))))
}
// SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
// For ClickHouse target, preserve original name to maintain camelCase
"SUBSTRINGINDEX" => {
    let name = if matches!(target, DialectType::ClickHouse) {
        f.name.clone()
    } else {
        "SUBSTRING_INDEX".to_string()
    };
    Ok(Expression::Function(Box::new(Function::new(name, f.args))))
}
// ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
"ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
    // DuckDB source CARDINALITY -> DuckDB target: keep as CARDINALITY (used for maps)
    if name == "CARDINALITY"
        && matches!(source, DialectType::DuckDB)
        && matches!(target, DialectType::DuckDB)
    {
        return Ok(Expression::Function(f));
    }
    // Get the array argument (first arg, drop dimension args)
    let mut args = f.args;
    let arr = if args.is_empty() {
        // Zero-arg call: nothing to normalize, re-emit under the
        // original (outer) name.
        return Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            args,
        ))));
    } else {
        args.remove(0)
    };
    // Shadows the outer `name` with the target-specific spelling.
    let name =
        match target {
            DialectType::Spark
            | DialectType::Databricks
            | DialectType::Hive => "SIZE",
            DialectType::Presto | DialectType::Trino => "CARDINALITY",
            DialectType::BigQuery => "ARRAY_LENGTH",
            DialectType::DuckDB => {
                // DuckDB: use ARRAY_LENGTH with all args
                let mut all_args = vec![arr];
                all_args.extend(args);
                return Ok(Expression::Function(Box::new(
                    Function::new("ARRAY_LENGTH".to_string(), all_args),
                )));
            }
            DialectType::PostgreSQL | DialectType::Redshift => {
                // Keep ARRAY_LENGTH with dimension arg
                // (Postgres ARRAY_LENGTH requires the dimension argument.)
                let mut all_args = vec![arr];
                all_args.extend(args);
                return Ok(Expression::Function(Box::new(
                    Function::new("ARRAY_LENGTH".to_string(), all_args),
                )));
            }
            DialectType::ClickHouse => "LENGTH",
            _ => "ARRAY_LENGTH",
        };
    // Fall-through targets take only the array argument; any dimension
    // arguments are intentionally dropped here.
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        vec![arr],
    ))))
}
// TO_VARIANT(x) -> CAST(x AS VARIANT) for DuckDB
// (Snowflake's TO_VARIANT becomes an explicit cast; other targets keep the call.)
"TO_VARIANT" if f.args.len() == 1 => match target {
    DialectType::DuckDB => {
        let arg = f.args.into_iter().next().unwrap();
        Ok(Expression::Cast(Box::new(Cast {
            this: arg,
            // VARIANT is not a built-in DataType variant, so it is carried
            // as a custom type name.
            to: DataType::Custom {
                name: "VARIANT".to_string(),
            },
            double_colon_syntax: false,
            trailing_comments: Vec::new(),
            format: None,
            default: None,
            inferred_type: None,
        })))
    }
    _ => Ok(Expression::Function(f)),
},
// JSON_GROUP_ARRAY(x) -> JSON_AGG(x) for PostgreSQL
"JSON_GROUP_ARRAY" if f.args.len() == 1 => match target {
    DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
        Function::new("JSON_AGG".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// JSON_GROUP_OBJECT(key, value) -> JSON_OBJECT_AGG(key, value) for PostgreSQL
"JSON_GROUP_OBJECT" if f.args.len() == 2 => match target {
    DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
        Function::new("JSON_OBJECT_AGG".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// UNICODE(x) -> target-specific codepoint function
"UNICODE" if f.args.len() == 1 => {
    match target {
        // SQLite and DuckDB support UNICODE natively.
        DialectType::SQLite | DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new(
                "UNICODE".to_string(),
                f.args,
            ))))
        }
        DialectType::Oracle => {
            // ASCII(UNISTR(x))
            let inner = Expression::Function(Box::new(Function::new(
                "UNISTR".to_string(),
                f.args,
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ASCII".to_string(),
                vec![inner],
            ))))
        }
        DialectType::MySQL => {
            // ORD(CONVERT(x USING utf32))
            // ORD alone returns byte values for multibyte input; converting
            // to utf32 first yields the full codepoint.
            let arg = f.args.into_iter().next().unwrap();
            let convert_expr = Expression::ConvertToCharset(Box::new(
                crate::expressions::ConvertToCharset {
                    this: Box::new(arg),
                    dest: Some(Box::new(Expression::Identifier(
                        crate::expressions::Identifier::new("utf32"),
                    ))),
                    source: None,
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "ORD".to_string(),
                vec![convert_expr],
            ))))
        }
        // Fallback: ASCII(x).
        // NOTE(review): ASCII only matches UNICODE for codepoints < 128 on
        // some engines — confirm acceptable for the remaining targets.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ASCII".to_string(),
            f.args,
        )))),
    }
}
// XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
"XOR" if f.args.len() >= 2 => {
    match target {
        DialectType::ClickHouse => {
            // ClickHouse: keep as xor() function with lowercase name
            Ok(Expression::Function(Box::new(Function::new(
                "xor".to_string(),
                f.args,
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            if f.args.len() == 2 {
                Ok(Expression::Function(Box::new(Function::new(
                    "BITWISE_XOR".to_string(),
                    f.args,
                ))))
            } else {
                // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
                // BITWISE_XOR is binary, so fold left over the arg list.
                let mut args = f.args;
                let first = args.remove(0);
                let second = args.remove(0);
                let mut result =
                    Expression::Function(Box::new(Function::new(
                        "BITWISE_XOR".to_string(),
                        vec![first, second],
                    )));
                for arg in args {
                    result =
                        Expression::Function(Box::new(Function::new(
                            "BITWISE_XOR".to_string(),
                            vec![result, arg],
                        )));
                }
                Ok(result)
            }
        }
        DialectType::MySQL
        | DialectType::SingleStore
        | DialectType::Doris
        | DialectType::StarRocks => {
            // Convert XOR(a, b, c) -> Expression::Xor with expressions list
            // (rendered as the infix a XOR b XOR c form).
            let args = f.args;
            Ok(Expression::Xor(Box::new(crate::expressions::Xor {
                this: None,
                expression: None,
                expressions: args,
            })))
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            // PostgreSQL: a # b (hash operator for XOR)
            // Left-fold into nested BitwiseXor nodes.
            let mut args = f.args;
            let first = args.remove(0);
            let second = args.remove(0);
            let mut result = Expression::BitwiseXor(Box::new(
                BinaryOp::new(first, second),
            ));
            for arg in args {
                result = Expression::BitwiseXor(Box::new(
                    BinaryOp::new(result, arg),
                ));
            }
            Ok(result)
        }
        DialectType::DuckDB => {
            // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
            Ok(Expression::Function(Box::new(Function::new(
                "XOR".to_string(),
                f.args,
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: a ^ b (caret operator for XOR)
            // Same left-fold as the PostgreSQL branch.
            let mut args = f.args;
            let first = args.remove(0);
            let second = args.remove(0);
            let mut result = Expression::BitwiseXor(Box::new(
                BinaryOp::new(first, second),
            ));
            for arg in args {
                result = Expression::BitwiseXor(Box::new(
                    BinaryOp::new(result, arg),
                ));
            }
            Ok(result)
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "XOR".to_string(),
            f.args,
        )))),
    }
}
// ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
// Mirrors the LIST_REVERSE_SORT arm above, minus the DuckDB rename.
"ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
    match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            // SORT_ARRAY's second argument FALSE requests descending order.
            let mut args = f.args;
            args.push(Expression::Identifier(
                crate::expressions::Identifier::new("FALSE"),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "SORT_ARRAY".to_string(),
                args,
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
            // NOTE(review): any arguments beyond the first are dropped on
            // this path — confirm intended.
            let arr = f.args.into_iter().next().unwrap();
            let lambda = Expression::Lambda(Box::new(
                crate::expressions::LambdaExpr {
                    parameters: vec![
                        Identifier::new("a"),
                        Identifier::new("b"),
                    ],
                    colon: false,
                    parameter_types: Vec::new(),
                    body: Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![
                            (
                                Expression::Lt(Box::new(
                                    BinaryOp::new(
                                        Expression::Identifier(
                                            Identifier::new("a"),
                                        ),
                                        Expression::Identifier(
                                            Identifier::new("b"),
                                        ),
                                    ),
                                )),
                                Expression::number(1),
                            ),
                            (
                                Expression::Gt(Box::new(
                                    BinaryOp::new(
                                        Expression::Identifier(
                                            Identifier::new("a"),
                                        ),
                                        Expression::Identifier(
                                            Identifier::new("b"),
                                        ),
                                    ),
                                )),
                                // -1 built as Neg(1) here; the
                                // LIST_REVERSE_SORT arm builds a "-1"
                                // literal — NOTE(review): presumably both
                                // render identically; consider unifying.
                                Expression::Neg(Box::new(
                                    crate::expressions::UnaryOp {
                                        this: Expression::number(1),
                                        inferred_type: None,
                                    },
                                )),
                            ),
                        ],
                        else_: Some(Expression::number(0)),
                        comments: Vec::new(),
                        inferred_type: None,
                    })),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_SORT".to_string(),
                vec![arr, lambda],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ARRAY_REVERSE_SORT".to_string(),
            f.args,
        )))),
    }
}
// ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
// Spark/Hive ENCODE requires an explicit charset, so default it to utf-8.
"ENCODE" if f.args.len() == 1 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => {
        let mut args = f.args;
        args.push(Expression::string("utf-8"));
        Ok(Expression::Function(Box::new(Function::new(
            "ENCODE".to_string(),
            args,
        ))))
    }
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        Ok(Expression::Function(Box::new(Function::new(
            "TO_UTF8".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(Box::new(Function::new(
        "ENCODE".to_string(),
        f.args,
    )))),
},
// DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
// Mirror image of the ENCODE arm above.
"DECODE" if f.args.len() == 1 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => {
        let mut args = f.args;
        args.push(Expression::string("utf-8"));
        Ok(Expression::Function(Box::new(Function::new(
            "DECODE".to_string(),
            args,
        ))))
    }
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        Ok(Expression::Function(Box::new(Function::new(
            "FROM_UTF8".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(Box::new(Function::new(
        "DECODE".to_string(),
        f.args,
    )))),
},
// QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive
// Pure rename; the (value, fraction) argument order is kept.
"QUANTILE" if f.args.len() == 2 => {
    let name = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "PERCENTILE",
        DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
        DialectType::BigQuery => "PERCENTILE_CONT",
        _ => "QUANTILE",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
"QUANTILE_CONT" if f.args.len() == 2 => {
    let mut args = f.args;
    let column = args.remove(0);
    let quantile = args.remove(0);
    match target {
        // DuckDB keeps its native two-argument form.
        DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new(
                "QUANTILE_CONT".to_string(),
                vec![column, quantile],
            ))))
        }
        DialectType::PostgreSQL
        | DialectType::Redshift
        | DialectType::Snowflake => {
            // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
            // The column appears twice: inside the percentile node and as
            // the WITHIN GROUP ordering key — hence the clone.
            let inner = Expression::PercentileCont(Box::new(
                crate::expressions::PercentileFunc {
                    this: column.clone(),
                    percentile: quantile,
                    order_by: None,
                    filter: None,
                },
            ));
            Ok(Expression::WithinGroup(Box::new(
                crate::expressions::WithinGroup {
                    this: inner,
                    order_by: vec![crate::expressions::Ordered {
                        this: column,
                        desc: false,
                        nulls_first: None,
                        explicit_asc: false,
                        with_fill: None,
                    }],
                },
            )))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "QUANTILE_CONT".to_string(),
            vec![column, quantile],
        )))),
    }
}
// QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
// Identical structure to QUANTILE_CONT, but the discrete variant.
"QUANTILE_DISC" if f.args.len() == 2 => {
    let mut args = f.args;
    let column = args.remove(0);
    let quantile = args.remove(0);
    match target {
        DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new(
                "QUANTILE_DISC".to_string(),
                vec![column, quantile],
            ))))
        }
        DialectType::PostgreSQL
        | DialectType::Redshift
        | DialectType::Snowflake => {
            // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
            let inner = Expression::PercentileDisc(Box::new(
                crate::expressions::PercentileFunc {
                    this: column.clone(),
                    percentile: quantile,
                    order_by: None,
                    filter: None,
                },
            ));
            Ok(Expression::WithinGroup(Box::new(
                crate::expressions::WithinGroup {
                    this: inner,
                    order_by: vec![crate::expressions::Ordered {
                        this: column,
                        desc: false,
                        nulls_first: None,
                        explicit_asc: false,
                        with_fill: None,
                    }],
                },
            )))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "QUANTILE_DISC".to_string(),
            vec![column, quantile],
        )))),
    }
}
// PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
// Pure rename; arguments (including an optional accuracy arg) pass through.
"PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
    let name = match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => "APPROX_PERCENTILE",
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "PERCENTILE_APPROX",
        DialectType::DuckDB => "APPROX_QUANTILE",
        DialectType::PostgreSQL | DialectType::Redshift => {
            "PERCENTILE_CONT"
        }
        // Unknown targets keep whichever spelling the source used.
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
"EPOCH" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "UNIX_TIMESTAMP",
        DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
        _ => "EPOCH",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// EPOCH_MS(x) -> target-specific epoch milliseconds conversion
"EPOCH_MS" if f.args.len() == 1 => {
    match target {
        // Spark/Databricks have a dedicated millisecond constructor.
        DialectType::Spark | DialectType::Databricks => {
            Ok(Expression::Function(Box::new(Function::new(
                "TIMESTAMP_MILLIS".to_string(),
                f.args,
            ))))
        }
        DialectType::Hive => {
            // Hive: FROM_UNIXTIME(x / 1000)
            // (FROM_UNIXTIME takes seconds, so divide the ms value.)
            let arg = f.args.into_iter().next().unwrap();
            let div_expr = Expression::Div(Box::new(
                crate::expressions::BinaryOp::new(
                    arg,
                    Expression::number(1000),
                ),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_UNIXTIME".to_string(),
                vec![div_expr],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            // Same FROM_UNIXTIME(x / 1000) shape as the Hive branch.
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_UNIXTIME".to_string(),
                vec![Expression::Div(Box::new(
                    crate::expressions::BinaryOp::new(
                        f.args.into_iter().next().unwrap(),
                        Expression::number(1000),
                    ),
                ))],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "EPOCH_MS".to_string(),
            f.args,
        )))),
    }
}
// HASHBYTES('algorithm', x) -> target-specific hash function
// TSQL's generic HASHBYTES is unpacked into per-algorithm functions.
"HASHBYTES" if f.args.len() == 2 => {
    // Keep HASHBYTES as-is for TSQL target
    if matches!(target, DialectType::TSQL) {
        return Ok(Expression::Function(f));
    }
    // The algorithm must be a string literal so it can be resolved at
    // transpile time; anything else leaves the call untouched.
    let algo_expr = &f.args[0];
    let algo = match algo_expr {
        Expression::Literal(lit)
            if matches!(
                lit.as_ref(),
                crate::expressions::Literal::String(_)
            ) =>
        {
            let crate::expressions::Literal::String(s) = lit.as_ref()
            else {
                unreachable!()
            };
            // Uppercase so 'sha1' / 'SHA1' etc. match uniformly below.
            s.to_ascii_uppercase()
        }
        _ => return Ok(Expression::Function(f)),
    };
    let data_arg = f.args.into_iter().nth(1).unwrap();
    match algo.as_str() {
        "SHA1" => {
            let name = match target {
                DialectType::Spark | DialectType::Databricks => "SHA",
                DialectType::Hive => "SHA1",
                _ => "SHA1",
            };
            Ok(Expression::Function(Box::new(Function::new(
                name.to_string(),
                vec![data_arg],
            ))))
        }
        // SHA2_* map to the two-argument SHA2(data, bits) form.
        "SHA2_256" => {
            Ok(Expression::Function(Box::new(Function::new(
                "SHA2".to_string(),
                vec![data_arg, Expression::number(256)],
            ))))
        }
        "SHA2_512" => {
            Ok(Expression::Function(Box::new(Function::new(
                "SHA2".to_string(),
                vec![data_arg, Expression::number(512)],
            ))))
        }
        "MD5" => Ok(Expression::Function(Box::new(Function::new(
            "MD5".to_string(),
            vec![data_arg],
        )))),
        // Unknown algorithm: re-emit HASHBYTES, but with the algorithm
        // string now uppercased (the original literal casing is lost).
        _ => Ok(Expression::Function(Box::new(Function::new(
            "HASHBYTES".to_string(),
            vec![Expression::string(&algo), data_arg],
        )))),
    }
}
// JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
// The vararg key list is folded into a single JSONPath string ($.k1.k2 or
// $.k1[0]) for targets that take a path, and kept as separate arguments for
// targets with native vararg forms (PostgreSQL, Redshift, ClickHouse).
"JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
    // _TEXT variants extract a scalar string; the plain variants keep JSON.
    let is_text = name == "JSON_EXTRACT_PATH_TEXT";
    let mut args = f.args;
    let json_expr = args.remove(0);
    // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
    let mut json_path = "$".to_string();
    for a in &args {
        match a {
            Expression::Literal(lit)
                if matches!(
                    lit.as_ref(),
                    crate::expressions::Literal::String(_)
                ) =>
            {
                let crate::expressions::Literal::String(s) =
                    lit.as_ref()
                else {
                    unreachable!()
                };
                // Numeric string keys become array indices: [0]
                if s.chars().all(|c| c.is_ascii_digit()) {
                    json_path.push('[');
                    json_path.push_str(s);
                    json_path.push(']');
                } else {
                    json_path.push('.');
                    json_path.push_str(s);
                }
            }
            _ => {
                // Non-literal key: cannot be folded into a static path.
                // NOTE(review): emits a ".?" placeholder and drops the
                // dynamic key expression — confirm this lossy fallback
                // is intended.
                json_path.push_str(".?");
            }
        }
    }
    match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::BigQuery | DialectType::MySQL => {
            // Same names as the Presto/Trino branch.
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::PostgreSQL | DialectType::Materialize => {
            // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
            // (native vararg key form; original key expressions preserved).
            let func_name = if is_text {
                "JSON_EXTRACT_PATH_TEXT"
            } else {
                "JSON_EXTRACT_PATH"
            };
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                new_args,
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
            if is_text {
                Ok(Expression::JsonExtractScalar(Box::new(
                    crate::expressions::JsonExtractFunc {
                        this: json_expr,
                        path: Expression::string(&json_path),
                        returning: None,
                        arrow_syntax: true,
                        hash_arrow_syntax: false,
                        wrapper_option: None,
                        quotes_option: None,
                        on_scalar_string: false,
                        on_error: None,
                    },
                )))
            } else {
                Ok(Expression::JsonExtract(Box::new(
                    crate::expressions::JsonExtractFunc {
                        this: json_expr,
                        path: Expression::string(&json_path),
                        returning: None,
                        arrow_syntax: true,
                        hash_arrow_syntax: false,
                        wrapper_option: None,
                        quotes_option: None,
                        on_scalar_string: false,
                        on_error: None,
                    },
                )))
            }
        }
        DialectType::Redshift => {
            // Keep as JSON_EXTRACT_PATH_TEXT for Redshift
            // NOTE(review): both _TEXT and non-_TEXT sources collapse to
            // the _TEXT form here — confirm intended.
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXTRACT_PATH_TEXT".to_string(),
                new_args,
            ))))
        }
        DialectType::TSQL => {
            // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
            // JSON_QUERY returns objects/arrays, JSON_VALUE returns
            // scalars; ISNULL picks whichever one matched.
            let jq = Expression::Function(Box::new(Function::new(
                "JSON_QUERY".to_string(),
                vec![json_expr.clone(), Expression::string(&json_path)],
            )));
            let jv = Expression::Function(Box::new(Function::new(
                "JSON_VALUE".to_string(),
                vec![json_expr, Expression::string(&json_path)],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ISNULL".to_string(),
                vec![jq, jv],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse takes the keys as separate vararg arguments.
            let func_name = if is_text {
                "JSONExtractString"
            } else {
                "JSONExtractRaw"
            };
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                new_args,
            ))))
        }
        _ => {
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
    }
}
// APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
"APPROX_DISTINCT" if f.args.len() >= 1 => {
    let name = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive
        | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
        _ => "APPROX_DISTINCT",
    };
    let mut args = f.args;
    // Hive doesn't support the accuracy parameter
    if name == "APPROX_COUNT_DISTINCT"
        && matches!(target, DialectType::Hive)
    {
        args.truncate(1);
    }
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        args,
    ))))
}
// REGEXP_EXTRACT(x, pattern) - normalize default group index
// Dialects disagree on the implicit capture-group index (0 = whole match,
// 1 = first group); when source and target defaults differ, the source's
// default is appended as an explicit third argument.
"REGEXP_EXTRACT" if f.args.len() == 2 => {
    // Determine source default group index
    let source_default = match source {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::DuckDB => 0,
        _ => 1, // Hive/Spark/Databricks default = 1
    };
    // Determine target default group index
    let target_default = match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::DuckDB
        | DialectType::BigQuery => 0,
        DialectType::Snowflake => {
            // Snowflake uses REGEXP_SUBSTR
            // (early return: rename only, no group-index adjustment).
            return Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_SUBSTR".to_string(),
                f.args,
            ))));
        }
        _ => 1, // Hive/Spark/Databricks default = 1
    };
    if source_default != target_default {
        // Make the source's implicit group index explicit so the target
        // reproduces the source semantics.
        let mut args = f.args;
        args.push(Expression::number(source_default));
        Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT".to_string(),
            args,
        ))))
    } else {
        Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT".to_string(),
            f.args,
        ))))
    }
}
11306 // RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
11307 "RLIKE" if f.args.len() == 2 => {
11308 let mut args = f.args;
11309 let str_expr = args.remove(0);
11310 let pattern = args.remove(0);
11311 match target {
11312 DialectType::DuckDB => {
11313 // REGEXP_MATCHES(str, pattern)
11314 Ok(Expression::Function(Box::new(Function::new(
11315 "REGEXP_MATCHES".to_string(),
11316 vec![str_expr, pattern],
11317 ))))
11318 }
11319 _ => {
11320 // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
11321 Ok(Expression::RegexpLike(Box::new(
11322 crate::expressions::RegexpFunc {
11323 this: str_expr,
11324 pattern,
11325 flags: None,
11326 },
11327 )))
11328 }
11329 }
11330 }
11331 // EOMONTH(date[, month_offset]) -> target-specific
11332 "EOMONTH" if f.args.len() >= 1 => {
11333 let mut args = f.args;
11334 let date_arg = args.remove(0);
11335 let month_offset = if !args.is_empty() {
11336 Some(args.remove(0))
11337 } else {
11338 None
11339 };
11340
11341 // Helper: wrap date in CAST to DATE
11342 let cast_to_date = |e: Expression| -> Expression {
11343 Expression::Cast(Box::new(Cast {
11344 this: e,
11345 to: DataType::Date,
11346 trailing_comments: vec![],
11347 double_colon_syntax: false,
11348 format: None,
11349 default: None,
11350 inferred_type: None,
11351 }))
11352 };
11353
11354 match target {
11355 DialectType::TSQL | DialectType::Fabric => {
11356 // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
11357 let date = cast_to_date(date_arg);
11358 let date = if let Some(offset) = month_offset {
11359 Expression::Function(Box::new(Function::new(
11360 "DATEADD".to_string(),
11361 vec![
11362 Expression::Identifier(Identifier::new(
11363 "MONTH",
11364 )),
11365 offset,
11366 date,
11367 ],
11368 )))
11369 } else {
11370 date
11371 };
11372 Ok(Expression::Function(Box::new(Function::new(
11373 "EOMONTH".to_string(),
11374 vec![date],
11375 ))))
11376 }
11377 DialectType::Presto
11378 | DialectType::Trino
11379 | DialectType::Athena => {
11380 // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
11381 // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
11382 let cast_ts = Expression::Cast(Box::new(Cast {
11383 this: date_arg,
11384 to: DataType::Timestamp {
11385 timezone: false,
11386 precision: None,
11387 },
11388 trailing_comments: vec![],
11389 double_colon_syntax: false,
11390 format: None,
11391 default: None,
11392 inferred_type: None,
11393 }));
11394 let date = cast_to_date(cast_ts);
11395 let date = if let Some(offset) = month_offset {
11396 Expression::Function(Box::new(Function::new(
11397 "DATE_ADD".to_string(),
11398 vec![Expression::string("MONTH"), offset, date],
11399 )))
11400 } else {
11401 date
11402 };
11403 Ok(Expression::Function(Box::new(Function::new(
11404 "LAST_DAY_OF_MONTH".to_string(),
11405 vec![date],
11406 ))))
11407 }
11408 DialectType::PostgreSQL => {
11409 // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
11410 let date = cast_to_date(date_arg);
11411 let date = if let Some(offset) = month_offset {
11412 let interval_str = format!(
11413 "{} MONTH",
11414 Self::expr_to_string_static(&offset)
11415 );
11416 Expression::Add(Box::new(
11417 crate::expressions::BinaryOp::new(
11418 date,
11419 Expression::Interval(Box::new(
11420 crate::expressions::Interval {
11421 this: Some(Expression::string(
11422 &interval_str,
11423 )),
11424 unit: None,
11425 },
11426 )),
11427 ),
11428 ))
11429 } else {
11430 date
11431 };
11432 let truncated =
11433 Expression::Function(Box::new(Function::new(
11434 "DATE_TRUNC".to_string(),
11435 vec![Expression::string("MONTH"), date],
11436 )));
11437 let plus_month = Expression::Add(Box::new(
11438 crate::expressions::BinaryOp::new(
11439 truncated,
11440 Expression::Interval(Box::new(
11441 crate::expressions::Interval {
11442 this: Some(Expression::string("1 MONTH")),
11443 unit: None,
11444 },
11445 )),
11446 ),
11447 ));
11448 let minus_day = Expression::Sub(Box::new(
11449 crate::expressions::BinaryOp::new(
11450 plus_month,
11451 Expression::Interval(Box::new(
11452 crate::expressions::Interval {
11453 this: Some(Expression::string("1 DAY")),
11454 unit: None,
11455 },
11456 )),
11457 ),
11458 ));
11459 Ok(Expression::Cast(Box::new(Cast {
11460 this: minus_day,
11461 to: DataType::Date,
11462 trailing_comments: vec![],
11463 double_colon_syntax: false,
11464 format: None,
11465 default: None,
11466 inferred_type: None,
11467 })))
11468 }
11469 DialectType::DuckDB => {
11470 // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
11471 let date = cast_to_date(date_arg);
11472 let date = if let Some(offset) = month_offset {
11473 // Wrap negative numbers in parentheses for DuckDB INTERVAL
11474 let interval_val =
11475 if matches!(&offset, Expression::Neg(_)) {
11476 Expression::Paren(Box::new(
11477 crate::expressions::Paren {
11478 this: offset,
11479 trailing_comments: Vec::new(),
11480 },
11481 ))
11482 } else {
11483 offset
11484 };
11485 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
11486 date,
11487 Expression::Interval(Box::new(crate::expressions::Interval {
11488 this: Some(interval_val),
11489 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
11490 unit: crate::expressions::IntervalUnit::Month,
11491 use_plural: false,
11492 }),
11493 })),
11494 )))
11495 } else {
11496 date
11497 };
11498 Ok(Expression::Function(Box::new(Function::new(
11499 "LAST_DAY".to_string(),
11500 vec![date],
11501 ))))
11502 }
11503 DialectType::Snowflake | DialectType::Redshift => {
11504 // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
11505 // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
11506 let date = if matches!(target, DialectType::Snowflake) {
11507 Expression::Function(Box::new(Function::new(
11508 "TO_DATE".to_string(),
11509 vec![date_arg],
11510 )))
11511 } else {
11512 cast_to_date(date_arg)
11513 };
11514 let date = if let Some(offset) = month_offset {
11515 Expression::Function(Box::new(Function::new(
11516 "DATEADD".to_string(),
11517 vec![
11518 Expression::Identifier(Identifier::new(
11519 "MONTH",
11520 )),
11521 offset,
11522 date,
11523 ],
11524 )))
11525 } else {
11526 date
11527 };
11528 Ok(Expression::Function(Box::new(Function::new(
11529 "LAST_DAY".to_string(),
11530 vec![date],
11531 ))))
11532 }
11533 DialectType::Spark | DialectType::Databricks => {
11534 // Spark: LAST_DAY(TO_DATE(date))
11535 // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
11536 let date = Expression::Function(Box::new(Function::new(
11537 "TO_DATE".to_string(),
11538 vec![date_arg],
11539 )));
11540 let date = if let Some(offset) = month_offset {
11541 Expression::Function(Box::new(Function::new(
11542 "ADD_MONTHS".to_string(),
11543 vec![date, offset],
11544 )))
11545 } else {
11546 date
11547 };
11548 Ok(Expression::Function(Box::new(Function::new(
11549 "LAST_DAY".to_string(),
11550 vec![date],
11551 ))))
11552 }
11553 DialectType::MySQL => {
11554 // MySQL: LAST_DAY(DATE(date)) - no offset
11555 // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
11556 let date = if let Some(offset) = month_offset {
11557 let iu = crate::expressions::IntervalUnit::Month;
11558 Expression::DateAdd(Box::new(
11559 crate::expressions::DateAddFunc {
11560 this: date_arg,
11561 interval: offset,
11562 unit: iu,
11563 },
11564 ))
11565 } else {
11566 Expression::Function(Box::new(Function::new(
11567 "DATE".to_string(),
11568 vec![date_arg],
11569 )))
11570 };
11571 Ok(Expression::Function(Box::new(Function::new(
11572 "LAST_DAY".to_string(),
11573 vec![date],
11574 ))))
11575 }
11576 DialectType::BigQuery => {
11577 // BigQuery: LAST_DAY(CAST(date AS DATE))
11578 // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
11579 let date = cast_to_date(date_arg);
11580 let date = if let Some(offset) = month_offset {
11581 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
11582 this: Some(offset),
11583 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
11584 unit: crate::expressions::IntervalUnit::Month,
11585 use_plural: false,
11586 }),
11587 }));
11588 Expression::Function(Box::new(Function::new(
11589 "DATE_ADD".to_string(),
11590 vec![date, interval],
11591 )))
11592 } else {
11593 date
11594 };
11595 Ok(Expression::Function(Box::new(Function::new(
11596 "LAST_DAY".to_string(),
11597 vec![date],
11598 ))))
11599 }
11600 DialectType::ClickHouse => {
11601 // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
11602 let date = Expression::Cast(Box::new(Cast {
11603 this: date_arg,
11604 to: DataType::Nullable {
11605 inner: Box::new(DataType::Date),
11606 },
11607 trailing_comments: vec![],
11608 double_colon_syntax: false,
11609 format: None,
11610 default: None,
11611 inferred_type: None,
11612 }));
11613 let date = if let Some(offset) = month_offset {
11614 Expression::Function(Box::new(Function::new(
11615 "DATE_ADD".to_string(),
11616 vec![
11617 Expression::Identifier(Identifier::new(
11618 "MONTH",
11619 )),
11620 offset,
11621 date,
11622 ],
11623 )))
11624 } else {
11625 date
11626 };
11627 Ok(Expression::Function(Box::new(Function::new(
11628 "LAST_DAY".to_string(),
11629 vec![date],
11630 ))))
11631 }
11632 DialectType::Hive => {
11633 // Hive: LAST_DAY(date)
11634 let date = if let Some(offset) = month_offset {
11635 Expression::Function(Box::new(Function::new(
11636 "ADD_MONTHS".to_string(),
11637 vec![date_arg, offset],
11638 )))
11639 } else {
11640 date_arg
11641 };
11642 Ok(Expression::Function(Box::new(Function::new(
11643 "LAST_DAY".to_string(),
11644 vec![date],
11645 ))))
11646 }
11647 _ => {
11648 // Default: LAST_DAY(date)
11649 let date = if let Some(offset) = month_offset {
11650 let unit =
11651 Expression::Identifier(Identifier::new("MONTH"));
11652 Expression::Function(Box::new(Function::new(
11653 "DATEADD".to_string(),
11654 vec![unit, offset, date_arg],
11655 )))
11656 } else {
11657 date_arg
11658 };
11659 Ok(Expression::Function(Box::new(Function::new(
11660 "LAST_DAY".to_string(),
11661 vec![date],
11662 ))))
11663 }
11664 }
11665 }
11666 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
11667 "LAST_DAY" | "LAST_DAY_OF_MONTH"
11668 if !matches!(source, DialectType::BigQuery)
11669 && f.args.len() >= 1 =>
11670 {
11671 let first_arg = f.args.into_iter().next().unwrap();
11672 match target {
11673 DialectType::TSQL | DialectType::Fabric => {
11674 Ok(Expression::Function(Box::new(Function::new(
11675 "EOMONTH".to_string(),
11676 vec![first_arg],
11677 ))))
11678 }
11679 DialectType::Presto
11680 | DialectType::Trino
11681 | DialectType::Athena => {
11682 Ok(Expression::Function(Box::new(Function::new(
11683 "LAST_DAY_OF_MONTH".to_string(),
11684 vec![first_arg],
11685 ))))
11686 }
11687 _ => Ok(Expression::Function(Box::new(Function::new(
11688 "LAST_DAY".to_string(),
11689 vec![first_arg],
11690 )))),
11691 }
11692 }
11693 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
11694 "MAP"
11695 if f.args.len() == 2
11696 && matches!(
11697 source,
11698 DialectType::Presto
11699 | DialectType::Trino
11700 | DialectType::Athena
11701 ) =>
11702 {
11703 let keys_arg = f.args[0].clone();
11704 let vals_arg = f.args[1].clone();
11705
11706 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
11707 fn extract_array_elements(
11708 expr: &Expression,
11709 ) -> Option<&Vec<Expression>> {
11710 match expr {
11711 Expression::Array(arr) => Some(&arr.expressions),
11712 Expression::ArrayFunc(arr) => Some(&arr.expressions),
11713 Expression::Function(f)
11714 if f.name.eq_ignore_ascii_case("ARRAY") =>
11715 {
11716 Some(&f.args)
11717 }
11718 _ => None,
11719 }
11720 }
11721
11722 match target {
11723 DialectType::Spark | DialectType::Databricks => {
11724 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
11725 Ok(Expression::Function(Box::new(Function::new(
11726 "MAP_FROM_ARRAYS".to_string(),
11727 f.args,
11728 ))))
11729 }
11730 DialectType::Hive => {
11731 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
11732 if let (Some(keys), Some(vals)) = (
11733 extract_array_elements(&keys_arg),
11734 extract_array_elements(&vals_arg),
11735 ) {
11736 if keys.len() == vals.len() {
11737 let mut interleaved = Vec::new();
11738 for (k, v) in keys.iter().zip(vals.iter()) {
11739 interleaved.push(k.clone());
11740 interleaved.push(v.clone());
11741 }
11742 Ok(Expression::Function(Box::new(Function::new(
11743 "MAP".to_string(),
11744 interleaved,
11745 ))))
11746 } else {
11747 Ok(Expression::Function(Box::new(Function::new(
11748 "MAP".to_string(),
11749 f.args,
11750 ))))
11751 }
11752 } else {
11753 Ok(Expression::Function(Box::new(Function::new(
11754 "MAP".to_string(),
11755 f.args,
11756 ))))
11757 }
11758 }
11759 DialectType::Snowflake => {
11760 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
11761 if let (Some(keys), Some(vals)) = (
11762 extract_array_elements(&keys_arg),
11763 extract_array_elements(&vals_arg),
11764 ) {
11765 if keys.len() == vals.len() {
11766 let mut interleaved = Vec::new();
11767 for (k, v) in keys.iter().zip(vals.iter()) {
11768 interleaved.push(k.clone());
11769 interleaved.push(v.clone());
11770 }
11771 Ok(Expression::Function(Box::new(Function::new(
11772 "OBJECT_CONSTRUCT".to_string(),
11773 interleaved,
11774 ))))
11775 } else {
11776 Ok(Expression::Function(Box::new(Function::new(
11777 "MAP".to_string(),
11778 f.args,
11779 ))))
11780 }
11781 } else {
11782 Ok(Expression::Function(Box::new(Function::new(
11783 "MAP".to_string(),
11784 f.args,
11785 ))))
11786 }
11787 }
11788 _ => Ok(Expression::Function(f)),
11789 }
11790 }
11791 // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
11792 "MAP"
11793 if f.args.is_empty()
11794 && matches!(
11795 source,
11796 DialectType::Hive
11797 | DialectType::Spark
11798 | DialectType::Databricks
11799 )
11800 && matches!(
11801 target,
11802 DialectType::Presto
11803 | DialectType::Trino
11804 | DialectType::Athena
11805 ) =>
11806 {
11807 let empty_keys =
11808 Expression::Array(Box::new(crate::expressions::Array {
11809 expressions: vec![],
11810 }));
11811 let empty_vals =
11812 Expression::Array(Box::new(crate::expressions::Array {
11813 expressions: vec![],
11814 }));
11815 Ok(Expression::Function(Box::new(Function::new(
11816 "MAP".to_string(),
11817 vec![empty_keys, empty_vals],
11818 ))))
11819 }
11820 // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
11821 "MAP"
11822 if f.args.len() >= 2
11823 && f.args.len() % 2 == 0
11824 && matches!(
11825 source,
11826 DialectType::Hive
11827 | DialectType::Spark
11828 | DialectType::Databricks
11829 | DialectType::ClickHouse
11830 ) =>
11831 {
11832 let args = f.args;
11833 match target {
11834 DialectType::DuckDB => {
11835 // MAP([k1, k2], [v1, v2])
11836 let mut keys = Vec::new();
11837 let mut vals = Vec::new();
11838 for (i, arg) in args.into_iter().enumerate() {
11839 if i % 2 == 0 {
11840 keys.push(arg);
11841 } else {
11842 vals.push(arg);
11843 }
11844 }
11845 let keys_arr = Expression::Array(Box::new(
11846 crate::expressions::Array { expressions: keys },
11847 ));
11848 let vals_arr = Expression::Array(Box::new(
11849 crate::expressions::Array { expressions: vals },
11850 ));
11851 Ok(Expression::Function(Box::new(Function::new(
11852 "MAP".to_string(),
11853 vec![keys_arr, vals_arr],
11854 ))))
11855 }
11856 DialectType::Presto | DialectType::Trino => {
11857 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
11858 let mut keys = Vec::new();
11859 let mut vals = Vec::new();
11860 for (i, arg) in args.into_iter().enumerate() {
11861 if i % 2 == 0 {
11862 keys.push(arg);
11863 } else {
11864 vals.push(arg);
11865 }
11866 }
11867 let keys_arr = Expression::Array(Box::new(
11868 crate::expressions::Array { expressions: keys },
11869 ));
11870 let vals_arr = Expression::Array(Box::new(
11871 crate::expressions::Array { expressions: vals },
11872 ));
11873 Ok(Expression::Function(Box::new(Function::new(
11874 "MAP".to_string(),
11875 vec![keys_arr, vals_arr],
11876 ))))
11877 }
11878 DialectType::Snowflake => Ok(Expression::Function(Box::new(
11879 Function::new("OBJECT_CONSTRUCT".to_string(), args),
11880 ))),
11881 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
11882 Function::new("map".to_string(), args),
11883 ))),
11884 _ => Ok(Expression::Function(Box::new(Function::new(
11885 "MAP".to_string(),
11886 args,
11887 )))),
11888 }
11889 }
11890 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
11891 "COLLECT_LIST" if f.args.len() >= 1 => {
11892 let name = match target {
11893 DialectType::Spark
11894 | DialectType::Databricks
11895 | DialectType::Hive => "COLLECT_LIST",
11896 DialectType::DuckDB
11897 | DialectType::PostgreSQL
11898 | DialectType::Redshift
11899 | DialectType::Snowflake
11900 | DialectType::BigQuery => "ARRAY_AGG",
11901 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
11902 _ => "ARRAY_AGG",
11903 };
11904 Ok(Expression::Function(Box::new(Function::new(
11905 name.to_string(),
11906 f.args,
11907 ))))
11908 }
11909 // COLLECT_SET(x) -> target-specific distinct array aggregation
11910 "COLLECT_SET" if f.args.len() >= 1 => {
11911 let name = match target {
11912 DialectType::Spark
11913 | DialectType::Databricks
11914 | DialectType::Hive => "COLLECT_SET",
11915 DialectType::Presto
11916 | DialectType::Trino
11917 | DialectType::Athena => "SET_AGG",
11918 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
11919 _ => "ARRAY_AGG",
11920 };
11921 Ok(Expression::Function(Box::new(Function::new(
11922 name.to_string(),
11923 f.args,
11924 ))))
11925 }
11926 // ISNAN(x) / IS_NAN(x) - normalize
11927 "ISNAN" | "IS_NAN" => {
11928 let name = match target {
11929 DialectType::Spark
11930 | DialectType::Databricks
11931 | DialectType::Hive => "ISNAN",
11932 DialectType::Presto
11933 | DialectType::Trino
11934 | DialectType::Athena => "IS_NAN",
11935 DialectType::BigQuery
11936 | DialectType::PostgreSQL
11937 | DialectType::Redshift => "IS_NAN",
11938 DialectType::ClickHouse => "IS_NAN",
11939 _ => "ISNAN",
11940 };
11941 Ok(Expression::Function(Box::new(Function::new(
11942 name.to_string(),
11943 f.args,
11944 ))))
11945 }
11946 // SPLIT_PART(str, delim, index) -> target-specific
11947 "SPLIT_PART" if f.args.len() == 3 => {
11948 match target {
11949 DialectType::Spark | DialectType::Databricks => {
11950 // Keep as SPLIT_PART (Spark 3.4+)
11951 Ok(Expression::Function(Box::new(Function::new(
11952 "SPLIT_PART".to_string(),
11953 f.args,
11954 ))))
11955 }
11956 DialectType::DuckDB
11957 if matches!(source, DialectType::Snowflake) =>
11958 {
11959 // Snowflake SPLIT_PART -> DuckDB with CASE wrapper:
11960 // - part_index 0 treated as 1
11961 // - empty delimiter: return whole string if index 1 or -1, else ''
11962 let mut args = f.args;
11963 let str_arg = args.remove(0);
11964 let delim_arg = args.remove(0);
11965 let idx_arg = args.remove(0);
11966
11967 // (CASE WHEN idx = 0 THEN 1 ELSE idx END)
11968 let adjusted_idx = Expression::Paren(Box::new(Paren {
11969 this: Expression::Case(Box::new(Case {
11970 operand: None,
11971 whens: vec![(
11972 Expression::Eq(Box::new(BinaryOp {
11973 left: idx_arg.clone(),
11974 right: Expression::number(0),
11975 left_comments: vec![],
11976 operator_comments: vec![],
11977 trailing_comments: vec![],
11978 inferred_type: None,
11979 })),
11980 Expression::number(1),
11981 )],
11982 else_: Some(idx_arg.clone()),
11983 comments: vec![],
11984 inferred_type: None,
11985 })),
11986 trailing_comments: vec![],
11987 }));
11988
11989 // SPLIT_PART(str, delim, adjusted_idx)
11990 let base_func =
11991 Expression::Function(Box::new(Function::new(
11992 "SPLIT_PART".to_string(),
11993 vec![
11994 str_arg.clone(),
11995 delim_arg.clone(),
11996 adjusted_idx.clone(),
11997 ],
11998 )));
11999
12000 // (CASE WHEN adjusted_idx = 1 OR adjusted_idx = -1 THEN str ELSE '' END)
12001 let empty_delim_case = Expression::Paren(Box::new(Paren {
12002 this: Expression::Case(Box::new(Case {
12003 operand: None,
12004 whens: vec![(
12005 Expression::Or(Box::new(BinaryOp {
12006 left: Expression::Eq(Box::new(BinaryOp {
12007 left: adjusted_idx.clone(),
12008 right: Expression::number(1),
12009 left_comments: vec![],
12010 operator_comments: vec![],
12011 trailing_comments: vec![],
12012 inferred_type: None,
12013 })),
12014 right: Expression::Eq(Box::new(BinaryOp {
12015 left: adjusted_idx,
12016 right: Expression::number(-1),
12017 left_comments: vec![],
12018 operator_comments: vec![],
12019 trailing_comments: vec![],
12020 inferred_type: None,
12021 })),
12022 left_comments: vec![],
12023 operator_comments: vec![],
12024 trailing_comments: vec![],
12025 inferred_type: None,
12026 })),
12027 str_arg,
12028 )],
12029 else_: Some(Expression::string("")),
12030 comments: vec![],
12031 inferred_type: None,
12032 })),
12033 trailing_comments: vec![],
12034 }));
12035
12036 // CASE WHEN delim = '' THEN (empty case) ELSE SPLIT_PART(...) END
12037 Ok(Expression::Case(Box::new(Case {
12038 operand: None,
12039 whens: vec![(
12040 Expression::Eq(Box::new(BinaryOp {
12041 left: delim_arg,
12042 right: Expression::string(""),
12043 left_comments: vec![],
12044 operator_comments: vec![],
12045 trailing_comments: vec![],
12046 inferred_type: None,
12047 })),
12048 empty_delim_case,
12049 )],
12050 else_: Some(base_func),
12051 comments: vec![],
12052 inferred_type: None,
12053 })))
12054 }
12055 DialectType::DuckDB
12056 | DialectType::PostgreSQL
12057 | DialectType::Snowflake
12058 | DialectType::Redshift
12059 | DialectType::Trino
12060 | DialectType::Presto => Ok(Expression::Function(Box::new(
12061 Function::new("SPLIT_PART".to_string(), f.args),
12062 ))),
12063 DialectType::Hive => {
12064 // SPLIT(str, delim)[index]
12065 // Complex conversion, just keep as-is for now
12066 Ok(Expression::Function(Box::new(Function::new(
12067 "SPLIT_PART".to_string(),
12068 f.args,
12069 ))))
12070 }
12071 _ => Ok(Expression::Function(Box::new(Function::new(
12072 "SPLIT_PART".to_string(),
12073 f.args,
12074 )))),
12075 }
12076 }
12077 // JSON_EXTRACT(json, path) -> target-specific JSON extraction
12078 "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
12079 let is_scalar = name == "JSON_EXTRACT_SCALAR";
12080 match target {
12081 DialectType::Spark
12082 | DialectType::Databricks
12083 | DialectType::Hive => {
12084 let mut args = f.args;
12085 // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
12086 // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
12087 if let Some(Expression::Function(inner)) = args.first() {
12088 if inner.name.eq_ignore_ascii_case("TRY")
12089 && inner.args.len() == 1
12090 {
12091 let mut inner_args = inner.args.clone();
12092 args[0] = inner_args.remove(0);
12093 }
12094 }
12095 Ok(Expression::Function(Box::new(Function::new(
12096 "GET_JSON_OBJECT".to_string(),
12097 args,
12098 ))))
12099 }
12100 DialectType::DuckDB | DialectType::SQLite => {
12101 // json -> path syntax
12102 let mut args = f.args;
12103 let json_expr = args.remove(0);
12104 let path = args.remove(0);
12105 Ok(Expression::JsonExtract(Box::new(
12106 crate::expressions::JsonExtractFunc {
12107 this: json_expr,
12108 path,
12109 returning: None,
12110 arrow_syntax: true,
12111 hash_arrow_syntax: false,
12112 wrapper_option: None,
12113 quotes_option: None,
12114 on_scalar_string: false,
12115 on_error: None,
12116 },
12117 )))
12118 }
12119 DialectType::TSQL => {
12120 let func_name = if is_scalar {
12121 "JSON_VALUE"
12122 } else {
12123 "JSON_QUERY"
12124 };
12125 Ok(Expression::Function(Box::new(Function::new(
12126 func_name.to_string(),
12127 f.args,
12128 ))))
12129 }
12130 DialectType::PostgreSQL | DialectType::Redshift => {
12131 let func_name = if is_scalar {
12132 "JSON_EXTRACT_PATH_TEXT"
12133 } else {
12134 "JSON_EXTRACT_PATH"
12135 };
12136 Ok(Expression::Function(Box::new(Function::new(
12137 func_name.to_string(),
12138 f.args,
12139 ))))
12140 }
12141 _ => Ok(Expression::Function(Box::new(Function::new(
12142 name.to_string(),
12143 f.args,
12144 )))),
12145 }
12146 }
12147 // MySQL JSON_SEARCH(json_doc, mode, search[, escape_char[, path]]) -> DuckDB json_tree-based lookup
12148 "JSON_SEARCH"
12149 if matches!(target, DialectType::DuckDB)
12150 && (3..=5).contains(&f.args.len()) =>
12151 {
12152 let args = &f.args;
12153
12154 // Only rewrite deterministic modes and NULL/no escape-char variant.
12155 let mode = match &args[1] {
12156 Expression::Literal(lit)
12157 if matches!(
12158 lit.as_ref(),
12159 crate::expressions::Literal::String(_)
12160 ) =>
12161 {
12162 let crate::expressions::Literal::String(s) = lit.as_ref()
12163 else {
12164 unreachable!()
12165 };
12166 s.to_ascii_lowercase()
12167 }
12168 _ => return Ok(Expression::Function(f)),
12169 };
12170 if mode != "one" && mode != "all" {
12171 return Ok(Expression::Function(f));
12172 }
12173 if args.len() >= 4 && !matches!(&args[3], Expression::Null(_)) {
12174 return Ok(Expression::Function(f));
12175 }
12176
12177 let json_doc_sql = match Generator::sql(&args[0]) {
12178 Ok(sql) => sql,
12179 Err(_) => return Ok(Expression::Function(f)),
12180 };
12181 let search_sql = match Generator::sql(&args[2]) {
12182 Ok(sql) => sql,
12183 Err(_) => return Ok(Expression::Function(f)),
12184 };
12185 let path_sql = if args.len() == 5 {
12186 match Generator::sql(&args[4]) {
12187 Ok(sql) => sql,
12188 Err(_) => return Ok(Expression::Function(f)),
12189 }
12190 } else {
12191 "'$'".to_string()
12192 };
12193
12194 let rewrite_sql = if mode == "all" {
12195 format!(
12196 "(SELECT TO_JSON(LIST(__jt.fullkey)) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}))",
12197 json_doc_sql, path_sql, search_sql
12198 )
12199 } else {
12200 format!(
12201 "(SELECT TO_JSON(__jt.fullkey) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}) ORDER BY __jt.id LIMIT 1)",
12202 json_doc_sql, path_sql, search_sql
12203 )
12204 };
12205
12206 Ok(Expression::Raw(crate::expressions::Raw {
12207 sql: rewrite_sql,
12208 }))
12209 }
12210 // SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
12211 // BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
12212 "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
12213 if f.args.len() >= 2
12214 && matches!(source, DialectType::SingleStore) =>
12215 {
12216 let is_bson = name == "BSON_EXTRACT_BSON";
12217 let mut args = f.args;
12218 let json_expr = args.remove(0);
12219
12220 // Build JSONPath from remaining arguments
12221 let mut path = String::from("$");
12222 for arg in &args {
12223 if let Expression::Literal(lit) = arg {
12224 if let crate::expressions::Literal::String(s) = lit.as_ref()
12225 {
12226 // Check if it's a numeric string (array index)
12227 if s.parse::<i64>().is_ok() {
12228 path.push('[');
12229 path.push_str(s);
12230 path.push(']');
12231 } else {
12232 path.push('.');
12233 path.push_str(s);
12234 }
12235 }
12236 }
12237 }
12238
12239 let target_func = if is_bson {
12240 "JSONB_EXTRACT"
12241 } else {
12242 "JSON_EXTRACT"
12243 };
12244 Ok(Expression::Function(Box::new(Function::new(
12245 target_func.to_string(),
12246 vec![json_expr, Expression::string(&path)],
12247 ))))
12248 }
12249 // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
12250 "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
12251 Ok(Expression::Function(Box::new(Function {
12252 name: "arraySum".to_string(),
12253 args: f.args,
12254 distinct: f.distinct,
12255 trailing_comments: f.trailing_comments,
12256 use_bracket_syntax: f.use_bracket_syntax,
12257 no_parens: f.no_parens,
12258 quoted: f.quoted,
12259 span: None,
12260 inferred_type: None,
12261 })))
12262 }
12263 // TSQL JSON_QUERY/JSON_VALUE -> target-specific
12264 // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
12265 // and is handled by JsonQueryValueConvert action. This handles the case where
12266 // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
12267 "JSON_QUERY" | "JSON_VALUE"
12268 if f.args.len() == 2
12269 && matches!(
12270 source,
12271 DialectType::TSQL | DialectType::Fabric
12272 ) =>
12273 {
12274 match target {
12275 DialectType::Spark
12276 | DialectType::Databricks
12277 | DialectType::Hive => Ok(Expression::Function(Box::new(
12278 Function::new("GET_JSON_OBJECT".to_string(), f.args),
12279 ))),
12280 _ => Ok(Expression::Function(Box::new(Function::new(
12281 name.to_string(),
12282 f.args,
12283 )))),
12284 }
12285 }
12286 // UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
12287 "UNIX_TIMESTAMP" if f.args.len() == 1 => {
12288 let arg = f.args.into_iter().next().unwrap();
12289 let is_hive_source = matches!(
12290 source,
12291 DialectType::Hive
12292 | DialectType::Spark
12293 | DialectType::Databricks
12294 );
12295 match target {
12296 DialectType::DuckDB if is_hive_source => {
12297 // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
12298 let strptime =
12299 Expression::Function(Box::new(Function::new(
12300 "STRPTIME".to_string(),
12301 vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
12302 )));
12303 Ok(Expression::Function(Box::new(Function::new(
12304 "EPOCH".to_string(),
12305 vec![strptime],
12306 ))))
12307 }
12308 DialectType::Presto | DialectType::Trino if is_hive_source => {
12309 // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
12310 let cast_varchar =
12311 Expression::Cast(Box::new(crate::expressions::Cast {
12312 this: arg.clone(),
12313 to: DataType::VarChar {
12314 length: None,
12315 parenthesized_length: false,
12316 },
12317 trailing_comments: vec![],
12318 double_colon_syntax: false,
12319 format: None,
12320 default: None,
12321 inferred_type: None,
12322 }));
12323 let date_parse =
12324 Expression::Function(Box::new(Function::new(
12325 "DATE_PARSE".to_string(),
12326 vec![
12327 cast_varchar,
12328 Expression::string("%Y-%m-%d %T"),
12329 ],
12330 )));
12331 let try_expr = Expression::Function(Box::new(
12332 Function::new("TRY".to_string(), vec![date_parse]),
12333 ));
12334 let date_format =
12335 Expression::Function(Box::new(Function::new(
12336 "DATE_FORMAT".to_string(),
12337 vec![arg, Expression::string("%Y-%m-%d %T")],
12338 )));
12339 let parse_datetime =
12340 Expression::Function(Box::new(Function::new(
12341 "PARSE_DATETIME".to_string(),
12342 vec![
12343 date_format,
12344 Expression::string("yyyy-MM-dd HH:mm:ss"),
12345 ],
12346 )));
12347 let coalesce =
12348 Expression::Function(Box::new(Function::new(
12349 "COALESCE".to_string(),
12350 vec![try_expr, parse_datetime],
12351 )));
12352 Ok(Expression::Function(Box::new(Function::new(
12353 "TO_UNIXTIME".to_string(),
12354 vec![coalesce],
12355 ))))
12356 }
12357 DialectType::Presto | DialectType::Trino => {
12358 Ok(Expression::Function(Box::new(Function::new(
12359 "TO_UNIXTIME".to_string(),
12360 vec![arg],
12361 ))))
12362 }
12363 _ => Ok(Expression::Function(Box::new(Function::new(
12364 "UNIX_TIMESTAMP".to_string(),
12365 vec![arg],
12366 )))),
12367 }
12368 }
12369 // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
12370 "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
12371 DialectType::Spark
12372 | DialectType::Databricks
12373 | DialectType::Hive => Ok(Expression::Function(Box::new(
12374 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
12375 ))),
12376 _ => Ok(Expression::Function(Box::new(Function::new(
12377 "TO_UNIX_TIMESTAMP".to_string(),
12378 f.args,
12379 )))),
12380 },
12381 // CURDATE() -> CURRENT_DATE
12382 "CURDATE" => {
12383 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
12384 }
12385 // CURTIME() -> CURRENT_TIME
12386 "CURTIME" => {
12387 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
12388 precision: None,
12389 }))
12390 }
12391 // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive, LIST_SORT for DuckDB
12392 "ARRAY_SORT" if f.args.len() >= 1 => {
12393 match target {
12394 DialectType::Hive => {
12395 let mut args = f.args;
12396 args.truncate(1); // Drop lambda comparator
12397 Ok(Expression::Function(Box::new(Function::new(
12398 "SORT_ARRAY".to_string(),
12399 args,
12400 ))))
12401 }
12402 DialectType::DuckDB
12403 if matches!(source, DialectType::Snowflake) =>
12404 {
12405 // Snowflake ARRAY_SORT(arr[, asc_bool[, nulls_first_bool]]) -> DuckDB LIST_SORT(arr[, 'ASC'/'DESC'[, 'NULLS FIRST']])
12406 let mut args_iter = f.args.into_iter();
12407 let arr = args_iter.next().unwrap();
12408 let asc_arg = args_iter.next();
12409 let nulls_first_arg = args_iter.next();
12410
12411 let is_asc_bool = asc_arg
12412 .as_ref()
12413 .map(|a| matches!(a, Expression::Boolean(_)))
12414 .unwrap_or(false);
12415 let is_nf_bool = nulls_first_arg
12416 .as_ref()
12417 .map(|a| matches!(a, Expression::Boolean(_)))
12418 .unwrap_or(false);
12419
12420 // No boolean args: pass through as-is
12421 if !is_asc_bool && !is_nf_bool {
12422 let mut result_args = vec![arr];
12423 if let Some(asc) = asc_arg {
12424 result_args.push(asc);
12425 if let Some(nf) = nulls_first_arg {
12426 result_args.push(nf);
12427 }
12428 }
12429 Ok(Expression::Function(Box::new(Function::new(
12430 "LIST_SORT".to_string(),
12431 result_args,
12432 ))))
12433 } else {
12434 // Has boolean args: convert to DuckDB LIST_SORT format
12435 let descending = matches!(&asc_arg, Some(Expression::Boolean(b)) if !b.value);
12436
12437 // Snowflake defaults: nulls_first = TRUE for DESC, FALSE for ASC
12438 let nulls_are_first = match &nulls_first_arg {
12439 Some(Expression::Boolean(b)) => b.value,
12440 None if is_asc_bool => descending, // Snowflake default
12441 _ => false,
12442 };
12443 let nulls_first_sql = if nulls_are_first {
12444 Some(Expression::string("NULLS FIRST"))
12445 } else {
12446 None
12447 };
12448
12449 if !is_asc_bool {
12450 // asc is non-boolean expression, nulls_first is boolean
12451 let mut result_args = vec![arr];
12452 if let Some(asc) = asc_arg {
12453 result_args.push(asc);
12454 }
12455 if let Some(nf) = nulls_first_sql {
12456 result_args.push(nf);
12457 }
12458 Ok(Expression::Function(Box::new(Function::new(
12459 "LIST_SORT".to_string(),
12460 result_args,
12461 ))))
12462 } else {
12463 if !descending && !nulls_are_first {
12464 // ASC, NULLS LAST (default) -> LIST_SORT(arr)
12465 Ok(Expression::Function(Box::new(
12466 Function::new(
12467 "LIST_SORT".to_string(),
12468 vec![arr],
12469 ),
12470 )))
12471 } else if descending && !nulls_are_first {
12472 // DESC, NULLS LAST -> ARRAY_REVERSE_SORT(arr)
12473 Ok(Expression::Function(Box::new(
12474 Function::new(
12475 "ARRAY_REVERSE_SORT".to_string(),
12476 vec![arr],
12477 ),
12478 )))
12479 } else {
12480 // NULLS FIRST -> LIST_SORT(arr, 'ASC'/'DESC', 'NULLS FIRST')
12481 let order_str =
12482 if descending { "DESC" } else { "ASC" };
12483 Ok(Expression::Function(Box::new(
12484 Function::new(
12485 "LIST_SORT".to_string(),
12486 vec![
12487 arr,
12488 Expression::string(order_str),
12489 Expression::string("NULLS FIRST"),
12490 ],
12491 ),
12492 )))
12493 }
12494 }
12495 }
12496 }
12497 DialectType::DuckDB => {
12498 // Non-Snowflake source: ARRAY_SORT(x, lambda) -> ARRAY_SORT(x) (drop comparator)
12499 let mut args = f.args;
12500 args.truncate(1); // Drop lambda comparator for DuckDB
12501 Ok(Expression::Function(Box::new(Function::new(
12502 "ARRAY_SORT".to_string(),
12503 args,
12504 ))))
12505 }
12506 _ => Ok(Expression::Function(f)),
12507 }
12508 }
12509 // SORT_ARRAY(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, keep for Hive/Spark
12510 "SORT_ARRAY" if f.args.len() == 1 => match target {
12511 DialectType::Hive
12512 | DialectType::Spark
12513 | DialectType::Databricks => Ok(Expression::Function(f)),
12514 DialectType::DuckDB => Ok(Expression::Function(Box::new(
12515 Function::new("LIST_SORT".to_string(), f.args),
12516 ))),
12517 _ => Ok(Expression::Function(Box::new(Function::new(
12518 "ARRAY_SORT".to_string(),
12519 f.args,
12520 )))),
12521 },
12522 // SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
12523 "SORT_ARRAY" if f.args.len() == 2 => {
12524 let is_desc =
12525 matches!(&f.args[1], Expression::Boolean(b) if !b.value);
12526 if is_desc {
12527 match target {
12528 DialectType::DuckDB => {
12529 Ok(Expression::Function(Box::new(Function::new(
12530 "ARRAY_REVERSE_SORT".to_string(),
12531 vec![f.args.into_iter().next().unwrap()],
12532 ))))
12533 }
12534 DialectType::Presto | DialectType::Trino => {
12535 let arr_arg = f.args.into_iter().next().unwrap();
12536 let a = Expression::Column(Box::new(
12537 crate::expressions::Column {
12538 name: crate::expressions::Identifier::new("a"),
12539 table: None,
12540 join_mark: false,
12541 trailing_comments: Vec::new(),
12542 span: None,
12543 inferred_type: None,
12544 },
12545 ));
12546 let b = Expression::Column(Box::new(
12547 crate::expressions::Column {
12548 name: crate::expressions::Identifier::new("b"),
12549 table: None,
12550 join_mark: false,
12551 trailing_comments: Vec::new(),
12552 span: None,
12553 inferred_type: None,
12554 },
12555 ));
12556 let case_expr = Expression::Case(Box::new(
12557 crate::expressions::Case {
12558 operand: None,
12559 whens: vec![
12560 (
12561 Expression::Lt(Box::new(
12562 BinaryOp::new(a.clone(), b.clone()),
12563 )),
12564 Expression::Literal(Box::new(
12565 Literal::Number("1".to_string()),
12566 )),
12567 ),
12568 (
12569 Expression::Gt(Box::new(
12570 BinaryOp::new(a.clone(), b.clone()),
12571 )),
12572 Expression::Literal(Box::new(
12573 Literal::Number("-1".to_string()),
12574 )),
12575 ),
12576 ],
12577 else_: Some(Expression::Literal(Box::new(
12578 Literal::Number("0".to_string()),
12579 ))),
12580 comments: Vec::new(),
12581 inferred_type: None,
12582 },
12583 ));
12584 let lambda = Expression::Lambda(Box::new(
12585 crate::expressions::LambdaExpr {
12586 parameters: vec![
12587 crate::expressions::Identifier::new("a"),
12588 crate::expressions::Identifier::new("b"),
12589 ],
12590 body: case_expr,
12591 colon: false,
12592 parameter_types: Vec::new(),
12593 },
12594 ));
12595 Ok(Expression::Function(Box::new(Function::new(
12596 "ARRAY_SORT".to_string(),
12597 vec![arr_arg, lambda],
12598 ))))
12599 }
12600 _ => Ok(Expression::Function(f)),
12601 }
12602 } else {
12603 // SORT_ARRAY(x, TRUE) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for others
12604 match target {
12605 DialectType::Hive => Ok(Expression::Function(f)),
12606 DialectType::DuckDB => {
12607 Ok(Expression::Function(Box::new(Function::new(
12608 "LIST_SORT".to_string(),
12609 vec![f.args.into_iter().next().unwrap()],
12610 ))))
12611 }
12612 _ => Ok(Expression::Function(Box::new(Function::new(
12613 "ARRAY_SORT".to_string(),
12614 vec![f.args.into_iter().next().unwrap()],
12615 )))),
12616 }
12617 }
12618 }
12619 // LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
12620 "LEFT" if f.args.len() == 2 => {
12621 match target {
12622 DialectType::Hive
12623 | DialectType::Presto
12624 | DialectType::Trino
12625 | DialectType::Athena => {
12626 let x = f.args[0].clone();
12627 let n = f.args[1].clone();
12628 Ok(Expression::Function(Box::new(Function::new(
12629 "SUBSTRING".to_string(),
12630 vec![x, Expression::number(1), n],
12631 ))))
12632 }
12633 DialectType::Spark | DialectType::Databricks
12634 if matches!(
12635 source,
12636 DialectType::TSQL | DialectType::Fabric
12637 ) =>
12638 {
12639 // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
12640 let x = f.args[0].clone();
12641 let n = f.args[1].clone();
12642 let cast_x = Expression::Cast(Box::new(Cast {
12643 this: x,
12644 to: DataType::VarChar {
12645 length: None,
12646 parenthesized_length: false,
12647 },
12648 double_colon_syntax: false,
12649 trailing_comments: Vec::new(),
12650 format: None,
12651 default: None,
12652 inferred_type: None,
12653 }));
12654 Ok(Expression::Function(Box::new(Function::new(
12655 "LEFT".to_string(),
12656 vec![cast_x, n],
12657 ))))
12658 }
12659 _ => Ok(Expression::Function(f)),
12660 }
12661 }
12662 "RIGHT" if f.args.len() == 2 => {
12663 match target {
12664 DialectType::Hive
12665 | DialectType::Presto
12666 | DialectType::Trino
12667 | DialectType::Athena => {
12668 let x = f.args[0].clone();
12669 let n = f.args[1].clone();
12670 // SUBSTRING(x, LENGTH(x) - (n - 1))
12671 let len_x = Expression::Function(Box::new(Function::new(
12672 "LENGTH".to_string(),
12673 vec![x.clone()],
12674 )));
12675 let n_minus_1 = Expression::Sub(Box::new(
12676 crate::expressions::BinaryOp::new(
12677 n,
12678 Expression::number(1),
12679 ),
12680 ));
12681 let n_minus_1_paren = Expression::Paren(Box::new(
12682 crate::expressions::Paren {
12683 this: n_minus_1,
12684 trailing_comments: Vec::new(),
12685 },
12686 ));
12687 let offset = Expression::Sub(Box::new(
12688 crate::expressions::BinaryOp::new(
12689 len_x,
12690 n_minus_1_paren,
12691 ),
12692 ));
12693 Ok(Expression::Function(Box::new(Function::new(
12694 "SUBSTRING".to_string(),
12695 vec![x, offset],
12696 ))))
12697 }
12698 DialectType::Spark | DialectType::Databricks
12699 if matches!(
12700 source,
12701 DialectType::TSQL | DialectType::Fabric
12702 ) =>
12703 {
12704 // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
12705 let x = f.args[0].clone();
12706 let n = f.args[1].clone();
12707 let cast_x = Expression::Cast(Box::new(Cast {
12708 this: x,
12709 to: DataType::VarChar {
12710 length: None,
12711 parenthesized_length: false,
12712 },
12713 double_colon_syntax: false,
12714 trailing_comments: Vec::new(),
12715 format: None,
12716 default: None,
12717 inferred_type: None,
12718 }));
12719 Ok(Expression::Function(Box::new(Function::new(
12720 "RIGHT".to_string(),
12721 vec![cast_x, n],
12722 ))))
12723 }
12724 _ => Ok(Expression::Function(f)),
12725 }
12726 }
12727 // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
12728 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
12729 DialectType::Snowflake => Ok(Expression::Function(Box::new(
12730 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
12731 ))),
12732 DialectType::Spark | DialectType::Databricks => {
12733 Ok(Expression::Function(Box::new(Function::new(
12734 "MAP_FROM_ARRAYS".to_string(),
12735 f.args,
12736 ))))
12737 }
12738 _ => Ok(Expression::Function(Box::new(Function::new(
12739 "MAP".to_string(),
12740 f.args,
12741 )))),
12742 },
12743 // LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
12744 // SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
12745 "LIKE" if f.args.len() >= 2 => {
12746 let (this, pattern) = if matches!(source, DialectType::SQLite) {
12747 // SQLite: LIKE(pattern, string) -> string LIKE pattern
12748 (f.args[1].clone(), f.args[0].clone())
12749 } else {
12750 // Standard: LIKE(string, pattern) -> string LIKE pattern
12751 (f.args[0].clone(), f.args[1].clone())
12752 };
12753 let escape = if f.args.len() >= 3 {
12754 Some(f.args[2].clone())
12755 } else {
12756 None
12757 };
12758 Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
12759 left: this,
12760 right: pattern,
12761 escape,
12762 quantifier: None,
12763 inferred_type: None,
12764 })))
12765 }
12766 // ILIKE(foo, 'pat') -> foo ILIKE 'pat'
12767 "ILIKE" if f.args.len() >= 2 => {
12768 let this = f.args[0].clone();
12769 let pattern = f.args[1].clone();
12770 let escape = if f.args.len() >= 3 {
12771 Some(f.args[2].clone())
12772 } else {
12773 None
12774 };
12775 Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
12776 left: this,
12777 right: pattern,
12778 escape,
12779 quantifier: None,
12780 inferred_type: None,
12781 })))
12782 }
12783 // CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
12784 "CHAR" if f.args.len() == 1 => match target {
12785 DialectType::MySQL
12786 | DialectType::SingleStore
12787 | DialectType::TSQL => Ok(Expression::Function(f)),
12788 _ => Ok(Expression::Function(Box::new(Function::new(
12789 "CHR".to_string(),
12790 f.args,
12791 )))),
12792 },
12793 // CONCAT(a, b) -> a || b for PostgreSQL
12794 "CONCAT"
12795 if f.args.len() == 2
12796 && matches!(target, DialectType::PostgreSQL)
12797 && matches!(
12798 source,
12799 DialectType::ClickHouse | DialectType::MySQL
12800 ) =>
12801 {
12802 let mut args = f.args;
12803 let right = args.pop().unwrap();
12804 let left = args.pop().unwrap();
12805 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
12806 this: Box::new(left),
12807 expression: Box::new(right),
12808 safe: None,
12809 })))
12810 }
12811 // ARRAY_TO_STRING(arr, delim) -> target-specific
12812 "ARRAY_TO_STRING"
12813 if f.args.len() == 2
12814 && matches!(target, DialectType::DuckDB)
12815 && matches!(source, DialectType::Snowflake) =>
12816 {
12817 let mut args = f.args;
12818 let arr = args.remove(0);
12819 let sep = args.remove(0);
12820 // sep IS NULL
12821 let sep_is_null = Expression::IsNull(Box::new(IsNull {
12822 this: sep.clone(),
12823 not: false,
12824 postfix_form: false,
12825 }));
12826 // COALESCE(CAST(x AS TEXT), '')
12827 let cast_x = Expression::Cast(Box::new(Cast {
12828 this: Expression::Identifier(Identifier::new("x")),
12829 to: DataType::Text,
12830 trailing_comments: Vec::new(),
12831 double_colon_syntax: false,
12832 format: None,
12833 default: None,
12834 inferred_type: None,
12835 }));
12836 let coalesce = Expression::Coalesce(Box::new(
12837 crate::expressions::VarArgFunc {
12838 original_name: None,
12839 expressions: vec![
12840 cast_x,
12841 Expression::Literal(Box::new(Literal::String(
12842 String::new(),
12843 ))),
12844 ],
12845 inferred_type: None,
12846 },
12847 ));
12848 let lambda =
12849 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
12850 parameters: vec![Identifier::new("x")],
12851 body: coalesce,
12852 colon: false,
12853 parameter_types: Vec::new(),
12854 }));
12855 let list_transform = Expression::Function(Box::new(Function::new(
12856 "LIST_TRANSFORM".to_string(),
12857 vec![arr, lambda],
12858 )));
12859 let array_to_string =
12860 Expression::Function(Box::new(Function::new(
12861 "ARRAY_TO_STRING".to_string(),
12862 vec![list_transform, sep],
12863 )));
12864 Ok(Expression::Case(Box::new(Case {
12865 operand: None,
12866 whens: vec![(sep_is_null, Expression::Null(Null))],
12867 else_: Some(array_to_string),
12868 comments: Vec::new(),
12869 inferred_type: None,
12870 })))
12871 }
12872 "ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
12873 DialectType::Presto | DialectType::Trino => {
12874 Ok(Expression::Function(Box::new(Function::new(
12875 "ARRAY_JOIN".to_string(),
12876 f.args,
12877 ))))
12878 }
12879 DialectType::TSQL => Ok(Expression::Function(Box::new(
12880 Function::new("STRING_AGG".to_string(), f.args),
12881 ))),
12882 _ => Ok(Expression::Function(f)),
12883 },
12884 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
12885 "ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
12886 DialectType::Spark
12887 | DialectType::Databricks
12888 | DialectType::Hive => Ok(Expression::Function(Box::new(
12889 Function::new("CONCAT".to_string(), f.args),
12890 ))),
12891 DialectType::Snowflake => Ok(Expression::Function(Box::new(
12892 Function::new("ARRAY_CAT".to_string(), f.args),
12893 ))),
12894 DialectType::Redshift => Ok(Expression::Function(Box::new(
12895 Function::new("ARRAY_CONCAT".to_string(), f.args),
12896 ))),
12897 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
12898 Function::new("ARRAY_CAT".to_string(), f.args),
12899 ))),
12900 DialectType::DuckDB => Ok(Expression::Function(Box::new(
12901 Function::new("LIST_CONCAT".to_string(), f.args),
12902 ))),
12903 DialectType::Presto | DialectType::Trino => {
12904 Ok(Expression::Function(Box::new(Function::new(
12905 "CONCAT".to_string(),
12906 f.args,
12907 ))))
12908 }
12909 DialectType::BigQuery => Ok(Expression::Function(Box::new(
12910 Function::new("ARRAY_CONCAT".to_string(), f.args),
12911 ))),
12912 _ => Ok(Expression::Function(f)),
12913 },
12914 // ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
12915 "HAS" if f.args.len() == 2 => match target {
12916 DialectType::Spark
12917 | DialectType::Databricks
12918 | DialectType::Hive => Ok(Expression::Function(Box::new(
12919 Function::new("ARRAY_CONTAINS".to_string(), f.args),
12920 ))),
12921 DialectType::Presto | DialectType::Trino => {
12922 Ok(Expression::Function(Box::new(Function::new(
12923 "CONTAINS".to_string(),
12924 f.args,
12925 ))))
12926 }
12927 _ => Ok(Expression::Function(f)),
12928 },
12929 // NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
12930 "NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
12931 Function::new("COALESCE".to_string(), f.args),
12932 ))),
12933 // ISNULL(x) in MySQL -> (x IS NULL)
12934 "ISNULL"
12935 if f.args.len() == 1
12936 && matches!(source, DialectType::MySQL)
12937 && matches!(target, DialectType::MySQL) =>
12938 {
12939 let arg = f.args.into_iter().next().unwrap();
12940 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
12941 this: Expression::IsNull(Box::new(
12942 crate::expressions::IsNull {
12943 this: arg,
12944 not: false,
12945 postfix_form: false,
12946 },
12947 )),
12948 trailing_comments: Vec::new(),
12949 })))
12950 }
12951 // MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL -> MySQL
12952 "MONTHNAME"
12953 if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
12954 {
12955 let arg = f.args.into_iter().next().unwrap();
12956 Ok(Expression::Function(Box::new(Function::new(
12957 "DATE_FORMAT".to_string(),
12958 vec![arg, Expression::string("%M")],
12959 ))))
12960 }
12961 // ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
12962 "SPLITBYSTRING" if f.args.len() == 2 => {
12963 let sep = f.args[0].clone();
12964 let str_arg = f.args[1].clone();
12965 match target {
12966 DialectType::DuckDB => Ok(Expression::Function(Box::new(
12967 Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
12968 ))),
12969 DialectType::Doris => {
12970 Ok(Expression::Function(Box::new(Function::new(
12971 "SPLIT_BY_STRING".to_string(),
12972 vec![str_arg, sep],
12973 ))))
12974 }
12975 DialectType::Hive
12976 | DialectType::Spark
12977 | DialectType::Databricks => {
12978 // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
12979 let escaped =
12980 Expression::Function(Box::new(Function::new(
12981 "CONCAT".to_string(),
12982 vec![
12983 Expression::string("\\Q"),
12984 sep,
12985 Expression::string("\\E"),
12986 ],
12987 )));
12988 Ok(Expression::Function(Box::new(Function::new(
12989 "SPLIT".to_string(),
12990 vec![str_arg, escaped],
12991 ))))
12992 }
12993 _ => Ok(Expression::Function(f)),
12994 }
12995 }
12996 // ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
12997 "SPLITBYREGEXP" if f.args.len() == 2 => {
12998 let sep = f.args[0].clone();
12999 let str_arg = f.args[1].clone();
13000 match target {
13001 DialectType::DuckDB => {
13002 Ok(Expression::Function(Box::new(Function::new(
13003 "STR_SPLIT_REGEX".to_string(),
13004 vec![str_arg, sep],
13005 ))))
13006 }
13007 DialectType::Hive
13008 | DialectType::Spark
13009 | DialectType::Databricks => {
13010 Ok(Expression::Function(Box::new(Function::new(
13011 "SPLIT".to_string(),
13012 vec![str_arg, sep],
13013 ))))
13014 }
13015 _ => Ok(Expression::Function(f)),
13016 }
13017 }
13018 // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
13019 "TOMONDAY" => {
13020 if f.args.len() == 1 {
13021 let arg = f.args.into_iter().next().unwrap();
13022 match target {
13023 DialectType::Doris => {
13024 Ok(Expression::Function(Box::new(Function::new(
13025 "DATE_TRUNC".to_string(),
13026 vec![arg, Expression::string("WEEK")],
13027 ))))
13028 }
13029 _ => Ok(Expression::Function(Box::new(Function::new(
13030 "DATE_TRUNC".to_string(),
13031 vec![Expression::string("WEEK"), arg],
13032 )))),
13033 }
13034 } else {
13035 Ok(Expression::Function(f))
13036 }
13037 }
13038 // COLLECT_LIST with FILTER(WHERE x IS NOT NULL) for targets that need it
13039 "COLLECT_LIST" if f.args.len() == 1 => match target {
13040 DialectType::Spark
13041 | DialectType::Databricks
13042 | DialectType::Hive => Ok(Expression::Function(f)),
13043 _ => Ok(Expression::Function(Box::new(Function::new(
13044 "ARRAY_AGG".to_string(),
13045 f.args,
13046 )))),
13047 },
13048 // TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
13049 "TO_CHAR"
13050 if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
13051 {
13052 let arg = f.args.into_iter().next().unwrap();
13053 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
13054 this: arg,
13055 to: DataType::Custom {
13056 name: "STRING".to_string(),
13057 },
13058 double_colon_syntax: false,
13059 trailing_comments: Vec::new(),
13060 format: None,
13061 default: None,
13062 inferred_type: None,
13063 })))
13064 }
13065 // DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL
13066 "DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
13067 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
13068 Function::new("RANDOM".to_string(), vec![]),
13069 ))),
13070 _ => Ok(Expression::Function(f)),
13071 },
13072 // ClickHouse formatDateTime -> target-specific
13073 "FORMATDATETIME" if f.args.len() >= 2 => match target {
13074 DialectType::MySQL => Ok(Expression::Function(Box::new(
13075 Function::new("DATE_FORMAT".to_string(), f.args),
13076 ))),
13077 _ => Ok(Expression::Function(f)),
13078 },
13079 // REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
13080 "REPLICATE" if f.args.len() == 2 => match target {
13081 DialectType::TSQL => Ok(Expression::Function(f)),
13082 _ => Ok(Expression::Function(Box::new(Function::new(
13083 "REPEAT".to_string(),
13084 f.args,
13085 )))),
13086 },
13087 // LEN(x) -> LENGTH(x) for non-TSQL targets
13088 // No CAST needed when arg is already a string literal
13089 "LEN" if f.args.len() == 1 => {
13090 match target {
13091 DialectType::TSQL => Ok(Expression::Function(f)),
13092 DialectType::Spark | DialectType::Databricks => {
13093 let arg = f.args.into_iter().next().unwrap();
13094 // Don't wrap string literals with CAST - they're already strings
13095 let is_string = matches!(
13096 &arg,
13097 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
13098 );
13099 let final_arg = if is_string {
13100 arg
13101 } else {
13102 Expression::Cast(Box::new(Cast {
13103 this: arg,
13104 to: DataType::VarChar {
13105 length: None,
13106 parenthesized_length: false,
13107 },
13108 double_colon_syntax: false,
13109 trailing_comments: Vec::new(),
13110 format: None,
13111 default: None,
13112 inferred_type: None,
13113 }))
13114 };
13115 Ok(Expression::Function(Box::new(Function::new(
13116 "LENGTH".to_string(),
13117 vec![final_arg],
13118 ))))
13119 }
13120 _ => {
13121 let arg = f.args.into_iter().next().unwrap();
13122 Ok(Expression::Function(Box::new(Function::new(
13123 "LENGTH".to_string(),
13124 vec![arg],
13125 ))))
13126 }
13127 }
13128 }
13129 // COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
13130 "COUNT_BIG" if f.args.len() == 1 => match target {
13131 DialectType::TSQL => Ok(Expression::Function(f)),
13132 _ => Ok(Expression::Function(Box::new(Function::new(
13133 "COUNT".to_string(),
13134 f.args,
13135 )))),
13136 },
13137 // DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
13138 "DATEFROMPARTS" if f.args.len() == 3 => match target {
13139 DialectType::TSQL => Ok(Expression::Function(f)),
13140 _ => Ok(Expression::Function(Box::new(Function::new(
13141 "MAKE_DATE".to_string(),
13142 f.args,
13143 )))),
13144 },
13145 // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
13146 "REGEXP_LIKE" if f.args.len() >= 2 => {
13147 let str_expr = f.args[0].clone();
13148 let pattern = f.args[1].clone();
13149 let flags = if f.args.len() >= 3 {
13150 Some(f.args[2].clone())
13151 } else {
13152 None
13153 };
13154 match target {
13155 DialectType::DuckDB => {
13156 let mut new_args = vec![str_expr, pattern];
13157 if let Some(fl) = flags {
13158 new_args.push(fl);
13159 }
13160 Ok(Expression::Function(Box::new(Function::new(
13161 "REGEXP_MATCHES".to_string(),
13162 new_args,
13163 ))))
13164 }
13165 _ => Ok(Expression::RegexpLike(Box::new(
13166 crate::expressions::RegexpFunc {
13167 this: str_expr,
13168 pattern,
13169 flags,
13170 },
13171 ))),
13172 }
13173 }
13174 // ClickHouse arrayJoin -> UNNEST for PostgreSQL
13175 "ARRAYJOIN" if f.args.len() == 1 => match target {
13176 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
13177 Function::new("UNNEST".to_string(), f.args),
13178 ))),
13179 _ => Ok(Expression::Function(f)),
13180 },
13181 // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
13182 "DATETIMEFROMPARTS" if f.args.len() == 7 => {
13183 match target {
13184 DialectType::TSQL => Ok(Expression::Function(f)),
13185 DialectType::DuckDB => {
13186 // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
13187 let mut args = f.args;
13188 let ms = args.pop().unwrap();
13189 let s = args.pop().unwrap();
13190 // s + (ms / 1000.0)
13191 let ms_frac = Expression::Div(Box::new(BinaryOp::new(
13192 ms,
13193 Expression::Literal(Box::new(
13194 crate::expressions::Literal::Number(
13195 "1000.0".to_string(),
13196 ),
13197 )),
13198 )));
13199 let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
13200 s,
13201 Expression::Paren(Box::new(Paren {
13202 this: ms_frac,
13203 trailing_comments: vec![],
13204 })),
13205 )));
13206 args.push(s_with_ms);
13207 Ok(Expression::Function(Box::new(Function::new(
13208 "MAKE_TIMESTAMP".to_string(),
13209 args,
13210 ))))
13211 }
13212 DialectType::Snowflake => {
13213 // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
13214 let mut args = f.args;
13215 let ms = args.pop().unwrap();
13216 // ms * 1000000
13217 let ns = Expression::Mul(Box::new(BinaryOp::new(
13218 ms,
13219 Expression::number(1000000),
13220 )));
13221 args.push(ns);
13222 Ok(Expression::Function(Box::new(Function::new(
13223 "TIMESTAMP_FROM_PARTS".to_string(),
13224 args,
13225 ))))
13226 }
13227 _ => {
13228 // Default: keep function name for other targets
13229 Ok(Expression::Function(Box::new(Function::new(
13230 "DATETIMEFROMPARTS".to_string(),
13231 f.args,
13232 ))))
13233 }
13234 }
13235 }
13236 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
13237 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
13238 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
13239 let is_try = name == "TRY_CONVERT";
13240 let type_expr = f.args[0].clone();
13241 let value_expr = f.args[1].clone();
13242 let style = if f.args.len() >= 3 {
13243 Some(&f.args[2])
13244 } else {
13245 None
13246 };
13247
13248 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
13249 if matches!(target, DialectType::TSQL) {
13250 let normalized_type = match &type_expr {
13251 Expression::DataType(dt) => {
13252 let new_dt = match dt {
13253 DataType::Int { .. } => DataType::Custom {
13254 name: "INTEGER".to_string(),
13255 },
13256 _ => dt.clone(),
13257 };
13258 Expression::DataType(new_dt)
13259 }
13260 Expression::Identifier(id) => {
13261 if id.name.eq_ignore_ascii_case("INT") {
13262 Expression::Identifier(
13263 crate::expressions::Identifier::new("INTEGER"),
13264 )
13265 } else {
13266 let upper = id.name.to_ascii_uppercase();
13267 Expression::Identifier(
13268 crate::expressions::Identifier::new(upper),
13269 )
13270 }
13271 }
13272 Expression::Column(col) => {
13273 if col.name.name.eq_ignore_ascii_case("INT") {
13274 Expression::Identifier(
13275 crate::expressions::Identifier::new("INTEGER"),
13276 )
13277 } else {
13278 let upper = col.name.name.to_ascii_uppercase();
13279 Expression::Identifier(
13280 crate::expressions::Identifier::new(upper),
13281 )
13282 }
13283 }
13284 _ => type_expr.clone(),
13285 };
13286 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
13287 let mut new_args = vec![normalized_type, value_expr];
13288 if let Some(s) = style {
13289 new_args.push(s.clone());
13290 }
13291 return Ok(Expression::Function(Box::new(Function::new(
13292 func_name.to_string(),
13293 new_args,
13294 ))));
13295 }
13296
13297 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
13298 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
13299 match e {
13300 Expression::DataType(dt) => {
13301 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
13302 match dt {
13303 DataType::Custom { name }
13304 if name.starts_with("NVARCHAR(")
13305 || name.starts_with("NCHAR(") =>
13306 {
13307 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
13308 let inner = &name[name.find('(').unwrap() + 1
13309 ..name.len() - 1];
13310 if inner.eq_ignore_ascii_case("MAX") {
13311 Some(DataType::Text)
13312 } else if let Ok(len) = inner.parse::<u32>() {
13313 if name.starts_with("NCHAR") {
13314 Some(DataType::Char {
13315 length: Some(len),
13316 })
13317 } else {
13318 Some(DataType::VarChar {
13319 length: Some(len),
13320 parenthesized_length: false,
13321 })
13322 }
13323 } else {
13324 Some(dt.clone())
13325 }
13326 }
13327 DataType::Custom { name } if name == "NVARCHAR" => {
13328 Some(DataType::VarChar {
13329 length: None,
13330 parenthesized_length: false,
13331 })
13332 }
13333 DataType::Custom { name } if name == "NCHAR" => {
13334 Some(DataType::Char { length: None })
13335 }
13336 DataType::Custom { name }
13337 if name == "NVARCHAR(MAX)"
13338 || name == "VARCHAR(MAX)" =>
13339 {
13340 Some(DataType::Text)
13341 }
13342 _ => Some(dt.clone()),
13343 }
13344 }
13345 Expression::Identifier(id) => {
13346 let name = id.name.to_ascii_uppercase();
13347 match name.as_str() {
13348 "INT" | "INTEGER" => Some(DataType::Int {
13349 length: None,
13350 integer_spelling: false,
13351 }),
13352 "BIGINT" => Some(DataType::BigInt { length: None }),
13353 "SMALLINT" => {
13354 Some(DataType::SmallInt { length: None })
13355 }
13356 "TINYINT" => {
13357 Some(DataType::TinyInt { length: None })
13358 }
13359 "FLOAT" => Some(DataType::Float {
13360 precision: None,
13361 scale: None,
13362 real_spelling: false,
13363 }),
13364 "REAL" => Some(DataType::Float {
13365 precision: None,
13366 scale: None,
13367 real_spelling: true,
13368 }),
13369 "DATETIME" | "DATETIME2" => {
13370 Some(DataType::Timestamp {
13371 timezone: false,
13372 precision: None,
13373 })
13374 }
13375 "DATE" => Some(DataType::Date),
13376 "BIT" => Some(DataType::Boolean),
13377 "TEXT" => Some(DataType::Text),
13378 "NUMERIC" => Some(DataType::Decimal {
13379 precision: None,
13380 scale: None,
13381 }),
13382 "MONEY" => Some(DataType::Decimal {
13383 precision: Some(15),
13384 scale: Some(4),
13385 }),
13386 "SMALLMONEY" => Some(DataType::Decimal {
13387 precision: Some(6),
13388 scale: Some(4),
13389 }),
13390 "VARCHAR" => Some(DataType::VarChar {
13391 length: None,
13392 parenthesized_length: false,
13393 }),
13394 "NVARCHAR" => Some(DataType::VarChar {
13395 length: None,
13396 parenthesized_length: false,
13397 }),
13398 "CHAR" => Some(DataType::Char { length: None }),
13399 "NCHAR" => Some(DataType::Char { length: None }),
13400 _ => Some(DataType::Custom { name }),
13401 }
13402 }
13403 Expression::Column(col) => {
13404 let name = col.name.name.to_ascii_uppercase();
13405 match name.as_str() {
13406 "INT" | "INTEGER" => Some(DataType::Int {
13407 length: None,
13408 integer_spelling: false,
13409 }),
13410 "BIGINT" => Some(DataType::BigInt { length: None }),
13411 "FLOAT" => Some(DataType::Float {
13412 precision: None,
13413 scale: None,
13414 real_spelling: false,
13415 }),
13416 "DATETIME" | "DATETIME2" => {
13417 Some(DataType::Timestamp {
13418 timezone: false,
13419 precision: None,
13420 })
13421 }
13422 "DATE" => Some(DataType::Date),
13423 "NUMERIC" => Some(DataType::Decimal {
13424 precision: None,
13425 scale: None,
13426 }),
13427 "VARCHAR" => Some(DataType::VarChar {
13428 length: None,
13429 parenthesized_length: false,
13430 }),
13431 "NVARCHAR" => Some(DataType::VarChar {
13432 length: None,
13433 parenthesized_length: false,
13434 }),
13435 "CHAR" => Some(DataType::Char { length: None }),
13436 "NCHAR" => Some(DataType::Char { length: None }),
13437 _ => Some(DataType::Custom { name }),
13438 }
13439 }
13440 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
13441 Expression::Function(f) => {
13442 let fname = f.name.to_ascii_uppercase();
13443 match fname.as_str() {
13444 "VARCHAR" | "NVARCHAR" => {
13445 let len = f.args.first().and_then(|a| {
13446 if let Expression::Literal(lit) = a
13447 {
13448 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
13449 n.parse::<u32>().ok()
13450 } else { None }
13451 } else if let Expression::Identifier(id) = a
13452 {
13453 if id.name.eq_ignore_ascii_case("MAX") {
13454 None
13455 } else {
13456 None
13457 }
13458 } else {
13459 None
13460 }
13461 });
13462 // Check for VARCHAR(MAX) -> TEXT
13463 let is_max = f.args.first().map_or(false, |a| {
13464 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
13465 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
13466 });
13467 if is_max {
13468 Some(DataType::Text)
13469 } else {
13470 Some(DataType::VarChar {
13471 length: len,
13472 parenthesized_length: false,
13473 })
13474 }
13475 }
13476 "NCHAR" | "CHAR" => {
13477 let len = f.args.first().and_then(|a| {
13478 if let Expression::Literal(lit) = a
13479 {
13480 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
13481 n.parse::<u32>().ok()
13482 } else { None }
13483 } else {
13484 None
13485 }
13486 });
13487 Some(DataType::Char { length: len })
13488 }
13489 "NUMERIC" | "DECIMAL" => {
13490 let precision = f.args.first().and_then(|a| {
13491 if let Expression::Literal(lit) = a
13492 {
13493 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
13494 n.parse::<u32>().ok()
13495 } else { None }
13496 } else {
13497 None
13498 }
13499 });
13500 let scale = f.args.get(1).and_then(|a| {
13501 if let Expression::Literal(lit) = a
13502 {
13503 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
13504 n.parse::<u32>().ok()
13505 } else { None }
13506 } else {
13507 None
13508 }
13509 });
13510 Some(DataType::Decimal { precision, scale })
13511 }
13512 _ => None,
13513 }
13514 }
13515 _ => None,
13516 }
13517 }
13518
13519 if let Some(mut dt) = expr_to_datatype(&type_expr) {
13520 // For TSQL source: VARCHAR/CHAR without length defaults to 30
13521 let is_tsql_source =
13522 matches!(source, DialectType::TSQL | DialectType::Fabric);
13523 if is_tsql_source {
13524 match &dt {
13525 DataType::VarChar { length: None, .. } => {
13526 dt = DataType::VarChar {
13527 length: Some(30),
13528 parenthesized_length: false,
13529 };
13530 }
13531 DataType::Char { length: None } => {
13532 dt = DataType::Char { length: Some(30) };
13533 }
13534 _ => {}
13535 }
13536 }
13537
13538 // Determine if this is a string type
13539 let is_string_type = matches!(
13540 dt,
13541 DataType::VarChar { .. }
13542 | DataType::Char { .. }
13543 | DataType::Text
13544 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
13545 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
13546 || name.starts_with("VARCHAR(") || name == "VARCHAR"
13547 || name == "STRING");
13548
13549 // Determine if this is a date/time type
13550 let is_datetime_type = matches!(
13551 dt,
13552 DataType::Timestamp { .. } | DataType::Date
13553 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
13554 || name == "DATETIME2" || name == "SMALLDATETIME");
13555
13556 // Check for date conversion with style
13557 if style.is_some() {
13558 let style_num = style.and_then(|s| {
13559 if let Expression::Literal(lit) = s {
13560 if let crate::expressions::Literal::Number(n) =
13561 lit.as_ref()
13562 {
13563 n.parse::<u32>().ok()
13564 } else {
13565 None
13566 }
13567 } else {
13568 None
13569 }
13570 });
13571
13572 // TSQL CONVERT date styles (Java format)
13573 let format_str = style_num.and_then(|n| match n {
13574 101 => Some("MM/dd/yyyy"),
13575 102 => Some("yyyy.MM.dd"),
13576 103 => Some("dd/MM/yyyy"),
13577 104 => Some("dd.MM.yyyy"),
13578 105 => Some("dd-MM-yyyy"),
13579 108 => Some("HH:mm:ss"),
13580 110 => Some("MM-dd-yyyy"),
13581 112 => Some("yyyyMMdd"),
13582 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
13583 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
13584 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
13585 _ => None,
13586 });
13587
13588 // Non-string, non-datetime types with style: just CAST, ignore the style
13589 if !is_string_type && !is_datetime_type {
13590 let cast_expr = if is_try {
13591 Expression::TryCast(Box::new(
13592 crate::expressions::Cast {
13593 this: value_expr,
13594 to: dt,
13595 trailing_comments: Vec::new(),
13596 double_colon_syntax: false,
13597 format: None,
13598 default: None,
13599 inferred_type: None,
13600 },
13601 ))
13602 } else {
13603 Expression::Cast(Box::new(
13604 crate::expressions::Cast {
13605 this: value_expr,
13606 to: dt,
13607 trailing_comments: Vec::new(),
13608 double_colon_syntax: false,
13609 format: None,
13610 default: None,
13611 inferred_type: None,
13612 },
13613 ))
13614 };
13615 return Ok(cast_expr);
13616 }
13617
13618 if let Some(java_fmt) = format_str {
13619 let c_fmt = java_fmt
13620 .replace("yyyy", "%Y")
13621 .replace("MM", "%m")
13622 .replace("dd", "%d")
13623 .replace("HH", "%H")
13624 .replace("mm", "%M")
13625 .replace("ss", "%S")
13626 .replace("SSSSSS", "%f")
13627 .replace("SSS", "%f")
13628 .replace("'T'", "T");
13629
13630 // For datetime target types: style is the INPUT format for parsing strings -> dates
13631 if is_datetime_type {
13632 match target {
13633 DialectType::DuckDB => {
13634 return Ok(Expression::Function(Box::new(
13635 Function::new(
13636 "STRPTIME".to_string(),
13637 vec![
13638 value_expr,
13639 Expression::string(&c_fmt),
13640 ],
13641 ),
13642 )));
13643 }
13644 DialectType::Spark
13645 | DialectType::Databricks => {
13646 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
13647 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
13648 let func_name =
13649 if matches!(dt, DataType::Date) {
13650 "TO_DATE"
13651 } else {
13652 "TO_TIMESTAMP"
13653 };
13654 return Ok(Expression::Function(Box::new(
13655 Function::new(
13656 func_name.to_string(),
13657 vec![
13658 value_expr,
13659 Expression::string(java_fmt),
13660 ],
13661 ),
13662 )));
13663 }
13664 DialectType::Hive => {
13665 return Ok(Expression::Function(Box::new(
13666 Function::new(
13667 "TO_TIMESTAMP".to_string(),
13668 vec![
13669 value_expr,
13670 Expression::string(java_fmt),
13671 ],
13672 ),
13673 )));
13674 }
13675 _ => {
13676 return Ok(Expression::Cast(Box::new(
13677 crate::expressions::Cast {
13678 this: value_expr,
13679 to: dt,
13680 trailing_comments: Vec::new(),
13681 double_colon_syntax: false,
13682 format: None,
13683 default: None,
13684 inferred_type: None,
13685 },
13686 )));
13687 }
13688 }
13689 }
13690
13691 // For string target types: style is the OUTPUT format for dates -> strings
13692 match target {
13693 DialectType::DuckDB => Ok(Expression::Function(
13694 Box::new(Function::new(
13695 "STRPTIME".to_string(),
13696 vec![
13697 value_expr,
13698 Expression::string(&c_fmt),
13699 ],
13700 )),
13701 )),
13702 DialectType::Spark | DialectType::Databricks => {
13703 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
13704 // Determine the target string type
13705 let string_dt = match &dt {
13706 DataType::VarChar {
13707 length: Some(l),
13708 ..
13709 } => DataType::VarChar {
13710 length: Some(*l),
13711 parenthesized_length: false,
13712 },
13713 DataType::Text => DataType::Custom {
13714 name: "STRING".to_string(),
13715 },
13716 _ => DataType::Custom {
13717 name: "STRING".to_string(),
13718 },
13719 };
13720 let date_format_expr = Expression::Function(
13721 Box::new(Function::new(
13722 "DATE_FORMAT".to_string(),
13723 vec![
13724 value_expr,
13725 Expression::string(java_fmt),
13726 ],
13727 )),
13728 );
13729 let cast_expr = if is_try {
13730 Expression::TryCast(Box::new(
13731 crate::expressions::Cast {
13732 this: date_format_expr,
13733 to: string_dt,
13734 trailing_comments: Vec::new(),
13735 double_colon_syntax: false,
13736 format: None,
13737 default: None,
13738 inferred_type: None,
13739 },
13740 ))
13741 } else {
13742 Expression::Cast(Box::new(
13743 crate::expressions::Cast {
13744 this: date_format_expr,
13745 to: string_dt,
13746 trailing_comments: Vec::new(),
13747 double_colon_syntax: false,
13748 format: None,
13749 default: None,
13750 inferred_type: None,
13751 },
13752 ))
13753 };
13754 Ok(cast_expr)
13755 }
13756 DialectType::MySQL | DialectType::SingleStore => {
13757 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
13758 let mysql_fmt = java_fmt
13759 .replace("yyyy", "%Y")
13760 .replace("MM", "%m")
13761 .replace("dd", "%d")
13762 .replace("HH:mm:ss.SSSSSS", "%T")
13763 .replace("HH:mm:ss", "%T")
13764 .replace("HH", "%H")
13765 .replace("mm", "%i")
13766 .replace("ss", "%S");
13767 let date_format_expr = Expression::Function(
13768 Box::new(Function::new(
13769 "DATE_FORMAT".to_string(),
13770 vec![
13771 value_expr,
13772 Expression::string(&mysql_fmt),
13773 ],
13774 )),
13775 );
13776 // MySQL uses CHAR for string casts
13777 let mysql_dt = match &dt {
13778 DataType::VarChar { length, .. } => {
13779 DataType::Char { length: *length }
13780 }
13781 _ => dt,
13782 };
13783 Ok(Expression::Cast(Box::new(
13784 crate::expressions::Cast {
13785 this: date_format_expr,
13786 to: mysql_dt,
13787 trailing_comments: Vec::new(),
13788 double_colon_syntax: false,
13789 format: None,
13790 default: None,
13791 inferred_type: None,
13792 },
13793 )))
13794 }
13795 DialectType::Hive => {
13796 let func_name = "TO_TIMESTAMP";
13797 Ok(Expression::Function(Box::new(
13798 Function::new(
13799 func_name.to_string(),
13800 vec![
13801 value_expr,
13802 Expression::string(java_fmt),
13803 ],
13804 ),
13805 )))
13806 }
13807 _ => Ok(Expression::Cast(Box::new(
13808 crate::expressions::Cast {
13809 this: value_expr,
13810 to: dt,
13811 trailing_comments: Vec::new(),
13812 double_colon_syntax: false,
13813 format: None,
13814 default: None,
13815 inferred_type: None,
13816 },
13817 ))),
13818 }
13819 } else {
13820 // Unknown style, just CAST
13821 let cast_expr = if is_try {
13822 Expression::TryCast(Box::new(
13823 crate::expressions::Cast {
13824 this: value_expr,
13825 to: dt,
13826 trailing_comments: Vec::new(),
13827 double_colon_syntax: false,
13828 format: None,
13829 default: None,
13830 inferred_type: None,
13831 },
13832 ))
13833 } else {
13834 Expression::Cast(Box::new(
13835 crate::expressions::Cast {
13836 this: value_expr,
13837 to: dt,
13838 trailing_comments: Vec::new(),
13839 double_colon_syntax: false,
13840 format: None,
13841 default: None,
13842 inferred_type: None,
13843 },
13844 ))
13845 };
13846 Ok(cast_expr)
13847 }
13848 } else {
13849 // No style - simple CAST
13850 let final_dt = if matches!(
13851 target,
13852 DialectType::MySQL | DialectType::SingleStore
13853 ) {
13854 match &dt {
13855 DataType::Int { .. }
13856 | DataType::BigInt { .. }
13857 | DataType::SmallInt { .. }
13858 | DataType::TinyInt { .. } => DataType::Custom {
13859 name: "SIGNED".to_string(),
13860 },
13861 DataType::VarChar { length, .. } => {
13862 DataType::Char { length: *length }
13863 }
13864 _ => dt,
13865 }
13866 } else {
13867 dt
13868 };
13869 let cast_expr = if is_try {
13870 Expression::TryCast(Box::new(
13871 crate::expressions::Cast {
13872 this: value_expr,
13873 to: final_dt,
13874 trailing_comments: Vec::new(),
13875 double_colon_syntax: false,
13876 format: None,
13877 default: None,
13878 inferred_type: None,
13879 },
13880 ))
13881 } else {
13882 Expression::Cast(Box::new(crate::expressions::Cast {
13883 this: value_expr,
13884 to: final_dt,
13885 trailing_comments: Vec::new(),
13886 double_colon_syntax: false,
13887 format: None,
13888 default: None,
13889 inferred_type: None,
13890 }))
13891 };
13892 Ok(cast_expr)
13893 }
13894 } else {
13895 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
13896 Ok(Expression::Function(f))
13897 }
13898 }
13899 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
13900 "STRFTIME" if f.args.len() == 2 => {
13901 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
13902 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
13903 // SQLite: args[0] = format, args[1] = value
13904 (f.args[1].clone(), &f.args[0])
13905 } else {
13906 // DuckDB and others: args[0] = value, args[1] = format
13907 (f.args[0].clone(), &f.args[1])
13908 };
13909
13910 // Helper to convert C-style format to Java-style
// Translate a C/strftime-style format string into a
// Java/SimpleDateFormat-style pattern.
//
// The pairs are applied sequentially, in the same order as the
// original chained `replace` calls. No replacement output contains a
// '%', so later C codes can never match text produced by an earlier
// substitution. Composite codes "%F" and "%T" expand to full Java
// date/time patterns.
fn c_to_java_format(fmt: &str) -> String {
    const C_TO_JAVA: [(&str, &str); 19] = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%j", "DDD"),
        ("%a", "EEE"),
        ("%b", "MMM"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];
    C_TO_JAVA
        .iter()
        .fold(fmt.to_string(), |acc, (c_code, java)| {
            acc.replace(c_code, java)
        })
}
13932
13933 // Helper: recursively convert format strings within expressions (handles CONCAT)
13934 fn convert_fmt_expr(
13935 expr: &Expression,
13936 converter: &dyn Fn(&str) -> String,
13937 ) -> Expression {
13938 match expr {
13939 Expression::Literal(lit)
13940 if matches!(
13941 lit.as_ref(),
13942 crate::expressions::Literal::String(_)
13943 ) =>
13944 {
13945 let crate::expressions::Literal::String(s) =
13946 lit.as_ref()
13947 else {
13948 unreachable!()
13949 };
13950 Expression::string(&converter(s))
13951 }
13952 Expression::Function(func)
13953 if func.name.eq_ignore_ascii_case("CONCAT") =>
13954 {
13955 let new_args: Vec<Expression> = func
13956 .args
13957 .iter()
13958 .map(|a| convert_fmt_expr(a, converter))
13959 .collect();
13960 Expression::Function(Box::new(Function::new(
13961 "CONCAT".to_string(),
13962 new_args,
13963 )))
13964 }
13965 other => other.clone(),
13966 }
13967 }
13968
13969 match target {
13970 DialectType::DuckDB => {
13971 if matches!(source, DialectType::SQLite) {
13972 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
13973 let cast_val = Expression::Cast(Box::new(Cast {
13974 this: val,
13975 to: crate::expressions::DataType::Timestamp {
13976 precision: None,
13977 timezone: false,
13978 },
13979 trailing_comments: Vec::new(),
13980 double_colon_syntax: false,
13981 format: None,
13982 default: None,
13983 inferred_type: None,
13984 }));
13985 Ok(Expression::Function(Box::new(Function::new(
13986 "STRFTIME".to_string(),
13987 vec![cast_val, fmt_expr.clone()],
13988 ))))
13989 } else {
13990 Ok(Expression::Function(f))
13991 }
13992 }
13993 DialectType::Spark
13994 | DialectType::Databricks
13995 | DialectType::Hive => {
13996 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
13997 let converted_fmt =
13998 convert_fmt_expr(fmt_expr, &c_to_java_format);
13999 Ok(Expression::Function(Box::new(Function::new(
14000 "DATE_FORMAT".to_string(),
14001 vec![val, converted_fmt],
14002 ))))
14003 }
14004 DialectType::TSQL | DialectType::Fabric => {
14005 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
14006 let converted_fmt =
14007 convert_fmt_expr(fmt_expr, &c_to_java_format);
14008 Ok(Expression::Function(Box::new(Function::new(
14009 "FORMAT".to_string(),
14010 vec![val, converted_fmt],
14011 ))))
14012 }
14013 DialectType::Presto
14014 | DialectType::Trino
14015 | DialectType::Athena => {
14016 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
14017 if let Expression::Literal(lit) = fmt_expr {
14018 if let crate::expressions::Literal::String(s) =
14019 lit.as_ref()
14020 {
14021 let presto_fmt = duckdb_to_presto_format(s);
14022 Ok(Expression::Function(Box::new(Function::new(
14023 "DATE_FORMAT".to_string(),
14024 vec![val, Expression::string(&presto_fmt)],
14025 ))))
14026 } else {
14027 Ok(Expression::Function(Box::new(Function::new(
14028 "DATE_FORMAT".to_string(),
14029 vec![val, fmt_expr.clone()],
14030 ))))
14031 }
14032 } else {
14033 Ok(Expression::Function(Box::new(Function::new(
14034 "DATE_FORMAT".to_string(),
14035 vec![val, fmt_expr.clone()],
14036 ))))
14037 }
14038 }
14039 DialectType::BigQuery => {
14040 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
14041 if let Expression::Literal(lit) = fmt_expr {
14042 if let crate::expressions::Literal::String(s) =
14043 lit.as_ref()
14044 {
14045 let bq_fmt = duckdb_to_bigquery_format(s);
14046 Ok(Expression::Function(Box::new(Function::new(
14047 "FORMAT_DATE".to_string(),
14048 vec![Expression::string(&bq_fmt), val],
14049 ))))
14050 } else {
14051 Ok(Expression::Function(Box::new(Function::new(
14052 "FORMAT_DATE".to_string(),
14053 vec![fmt_expr.clone(), val],
14054 ))))
14055 }
14056 } else {
14057 Ok(Expression::Function(Box::new(Function::new(
14058 "FORMAT_DATE".to_string(),
14059 vec![fmt_expr.clone(), val],
14060 ))))
14061 }
14062 }
14063 DialectType::PostgreSQL | DialectType::Redshift => {
14064 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
14065 if let Expression::Literal(lit) = fmt_expr {
14066 if let crate::expressions::Literal::String(s) =
14067 lit.as_ref()
14068 {
14069 let pg_fmt = s
14070 .replace("%Y", "YYYY")
14071 .replace("%m", "MM")
14072 .replace("%d", "DD")
14073 .replace("%H", "HH24")
14074 .replace("%M", "MI")
14075 .replace("%S", "SS")
14076 .replace("%y", "YY")
14077 .replace("%-m", "FMMM")
14078 .replace("%-d", "FMDD")
14079 .replace("%-H", "FMHH24")
14080 .replace("%-I", "FMHH12")
14081 .replace("%p", "AM")
14082 .replace("%F", "YYYY-MM-DD")
14083 .replace("%T", "HH24:MI:SS");
14084 Ok(Expression::Function(Box::new(Function::new(
14085 "TO_CHAR".to_string(),
14086 vec![val, Expression::string(&pg_fmt)],
14087 ))))
14088 } else {
14089 Ok(Expression::Function(Box::new(Function::new(
14090 "TO_CHAR".to_string(),
14091 vec![val, fmt_expr.clone()],
14092 ))))
14093 }
14094 } else {
14095 Ok(Expression::Function(Box::new(Function::new(
14096 "TO_CHAR".to_string(),
14097 vec![val, fmt_expr.clone()],
14098 ))))
14099 }
14100 }
14101 _ => Ok(Expression::Function(f)),
14102 }
14103 }
14104 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
14105 "STRPTIME" if f.args.len() == 2 => {
14106 let val = f.args[0].clone();
14107 let fmt_expr = &f.args[1];
14108
// Translate a C/strftime-style format string (as used by DuckDB's
// STRPTIME) into a Java/SimpleDateFormat-style pattern for the
// target's parse function.
//
// Kept consistent with `c_to_java_format` (the STRFTIME/formatting
// direction), including "%j" -> "DDD" (day of year) and "%b" ->
// "MMM" (abbreviated month name); previously those codes leaked
// through unchanged and produced invalid Java-style parse patterns.
// NOTE(review): "%a" (abbreviated weekday) is deliberately left
// unmapped here — confirm every target's parse function accepts a
// day-of-week pattern before adding it.
fn c_to_java_format_parse(fmt: &str) -> String {
    fmt.replace("%Y", "yyyy")
        .replace("%m", "MM")
        .replace("%d", "dd")
        .replace("%H", "HH")
        .replace("%M", "mm")
        .replace("%S", "ss")
        .replace("%f", "SSSSSS")
        .replace("%y", "yy")
        .replace("%-m", "M")
        .replace("%-d", "d")
        .replace("%-H", "H")
        .replace("%-I", "h")
        .replace("%I", "hh")
        .replace("%p", "a")
        // Parity with c_to_java_format: day-of-year and month name.
        .replace("%j", "DDD")
        .replace("%b", "MMM")
        .replace("%F", "yyyy-MM-dd")
        .replace("%T", "HH:mm:ss")
}
14127
14128 match target {
14129 DialectType::DuckDB => Ok(Expression::Function(f)),
14130 DialectType::Spark | DialectType::Databricks => {
14131 // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
14132 if let Expression::Literal(lit) = fmt_expr {
14133 if let crate::expressions::Literal::String(s) =
14134 lit.as_ref()
14135 {
14136 let java_fmt = c_to_java_format_parse(s);
14137 Ok(Expression::Function(Box::new(Function::new(
14138 "TO_TIMESTAMP".to_string(),
14139 vec![val, Expression::string(&java_fmt)],
14140 ))))
14141 } else {
14142 Ok(Expression::Function(Box::new(Function::new(
14143 "TO_TIMESTAMP".to_string(),
14144 vec![val, fmt_expr.clone()],
14145 ))))
14146 }
14147 } else {
14148 Ok(Expression::Function(Box::new(Function::new(
14149 "TO_TIMESTAMP".to_string(),
14150 vec![val, fmt_expr.clone()],
14151 ))))
14152 }
14153 }
14154 DialectType::Hive => {
14155 // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
14156 if let Expression::Literal(lit) = fmt_expr {
14157 if let crate::expressions::Literal::String(s) =
14158 lit.as_ref()
14159 {
14160 let java_fmt = c_to_java_format_parse(s);
14161 let unix_ts =
14162 Expression::Function(Box::new(Function::new(
14163 "UNIX_TIMESTAMP".to_string(),
14164 vec![val, Expression::string(&java_fmt)],
14165 )));
14166 let from_unix =
14167 Expression::Function(Box::new(Function::new(
14168 "FROM_UNIXTIME".to_string(),
14169 vec![unix_ts],
14170 )));
14171 Ok(Expression::Cast(Box::new(
14172 crate::expressions::Cast {
14173 this: from_unix,
14174 to: DataType::Timestamp {
14175 timezone: false,
14176 precision: None,
14177 },
14178 trailing_comments: Vec::new(),
14179 double_colon_syntax: false,
14180 format: None,
14181 default: None,
14182 inferred_type: None,
14183 },
14184 )))
14185 } else {
14186 Ok(Expression::Function(f))
14187 }
14188 } else {
14189 Ok(Expression::Function(f))
14190 }
14191 }
14192 DialectType::Presto
14193 | DialectType::Trino
14194 | DialectType::Athena => {
14195 // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
14196 if let Expression::Literal(lit) = fmt_expr {
14197 if let crate::expressions::Literal::String(s) =
14198 lit.as_ref()
14199 {
14200 let presto_fmt = duckdb_to_presto_format(s);
14201 Ok(Expression::Function(Box::new(Function::new(
14202 "DATE_PARSE".to_string(),
14203 vec![val, Expression::string(&presto_fmt)],
14204 ))))
14205 } else {
14206 Ok(Expression::Function(Box::new(Function::new(
14207 "DATE_PARSE".to_string(),
14208 vec![val, fmt_expr.clone()],
14209 ))))
14210 }
14211 } else {
14212 Ok(Expression::Function(Box::new(Function::new(
14213 "DATE_PARSE".to_string(),
14214 vec![val, fmt_expr.clone()],
14215 ))))
14216 }
14217 }
14218 DialectType::BigQuery => {
14219 // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
14220 if let Expression::Literal(lit) = fmt_expr {
14221 if let crate::expressions::Literal::String(s) =
14222 lit.as_ref()
14223 {
14224 let bq_fmt = duckdb_to_bigquery_format(s);
14225 Ok(Expression::Function(Box::new(Function::new(
14226 "PARSE_TIMESTAMP".to_string(),
14227 vec![Expression::string(&bq_fmt), val],
14228 ))))
14229 } else {
14230 Ok(Expression::Function(Box::new(Function::new(
14231 "PARSE_TIMESTAMP".to_string(),
14232 vec![fmt_expr.clone(), val],
14233 ))))
14234 }
14235 } else {
14236 Ok(Expression::Function(Box::new(Function::new(
14237 "PARSE_TIMESTAMP".to_string(),
14238 vec![fmt_expr.clone(), val],
14239 ))))
14240 }
14241 }
14242 _ => Ok(Expression::Function(f)),
14243 }
14244 }
// DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
//
// Presto/Trino/Athena's DATE_FORMAT takes a MySQL-style `%` format
// string. When that format is a plain string literal it is translated
// into the target dialect's format language via the PrestoDialect
// helpers. Non-literal formats cannot be rewritten statically: for
// Presto-family and Hive/Spark targets the call is left untouched,
// while for DuckDB/BigQuery only the function name (and, for
// BigQuery, the argument order) is adjusted.
"DATE_FORMAT"
    if f.args.len() >= 2
        && matches!(
            source,
            DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
        ) =>
{
    let val = f.args[0].clone();
    let fmt_expr = &f.args[1];

    match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
            if let Expression::Literal(lit) = fmt_expr {
                if let crate::expressions::Literal::String(s) =
                    lit.as_ref()
                {
                    let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_FORMAT".to_string(),
                        vec![val, Expression::string(&normalized)],
                    ))))
                } else {
                    // Non-string literal: nothing to normalize.
                    Ok(Expression::Function(f))
                }
            } else {
                Ok(Expression::Function(f))
            }
        }
        DialectType::Hive
        | DialectType::Spark
        | DialectType::Databricks => {
            // Convert Presto C-style to Java-style format
            if let Expression::Literal(lit) = fmt_expr {
                if let crate::expressions::Literal::String(s) =
                    lit.as_ref()
                {
                    let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_FORMAT".to_string(),
                        vec![val, Expression::string(&java_fmt)],
                    ))))
                } else {
                    Ok(Expression::Function(f))
                }
            } else {
                Ok(Expression::Function(f))
            }
        }
        DialectType::DuckDB => {
            // Convert to STRFTIME(val, duckdb_fmt)
            // Unlike the branches above, a non-literal format still
            // gets the function renamed (same argument order).
            if let Expression::Literal(lit) = fmt_expr {
                if let crate::expressions::Literal::String(s) =
                    lit.as_ref()
                {
                    let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRFTIME".to_string(),
                        vec![val, Expression::string(&duckdb_fmt)],
                    ))))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRFTIME".to_string(),
                        vec![val, fmt_expr.clone()],
                    ))))
                }
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "STRFTIME".to_string(),
                    vec![val, fmt_expr.clone()],
                ))))
            }
        }
        DialectType::BigQuery => {
            // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
            if let Expression::Literal(lit) = fmt_expr {
                if let crate::expressions::Literal::String(s) =
                    lit.as_ref()
                {
                    let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
                    Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATE".to_string(),
                        vec![Expression::string(&bq_fmt), val],
                    ))))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATE".to_string(),
                        vec![fmt_expr.clone(), val],
                    ))))
                }
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "FORMAT_DATE".to_string(),
                    vec![fmt_expr.clone(), val],
                ))))
            }
        }
        // Any other target: keep the original call unchanged.
        _ => Ok(Expression::Function(f)),
    }
}
// DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
//
// The MySQL-style `%` format is translated when it is a string
// literal; non-literal formats are passed through unchanged, except
// for DuckDB where the call is still renamed to STRPTIME.
"DATE_PARSE"
    if f.args.len() >= 2
        && matches!(
            source,
            DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
        ) =>
{
    let val = f.args[0].clone();
    let fmt_expr = &f.args[1];

    match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto -> Presto: normalize format
            if let Expression::Literal(lit) = fmt_expr {
                if let crate::expressions::Literal::String(s) =
                    lit.as_ref()
                {
                    let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_PARSE".to_string(),
                        vec![val, Expression::string(&normalized)],
                    ))))
                } else {
                    Ok(Expression::Function(f))
                }
            } else {
                Ok(Expression::Function(f))
            }
        }
        DialectType::Hive => {
            // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
            // NOTE(review): the default-DATE format also casts to
            // TIMESTAMP, not DATE — confirm that is intended.
            // NOTE(review): the non-default path emits TO_TIMESTAMP,
            // while the STRPTIME arm lowers Hive parses via
            // UNIX_TIMESTAMP/FROM_UNIXTIME — confirm Hive accepts
            // TO_TIMESTAMP here.
            if let Expression::Literal(lit) = fmt_expr {
                if let crate::expressions::Literal::String(s) =
                    lit.as_ref()
                {
                    if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
                        || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
                        Ok(Expression::Cast(Box::new(crate::expressions::Cast {
                            this: val,
                            to: DataType::Timestamp { timezone: false, precision: None },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    } else {
                        let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_TIMESTAMP".to_string(),
                            vec![val, Expression::string(&java_fmt)],
                        ))))
                    }
                } else {
                    Ok(Expression::Function(f))
                }
            } else {
                Ok(Expression::Function(f))
            }
        }
        DialectType::Spark | DialectType::Databricks => {
            // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
            if let Expression::Literal(lit) = fmt_expr {
                if let crate::expressions::Literal::String(s) =
                    lit.as_ref()
                {
                    let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
                    Ok(Expression::Function(Box::new(Function::new(
                        "TO_TIMESTAMP".to_string(),
                        vec![val, Expression::string(&java_fmt)],
                    ))))
                } else {
                    Ok(Expression::Function(f))
                }
            } else {
                Ok(Expression::Function(f))
            }
        }
        DialectType::DuckDB => {
            // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
            // A non-literal format still gets the rename.
            if let Expression::Literal(lit) = fmt_expr {
                if let crate::expressions::Literal::String(s) =
                    lit.as_ref()
                {
                    let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRPTIME".to_string(),
                        vec![val, Expression::string(&duckdb_fmt)],
                    ))))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRPTIME".to_string(),
                        vec![val, fmt_expr.clone()],
                    ))))
                }
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "STRPTIME".to_string(),
                    vec![val, fmt_expr.clone()],
                ))))
            }
        }
        // Any other target: keep the original call unchanged.
        _ => Ok(Expression::Function(f)),
    }
}
// FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
// NOTE(review): the guard only checks the TARGET (Hive); the source
// dialect is never inspected, so any source's FROM_BASE64 is renamed
// — confirm that is intended despite the "from Presto" comment.
"FROM_BASE64"
    if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
{
    // Hive spells this builtin UNBASE64; the argument passes through.
    Ok(Expression::Function(Box::new(Function::new(
        "UNBASE64".to_string(),
        f.args,
    ))))
}
// TO_BASE64(x) -> Hive's BASE64(x); only the function name changes,
// the single argument passes through untouched. Guard checks the
// target dialect only.
"TO_BASE64"
    if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
{
    Ok(Expression::Function(Box::new(Function::new(
        "BASE64".to_string(),
        f.args,
    ))))
}
// FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
//
// Applies only when transpiling from a Presto-family source to
// Spark/Databricks. Presumably Presto's FROM_UNIXTIME yields a
// timestamp while Spark's yields a string, so the CAST restores the
// original type — TODO confirm against the Spark generator.
"FROM_UNIXTIME"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
        )
        && matches!(
            target,
            DialectType::Spark | DialectType::Databricks
        ) =>
{
    // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
    let from_unix = Expression::Function(Box::new(Function::new(
        "FROM_UNIXTIME".to_string(),
        f.args,
    )));
    Ok(Expression::Cast(Box::new(crate::expressions::Cast {
        this: from_unix,
        to: DataType::Timestamp {
            timezone: false,
            precision: None,
        },
        trailing_comments: Vec::new(),
        double_colon_syntax: false,
        format: None,
        default: None,
        inferred_type: None,
    })))
}
14509 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
14510 "DATE_FORMAT"
14511 if f.args.len() >= 2
14512 && !matches!(
14513 target,
14514 DialectType::Hive
14515 | DialectType::Spark
14516 | DialectType::Databricks
14517 | DialectType::MySQL
14518 | DialectType::SingleStore
14519 ) =>
14520 {
14521 let val = f.args[0].clone();
14522 let fmt_expr = &f.args[1];
14523 let is_hive_source = matches!(
14524 source,
14525 DialectType::Hive
14526 | DialectType::Spark
14527 | DialectType::Databricks
14528 );
14529
14530 fn java_to_c_format(fmt: &str) -> String {
14531 // Replace Java patterns with C strftime patterns.
14532 // Uses multi-pass to handle patterns that conflict.
14533 // First pass: replace multi-char patterns (longer first)
14534 let result = fmt
14535 .replace("yyyy", "%Y")
14536 .replace("SSSSSS", "%f")
14537 .replace("EEEE", "%W")
14538 .replace("MM", "%m")
14539 .replace("dd", "%d")
14540 .replace("HH", "%H")
14541 .replace("mm", "%M")
14542 .replace("ss", "%S")
14543 .replace("yy", "%y");
14544 // Second pass: handle single-char timezone patterns
14545 // z -> %Z (timezone name), Z -> %z (timezone offset)
14546 // Must be careful not to replace 'z'/'Z' inside already-replaced %Y, %M etc.
14547 let mut out = String::new();
14548 let chars: Vec<char> = result.chars().collect();
14549 let mut i = 0;
14550 while i < chars.len() {
14551 if chars[i] == '%' && i + 1 < chars.len() {
14552 // Already a format specifier, skip both chars
14553 out.push(chars[i]);
14554 out.push(chars[i + 1]);
14555 i += 2;
14556 } else if chars[i] == 'z' {
14557 out.push_str("%Z");
14558 i += 1;
14559 } else if chars[i] == 'Z' {
14560 out.push_str("%z");
14561 i += 1;
14562 } else {
14563 out.push(chars[i]);
14564 i += 1;
14565 }
14566 }
14567 out
14568 }
14569
14570 fn java_to_presto_format(fmt: &str) -> String {
14571 // Presto uses %T for HH:MM:SS
14572 let c_fmt = java_to_c_format(fmt);
14573 c_fmt.replace("%H:%M:%S", "%T")
14574 }
14575
14576 fn java_to_bq_format(fmt: &str) -> String {
14577 // BigQuery uses %F for yyyy-MM-dd and %T for HH:mm:ss
14578 let c_fmt = java_to_c_format(fmt);
14579 c_fmt.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
14580 }
14581
14582 // For Hive source, CAST string literals to appropriate type
14583 let cast_val = if is_hive_source {
14584 match &val {
14585 Expression::Literal(lit)
14586 if matches!(
14587 lit.as_ref(),
14588 crate::expressions::Literal::String(_)
14589 ) =>
14590 {
14591 match target {
14592 DialectType::DuckDB
14593 | DialectType::Presto
14594 | DialectType::Trino
14595 | DialectType::Athena => {
14596 Self::ensure_cast_timestamp(val.clone())
14597 }
14598 DialectType::BigQuery => {
14599 // BigQuery: CAST(val AS DATETIME)
14600 Expression::Cast(Box::new(
14601 crate::expressions::Cast {
14602 this: val.clone(),
14603 to: DataType::Custom {
14604 name: "DATETIME".to_string(),
14605 },
14606 trailing_comments: vec![],
14607 double_colon_syntax: false,
14608 format: None,
14609 default: None,
14610 inferred_type: None,
14611 },
14612 ))
14613 }
14614 _ => val.clone(),
14615 }
14616 }
14617 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
14618 Expression::Cast(c)
14619 if matches!(c.to, DataType::Date)
14620 && matches!(
14621 target,
14622 DialectType::Presto
14623 | DialectType::Trino
14624 | DialectType::Athena
14625 ) =>
14626 {
14627 Expression::Cast(Box::new(crate::expressions::Cast {
14628 this: val.clone(),
14629 to: DataType::Timestamp {
14630 timezone: false,
14631 precision: None,
14632 },
14633 trailing_comments: vec![],
14634 double_colon_syntax: false,
14635 format: None,
14636 default: None,
14637 inferred_type: None,
14638 }))
14639 }
14640 Expression::Literal(lit)
14641 if matches!(
14642 lit.as_ref(),
14643 crate::expressions::Literal::Date(_)
14644 ) && matches!(
14645 target,
14646 DialectType::Presto
14647 | DialectType::Trino
14648 | DialectType::Athena
14649 ) =>
14650 {
14651 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
14652 let cast_date = Self::date_literal_to_cast(val.clone());
14653 Expression::Cast(Box::new(crate::expressions::Cast {
14654 this: cast_date,
14655 to: DataType::Timestamp {
14656 timezone: false,
14657 precision: None,
14658 },
14659 trailing_comments: vec![],
14660 double_colon_syntax: false,
14661 format: None,
14662 default: None,
14663 inferred_type: None,
14664 }))
14665 }
14666 _ => val.clone(),
14667 }
14668 } else {
14669 val.clone()
14670 };
14671
14672 match target {
14673 DialectType::DuckDB => {
14674 if let Expression::Literal(lit) = fmt_expr {
14675 if let crate::expressions::Literal::String(s) =
14676 lit.as_ref()
14677 {
14678 let c_fmt = if is_hive_source {
14679 java_to_c_format(s)
14680 } else {
14681 s.clone()
14682 };
14683 Ok(Expression::Function(Box::new(Function::new(
14684 "STRFTIME".to_string(),
14685 vec![cast_val, Expression::string(&c_fmt)],
14686 ))))
14687 } else {
14688 Ok(Expression::Function(Box::new(Function::new(
14689 "STRFTIME".to_string(),
14690 vec![cast_val, fmt_expr.clone()],
14691 ))))
14692 }
14693 } else {
14694 Ok(Expression::Function(Box::new(Function::new(
14695 "STRFTIME".to_string(),
14696 vec![cast_val, fmt_expr.clone()],
14697 ))))
14698 }
14699 }
14700 DialectType::Presto
14701 | DialectType::Trino
14702 | DialectType::Athena => {
14703 if is_hive_source {
14704 if let Expression::Literal(lit) = fmt_expr {
14705 if let crate::expressions::Literal::String(s) =
14706 lit.as_ref()
14707 {
14708 let p_fmt = java_to_presto_format(s);
14709 Ok(Expression::Function(Box::new(
14710 Function::new(
14711 "DATE_FORMAT".to_string(),
14712 vec![
14713 cast_val,
14714 Expression::string(&p_fmt),
14715 ],
14716 ),
14717 )))
14718 } else {
14719 Ok(Expression::Function(Box::new(
14720 Function::new(
14721 "DATE_FORMAT".to_string(),
14722 vec![cast_val, fmt_expr.clone()],
14723 ),
14724 )))
14725 }
14726 } else {
14727 Ok(Expression::Function(Box::new(Function::new(
14728 "DATE_FORMAT".to_string(),
14729 vec![cast_val, fmt_expr.clone()],
14730 ))))
14731 }
14732 } else {
14733 Ok(Expression::Function(Box::new(Function::new(
14734 "DATE_FORMAT".to_string(),
14735 f.args,
14736 ))))
14737 }
14738 }
14739 DialectType::BigQuery => {
14740 // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
14741 if let Expression::Literal(lit) = fmt_expr {
14742 if let crate::expressions::Literal::String(s) =
14743 lit.as_ref()
14744 {
14745 let bq_fmt = if is_hive_source {
14746 java_to_bq_format(s)
14747 } else {
14748 java_to_c_format(s)
14749 };
14750 Ok(Expression::Function(Box::new(Function::new(
14751 "FORMAT_DATE".to_string(),
14752 vec![Expression::string(&bq_fmt), cast_val],
14753 ))))
14754 } else {
14755 Ok(Expression::Function(Box::new(Function::new(
14756 "FORMAT_DATE".to_string(),
14757 vec![fmt_expr.clone(), cast_val],
14758 ))))
14759 }
14760 } else {
14761 Ok(Expression::Function(Box::new(Function::new(
14762 "FORMAT_DATE".to_string(),
14763 vec![fmt_expr.clone(), cast_val],
14764 ))))
14765 }
14766 }
14767 DialectType::PostgreSQL | DialectType::Redshift => {
14768 if let Expression::Literal(lit) = fmt_expr {
14769 if let crate::expressions::Literal::String(s) =
14770 lit.as_ref()
14771 {
14772 let pg_fmt = s
14773 .replace("yyyy", "YYYY")
14774 .replace("MM", "MM")
14775 .replace("dd", "DD")
14776 .replace("HH", "HH24")
14777 .replace("mm", "MI")
14778 .replace("ss", "SS")
14779 .replace("yy", "YY");
14780 Ok(Expression::Function(Box::new(Function::new(
14781 "TO_CHAR".to_string(),
14782 vec![val, Expression::string(&pg_fmt)],
14783 ))))
14784 } else {
14785 Ok(Expression::Function(Box::new(Function::new(
14786 "TO_CHAR".to_string(),
14787 vec![val, fmt_expr.clone()],
14788 ))))
14789 }
14790 } else {
14791 Ok(Expression::Function(Box::new(Function::new(
14792 "TO_CHAR".to_string(),
14793 vec![val, fmt_expr.clone()],
14794 ))))
14795 }
14796 }
14797 _ => Ok(Expression::Function(f)),
14798 }
14799 }
14800 // DATEDIFF(unit, start, end) - 3-arg form
14801 // SQLite uses DATEDIFF(date1, date2, unit_string) instead
14802 "DATEDIFF" if f.args.len() == 3 => {
14803 let mut args = f.args;
14804 // SQLite source: args = (date1, date2, unit_string)
14805 // Standard source: args = (unit, start, end)
14806 let (_arg0, arg1, arg2, unit_str) =
14807 if matches!(source, DialectType::SQLite) {
14808 let date1 = args.remove(0);
14809 let date2 = args.remove(0);
14810 let unit_expr = args.remove(0);
14811 let unit_s = Self::get_unit_str_static(&unit_expr);
14812
14813 // For SQLite target, generate JULIANDAY arithmetic directly
14814 if matches!(target, DialectType::SQLite) {
14815 let jd_first = Expression::Function(Box::new(
14816 Function::new("JULIANDAY".to_string(), vec![date1]),
14817 ));
14818 let jd_second = Expression::Function(Box::new(
14819 Function::new("JULIANDAY".to_string(), vec![date2]),
14820 ));
14821 let diff = Expression::Sub(Box::new(
14822 crate::expressions::BinaryOp::new(
14823 jd_first, jd_second,
14824 ),
14825 ));
14826 let paren_diff = Expression::Paren(Box::new(
14827 crate::expressions::Paren {
14828 this: diff,
14829 trailing_comments: Vec::new(),
14830 },
14831 ));
14832 let adjusted = match unit_s.as_str() {
14833 "HOUR" => Expression::Mul(Box::new(
14834 crate::expressions::BinaryOp::new(
14835 paren_diff,
14836 Expression::Literal(Box::new(
14837 Literal::Number("24.0".to_string()),
14838 )),
14839 ),
14840 )),
14841 "MINUTE" => Expression::Mul(Box::new(
14842 crate::expressions::BinaryOp::new(
14843 paren_diff,
14844 Expression::Literal(Box::new(
14845 Literal::Number("1440.0".to_string()),
14846 )),
14847 ),
14848 )),
14849 "SECOND" => Expression::Mul(Box::new(
14850 crate::expressions::BinaryOp::new(
14851 paren_diff,
14852 Expression::Literal(Box::new(
14853 Literal::Number("86400.0".to_string()),
14854 )),
14855 ),
14856 )),
14857 "MONTH" => Expression::Div(Box::new(
14858 crate::expressions::BinaryOp::new(
14859 paren_diff,
14860 Expression::Literal(Box::new(
14861 Literal::Number("30.0".to_string()),
14862 )),
14863 ),
14864 )),
14865 "YEAR" => Expression::Div(Box::new(
14866 crate::expressions::BinaryOp::new(
14867 paren_diff,
14868 Expression::Literal(Box::new(
14869 Literal::Number("365.0".to_string()),
14870 )),
14871 ),
14872 )),
14873 _ => paren_diff,
14874 };
14875 return Ok(Expression::Cast(Box::new(Cast {
14876 this: adjusted,
14877 to: DataType::Int {
14878 length: None,
14879 integer_spelling: true,
14880 },
14881 trailing_comments: vec![],
14882 double_colon_syntax: false,
14883 format: None,
14884 default: None,
14885 inferred_type: None,
14886 })));
14887 }
14888
14889 // For other targets, remap to standard (unit, start, end) form
14890 let unit_ident =
14891 Expression::Identifier(Identifier::new(&unit_s));
14892 (unit_ident, date1, date2, unit_s)
14893 } else {
14894 let arg0 = args.remove(0);
14895 let arg1 = args.remove(0);
14896 let arg2 = args.remove(0);
14897 let unit_s = Self::get_unit_str_static(&arg0);
14898 (arg0, arg1, arg2, unit_s)
14899 };
14900
14901 // For Hive/Spark source, string literal dates need to be cast
14902 // Note: Databricks is excluded - it handles string args like standard SQL
14903 let is_hive_spark =
14904 matches!(source, DialectType::Hive | DialectType::Spark);
14905
14906 match target {
14907 DialectType::Snowflake => {
14908 let unit =
14909 Expression::Identifier(Identifier::new(&unit_str));
14910 // Use ensure_to_date_preserved to add TO_DATE with a marker
14911 // that prevents the Snowflake TO_DATE handler from converting it to CAST
14912 let d1 = if is_hive_spark {
14913 Self::ensure_to_date_preserved(arg1)
14914 } else {
14915 arg1
14916 };
14917 let d2 = if is_hive_spark {
14918 Self::ensure_to_date_preserved(arg2)
14919 } else {
14920 arg2
14921 };
14922 Ok(Expression::Function(Box::new(Function::new(
14923 "DATEDIFF".to_string(),
14924 vec![unit, d1, d2],
14925 ))))
14926 }
14927 DialectType::Redshift => {
14928 let unit =
14929 Expression::Identifier(Identifier::new(&unit_str));
14930 let d1 = if is_hive_spark {
14931 Self::ensure_cast_date(arg1)
14932 } else {
14933 arg1
14934 };
14935 let d2 = if is_hive_spark {
14936 Self::ensure_cast_date(arg2)
14937 } else {
14938 arg2
14939 };
14940 Ok(Expression::Function(Box::new(Function::new(
14941 "DATEDIFF".to_string(),
14942 vec![unit, d1, d2],
14943 ))))
14944 }
14945 DialectType::TSQL => {
14946 let unit =
14947 Expression::Identifier(Identifier::new(&unit_str));
14948 Ok(Expression::Function(Box::new(Function::new(
14949 "DATEDIFF".to_string(),
14950 vec![unit, arg1, arg2],
14951 ))))
14952 }
14953 DialectType::DuckDB => {
14954 let is_redshift_tsql = matches!(
14955 source,
14956 DialectType::Redshift | DialectType::TSQL
14957 );
14958 if is_hive_spark {
14959 // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
14960 let d1 = Self::ensure_cast_date(arg1);
14961 let d2 = Self::ensure_cast_date(arg2);
14962 Ok(Expression::Function(Box::new(Function::new(
14963 "DATE_DIFF".to_string(),
14964 vec![Expression::string(&unit_str), d1, d2],
14965 ))))
14966 } else if matches!(source, DialectType::Snowflake) {
14967 // For Snowflake source: special handling per unit
14968 match unit_str.as_str() {
14969 "NANOSECOND" => {
14970 // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
14971 fn cast_to_timestamp_ns(
14972 expr: Expression,
14973 ) -> Expression
14974 {
14975 Expression::Cast(Box::new(Cast {
14976 this: expr,
14977 to: DataType::Custom {
14978 name: "TIMESTAMP_NS".to_string(),
14979 },
14980 trailing_comments: vec![],
14981 double_colon_syntax: false,
14982 format: None,
14983 default: None,
14984 inferred_type: None,
14985 }))
14986 }
14987 let epoch_end = Expression::Function(Box::new(
14988 Function::new(
14989 "EPOCH_NS".to_string(),
14990 vec![cast_to_timestamp_ns(arg2)],
14991 ),
14992 ));
14993 let epoch_start = Expression::Function(
14994 Box::new(Function::new(
14995 "EPOCH_NS".to_string(),
14996 vec![cast_to_timestamp_ns(arg1)],
14997 )),
14998 );
14999 Ok(Expression::Sub(Box::new(BinaryOp::new(
15000 epoch_end,
15001 epoch_start,
15002 ))))
15003 }
15004 "WEEK" => {
15005 // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
15006 let d1 = Self::force_cast_date(arg1);
15007 let d2 = Self::force_cast_date(arg2);
15008 let dt1 = Expression::Function(Box::new(
15009 Function::new(
15010 "DATE_TRUNC".to_string(),
15011 vec![Expression::string("WEEK"), d1],
15012 ),
15013 ));
15014 let dt2 = Expression::Function(Box::new(
15015 Function::new(
15016 "DATE_TRUNC".to_string(),
15017 vec![Expression::string("WEEK"), d2],
15018 ),
15019 ));
15020 Ok(Expression::Function(Box::new(
15021 Function::new(
15022 "DATE_DIFF".to_string(),
15023 vec![
15024 Expression::string(&unit_str),
15025 dt1,
15026 dt2,
15027 ],
15028 ),
15029 )))
15030 }
15031 _ => {
15032 // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
15033 let d1 = Self::force_cast_date(arg1);
15034 let d2 = Self::force_cast_date(arg2);
15035 Ok(Expression::Function(Box::new(
15036 Function::new(
15037 "DATE_DIFF".to_string(),
15038 vec![
15039 Expression::string(&unit_str),
15040 d1,
15041 d2,
15042 ],
15043 ),
15044 )))
15045 }
15046 }
15047 } else if is_redshift_tsql {
15048 // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
15049 let d1 = Self::force_cast_timestamp(arg1);
15050 let d2 = Self::force_cast_timestamp(arg2);
15051 Ok(Expression::Function(Box::new(Function::new(
15052 "DATE_DIFF".to_string(),
15053 vec![Expression::string(&unit_str), d1, d2],
15054 ))))
15055 } else {
15056 // Keep as DATEDIFF so DuckDB's transform_datediff handles
15057 // DATE_TRUNC for WEEK, CAST for string literals, etc.
15058 let unit =
15059 Expression::Identifier(Identifier::new(&unit_str));
15060 Ok(Expression::Function(Box::new(Function::new(
15061 "DATEDIFF".to_string(),
15062 vec![unit, arg1, arg2],
15063 ))))
15064 }
15065 }
15066 DialectType::BigQuery => {
15067 let is_redshift_tsql = matches!(
15068 source,
15069 DialectType::Redshift
15070 | DialectType::TSQL
15071 | DialectType::Snowflake
15072 );
15073 let cast_d1 = if is_hive_spark {
15074 Self::ensure_cast_date(arg1)
15075 } else if is_redshift_tsql {
15076 Self::force_cast_datetime(arg1)
15077 } else {
15078 Self::ensure_cast_datetime(arg1)
15079 };
15080 let cast_d2 = if is_hive_spark {
15081 Self::ensure_cast_date(arg2)
15082 } else if is_redshift_tsql {
15083 Self::force_cast_datetime(arg2)
15084 } else {
15085 Self::ensure_cast_datetime(arg2)
15086 };
15087 let unit =
15088 Expression::Identifier(Identifier::new(&unit_str));
15089 Ok(Expression::Function(Box::new(Function::new(
15090 "DATE_DIFF".to_string(),
15091 vec![cast_d2, cast_d1, unit],
15092 ))))
15093 }
15094 DialectType::Presto
15095 | DialectType::Trino
15096 | DialectType::Athena => {
15097 // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
15098 // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
15099 let is_redshift_tsql = matches!(
15100 source,
15101 DialectType::Redshift
15102 | DialectType::TSQL
15103 | DialectType::Snowflake
15104 );
15105 let d1 = if is_hive_spark {
15106 Self::double_cast_timestamp_date(arg1)
15107 } else if is_redshift_tsql {
15108 Self::force_cast_timestamp(arg1)
15109 } else {
15110 arg1
15111 };
15112 let d2 = if is_hive_spark {
15113 Self::double_cast_timestamp_date(arg2)
15114 } else if is_redshift_tsql {
15115 Self::force_cast_timestamp(arg2)
15116 } else {
15117 arg2
15118 };
15119 Ok(Expression::Function(Box::new(Function::new(
15120 "DATE_DIFF".to_string(),
15121 vec![Expression::string(&unit_str), d1, d2],
15122 ))))
15123 }
15124 DialectType::Hive => match unit_str.as_str() {
15125 "MONTH" => Ok(Expression::Cast(Box::new(Cast {
15126 this: Expression::Function(Box::new(Function::new(
15127 "MONTHS_BETWEEN".to_string(),
15128 vec![arg2, arg1],
15129 ))),
15130 to: DataType::Int {
15131 length: None,
15132 integer_spelling: false,
15133 },
15134 trailing_comments: vec![],
15135 double_colon_syntax: false,
15136 format: None,
15137 default: None,
15138 inferred_type: None,
15139 }))),
15140 "WEEK" => Ok(Expression::Cast(Box::new(Cast {
15141 this: Expression::Div(Box::new(
15142 crate::expressions::BinaryOp::new(
15143 Expression::Function(Box::new(Function::new(
15144 "DATEDIFF".to_string(),
15145 vec![arg2, arg1],
15146 ))),
15147 Expression::number(7),
15148 ),
15149 )),
15150 to: DataType::Int {
15151 length: None,
15152 integer_spelling: false,
15153 },
15154 trailing_comments: vec![],
15155 double_colon_syntax: false,
15156 format: None,
15157 default: None,
15158 inferred_type: None,
15159 }))),
15160 _ => Ok(Expression::Function(Box::new(Function::new(
15161 "DATEDIFF".to_string(),
15162 vec![arg2, arg1],
15163 )))),
15164 },
15165 DialectType::Spark | DialectType::Databricks => {
15166 let unit =
15167 Expression::Identifier(Identifier::new(&unit_str));
15168 Ok(Expression::Function(Box::new(Function::new(
15169 "DATEDIFF".to_string(),
15170 vec![unit, arg1, arg2],
15171 ))))
15172 }
15173 _ => {
15174 // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
15175 let d1 = if is_hive_spark {
15176 Self::ensure_cast_date(arg1)
15177 } else {
15178 arg1
15179 };
15180 let d2 = if is_hive_spark {
15181 Self::ensure_cast_date(arg2)
15182 } else {
15183 arg2
15184 };
15185 let unit =
15186 Expression::Identifier(Identifier::new(&unit_str));
15187 Ok(Expression::Function(Box::new(Function::new(
15188 "DATEDIFF".to_string(),
15189 vec![unit, d1, d2],
15190 ))))
15191 }
15192 }
15193 }
15194 // DATEDIFF(end, start) - 2-arg form from Hive/MySQL
15195 "DATEDIFF" if f.args.len() == 2 => {
15196 let mut args = f.args;
15197 let arg0 = args.remove(0);
15198 let arg1 = args.remove(0);
15199
15200 // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
15201 // Also recognizes TryCast/Cast to DATE that may have been produced by
15202 // cross-dialect TO_DATE -> TRY_CAST conversion
15203 let unwrap_to_date = |e: Expression| -> (Expression, bool) {
15204 if let Expression::Function(ref f) = e {
15205 if f.name.eq_ignore_ascii_case("TO_DATE")
15206 && f.args.len() == 1
15207 {
15208 return (f.args[0].clone(), true);
15209 }
15210 }
15211 // Also recognize TryCast(x, Date) as an already-converted TO_DATE
15212 if let Expression::TryCast(ref c) = e {
15213 if matches!(c.to, DataType::Date) {
15214 return (e, true); // Already properly cast, return as-is
15215 }
15216 }
15217 (e, false)
15218 };
15219
15220 match target {
15221 DialectType::DuckDB => {
15222 // For Hive source, always CAST to DATE
15223 // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
15224 let cast_d0 = if matches!(
15225 source,
15226 DialectType::Hive
15227 | DialectType::Spark
15228 | DialectType::Databricks
15229 ) {
15230 let (inner, was_to_date) = unwrap_to_date(arg1);
15231 if was_to_date {
15232 // Already a date expression, use directly
15233 if matches!(&inner, Expression::TryCast(_)) {
15234 inner // Already TRY_CAST(x AS DATE)
15235 } else {
15236 Self::try_cast_date(inner)
15237 }
15238 } else {
15239 Self::force_cast_date(inner)
15240 }
15241 } else {
15242 Self::ensure_cast_date(arg1)
15243 };
15244 let cast_d1 = if matches!(
15245 source,
15246 DialectType::Hive
15247 | DialectType::Spark
15248 | DialectType::Databricks
15249 ) {
15250 let (inner, was_to_date) = unwrap_to_date(arg0);
15251 if was_to_date {
15252 if matches!(&inner, Expression::TryCast(_)) {
15253 inner
15254 } else {
15255 Self::try_cast_date(inner)
15256 }
15257 } else {
15258 Self::force_cast_date(inner)
15259 }
15260 } else {
15261 Self::ensure_cast_date(arg0)
15262 };
15263 Ok(Expression::Function(Box::new(Function::new(
15264 "DATE_DIFF".to_string(),
15265 vec![Expression::string("DAY"), cast_d0, cast_d1],
15266 ))))
15267 }
15268 DialectType::Presto
15269 | DialectType::Trino
15270 | DialectType::Athena => {
15271 // For Hive/Spark source, apply double_cast_timestamp_date
15272 // For other sources (MySQL etc.), just swap args without casting
15273 if matches!(
15274 source,
15275 DialectType::Hive
15276 | DialectType::Spark
15277 | DialectType::Databricks
15278 ) {
15279 let cast_fn = |e: Expression| -> Expression {
15280 let (inner, was_to_date) = unwrap_to_date(e);
15281 if was_to_date {
15282 let first_cast =
15283 Self::double_cast_timestamp_date(inner);
15284 Self::double_cast_timestamp_date(first_cast)
15285 } else {
15286 Self::double_cast_timestamp_date(inner)
15287 }
15288 };
15289 Ok(Expression::Function(Box::new(Function::new(
15290 "DATE_DIFF".to_string(),
15291 vec![
15292 Expression::string("DAY"),
15293 cast_fn(arg1),
15294 cast_fn(arg0),
15295 ],
15296 ))))
15297 } else {
15298 Ok(Expression::Function(Box::new(Function::new(
15299 "DATE_DIFF".to_string(),
15300 vec![Expression::string("DAY"), arg1, arg0],
15301 ))))
15302 }
15303 }
15304 DialectType::Redshift => {
15305 let unit = Expression::Identifier(Identifier::new("DAY"));
15306 Ok(Expression::Function(Box::new(Function::new(
15307 "DATEDIFF".to_string(),
15308 vec![unit, arg1, arg0],
15309 ))))
15310 }
15311 _ => Ok(Expression::Function(Box::new(Function::new(
15312 "DATEDIFF".to_string(),
15313 vec![arg0, arg1],
15314 )))),
15315 }
15316 }
15317 // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
15318 "DATE_DIFF" if f.args.len() == 3 => {
15319 let mut args = f.args;
15320 let arg0 = args.remove(0);
15321 let arg1 = args.remove(0);
15322 let arg2 = args.remove(0);
15323 let unit_str = Self::get_unit_str_static(&arg0);
15324
15325 match target {
15326 DialectType::DuckDB => {
15327 // DuckDB: DATE_DIFF('UNIT', start, end)
15328 Ok(Expression::Function(Box::new(Function::new(
15329 "DATE_DIFF".to_string(),
15330 vec![Expression::string(&unit_str), arg1, arg2],
15331 ))))
15332 }
15333 DialectType::Presto
15334 | DialectType::Trino
15335 | DialectType::Athena => {
15336 Ok(Expression::Function(Box::new(Function::new(
15337 "DATE_DIFF".to_string(),
15338 vec![Expression::string(&unit_str), arg1, arg2],
15339 ))))
15340 }
15341 DialectType::ClickHouse => {
15342 // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
15343 let unit =
15344 Expression::Identifier(Identifier::new(&unit_str));
15345 Ok(Expression::Function(Box::new(Function::new(
15346 "DATE_DIFF".to_string(),
15347 vec![unit, arg1, arg2],
15348 ))))
15349 }
15350 DialectType::Snowflake | DialectType::Redshift => {
15351 let unit =
15352 Expression::Identifier(Identifier::new(&unit_str));
15353 Ok(Expression::Function(Box::new(Function::new(
15354 "DATEDIFF".to_string(),
15355 vec![unit, arg1, arg2],
15356 ))))
15357 }
15358 _ => {
15359 let unit =
15360 Expression::Identifier(Identifier::new(&unit_str));
15361 Ok(Expression::Function(Box::new(Function::new(
15362 "DATEDIFF".to_string(),
15363 vec![unit, arg1, arg2],
15364 ))))
15365 }
15366 }
15367 }
15368 // DATEADD(unit, val, date) - 3-arg form
15369 "DATEADD" if f.args.len() == 3 => {
15370 let mut args = f.args;
15371 let arg0 = args.remove(0);
15372 let arg1 = args.remove(0);
15373 let arg2 = args.remove(0);
15374 let unit_str = Self::get_unit_str_static(&arg0);
15375
15376 // Normalize TSQL unit abbreviations to standard names
15377 let unit_str = match unit_str.as_str() {
15378 "YY" | "YYYY" => "YEAR".to_string(),
15379 "QQ" | "Q" => "QUARTER".to_string(),
15380 "MM" | "M" => "MONTH".to_string(),
15381 "WK" | "WW" => "WEEK".to_string(),
15382 "DD" | "D" | "DY" => "DAY".to_string(),
15383 "HH" => "HOUR".to_string(),
15384 "MI" | "N" => "MINUTE".to_string(),
15385 "SS" | "S" => "SECOND".to_string(),
15386 "MS" => "MILLISECOND".to_string(),
15387 "MCS" | "US" => "MICROSECOND".to_string(),
15388 _ => unit_str,
15389 };
15390 match target {
15391 DialectType::Snowflake => {
15392 let unit =
15393 Expression::Identifier(Identifier::new(&unit_str));
15394 // Cast string literal to TIMESTAMP, but not for Snowflake source
15395 // (Snowflake natively accepts string literals in DATEADD)
15396 let arg2 = if matches!(
15397 &arg2,
15398 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
15399 ) && !matches!(source, DialectType::Snowflake)
15400 {
15401 Expression::Cast(Box::new(Cast {
15402 this: arg2,
15403 to: DataType::Timestamp {
15404 precision: None,
15405 timezone: false,
15406 },
15407 trailing_comments: Vec::new(),
15408 double_colon_syntax: false,
15409 format: None,
15410 default: None,
15411 inferred_type: None,
15412 }))
15413 } else {
15414 arg2
15415 };
15416 Ok(Expression::Function(Box::new(Function::new(
15417 "DATEADD".to_string(),
15418 vec![unit, arg1, arg2],
15419 ))))
15420 }
15421 DialectType::TSQL => {
15422 let unit =
15423 Expression::Identifier(Identifier::new(&unit_str));
15424 // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
15425 let arg2 = if matches!(
15426 &arg2,
15427 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
15428 ) && !matches!(
15429 source,
15430 DialectType::Spark
15431 | DialectType::Databricks
15432 | DialectType::Hive
15433 ) {
15434 Expression::Cast(Box::new(Cast {
15435 this: arg2,
15436 to: DataType::Custom {
15437 name: "DATETIME2".to_string(),
15438 },
15439 trailing_comments: Vec::new(),
15440 double_colon_syntax: false,
15441 format: None,
15442 default: None,
15443 inferred_type: None,
15444 }))
15445 } else {
15446 arg2
15447 };
15448 Ok(Expression::Function(Box::new(Function::new(
15449 "DATEADD".to_string(),
15450 vec![unit, arg1, arg2],
15451 ))))
15452 }
15453 DialectType::Redshift => {
15454 let unit =
15455 Expression::Identifier(Identifier::new(&unit_str));
15456 Ok(Expression::Function(Box::new(Function::new(
15457 "DATEADD".to_string(),
15458 vec![unit, arg1, arg2],
15459 ))))
15460 }
15461 DialectType::Databricks => {
15462 let unit =
15463 Expression::Identifier(Identifier::new(&unit_str));
15464 // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
15465 // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
15466 let func_name = if matches!(
15467 source,
15468 DialectType::TSQL
15469 | DialectType::Fabric
15470 | DialectType::Databricks
15471 | DialectType::Snowflake
15472 ) {
15473 "DATEADD"
15474 } else {
15475 "DATE_ADD"
15476 };
15477 Ok(Expression::Function(Box::new(Function::new(
15478 func_name.to_string(),
15479 vec![unit, arg1, arg2],
15480 ))))
15481 }
15482 DialectType::DuckDB => {
15483 // Special handling for NANOSECOND from Snowflake
15484 if unit_str == "NANOSECOND"
15485 && matches!(source, DialectType::Snowflake)
15486 {
15487 // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
15488 let cast_ts = Expression::Cast(Box::new(Cast {
15489 this: arg2,
15490 to: DataType::Custom {
15491 name: "TIMESTAMP_NS".to_string(),
15492 },
15493 trailing_comments: vec![],
15494 double_colon_syntax: false,
15495 format: None,
15496 default: None,
15497 inferred_type: None,
15498 }));
15499 let epoch_ns =
15500 Expression::Function(Box::new(Function::new(
15501 "EPOCH_NS".to_string(),
15502 vec![cast_ts],
15503 )));
15504 let sum = Expression::Add(Box::new(BinaryOp::new(
15505 epoch_ns, arg1,
15506 )));
15507 Ok(Expression::Function(Box::new(Function::new(
15508 "MAKE_TIMESTAMP_NS".to_string(),
15509 vec![sum],
15510 ))))
15511 } else {
15512 // DuckDB: convert to date + INTERVAL syntax with CAST
15513 let iu = Self::parse_interval_unit_static(&unit_str);
15514 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
15515 this: Some(arg1),
15516 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
15517 }));
15518 // Cast string literal to TIMESTAMP
15519 let arg2 = if matches!(
15520 &arg2,
15521 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
15522 ) {
15523 Expression::Cast(Box::new(Cast {
15524 this: arg2,
15525 to: DataType::Timestamp {
15526 precision: None,
15527 timezone: false,
15528 },
15529 trailing_comments: Vec::new(),
15530 double_colon_syntax: false,
15531 format: None,
15532 default: None,
15533 inferred_type: None,
15534 }))
15535 } else {
15536 arg2
15537 };
15538 Ok(Expression::Add(Box::new(
15539 crate::expressions::BinaryOp::new(arg2, interval),
15540 )))
15541 }
15542 }
15543 DialectType::Spark => {
15544 // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
15545 // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
15546 if matches!(source, DialectType::TSQL | DialectType::Fabric)
15547 {
15548 fn multiply_expr_spark(
15549 expr: Expression,
15550 factor: i64,
15551 ) -> Expression
15552 {
15553 if let Expression::Literal(lit) = &expr {
15554 if let crate::expressions::Literal::Number(n) =
15555 lit.as_ref()
15556 {
15557 if let Ok(val) = n.parse::<i64>() {
15558 return Expression::Literal(Box::new(
15559 crate::expressions::Literal::Number(
15560 (val * factor).to_string(),
15561 ),
15562 ));
15563 }
15564 }
15565 }
15566 Expression::Mul(Box::new(
15567 crate::expressions::BinaryOp::new(
15568 expr,
15569 Expression::Literal(Box::new(
15570 crate::expressions::Literal::Number(
15571 factor.to_string(),
15572 ),
15573 )),
15574 ),
15575 ))
15576 }
15577 let normalized_unit = match unit_str.as_str() {
15578 "YEAR" | "YY" | "YYYY" => "YEAR",
15579 "QUARTER" | "QQ" | "Q" => "QUARTER",
15580 "MONTH" | "MM" | "M" => "MONTH",
15581 "WEEK" | "WK" | "WW" => "WEEK",
15582 "DAY" | "DD" | "D" | "DY" => "DAY",
15583 _ => &unit_str,
15584 };
15585 match normalized_unit {
15586 "YEAR" => {
15587 let months = multiply_expr_spark(arg1, 12);
15588 Ok(Expression::Function(Box::new(
15589 Function::new(
15590 "ADD_MONTHS".to_string(),
15591 vec![arg2, months],
15592 ),
15593 )))
15594 }
15595 "QUARTER" => {
15596 let months = multiply_expr_spark(arg1, 3);
15597 Ok(Expression::Function(Box::new(
15598 Function::new(
15599 "ADD_MONTHS".to_string(),
15600 vec![arg2, months],
15601 ),
15602 )))
15603 }
15604 "MONTH" => Ok(Expression::Function(Box::new(
15605 Function::new(
15606 "ADD_MONTHS".to_string(),
15607 vec![arg2, arg1],
15608 ),
15609 ))),
15610 "WEEK" => {
15611 let days = multiply_expr_spark(arg1, 7);
15612 Ok(Expression::Function(Box::new(
15613 Function::new(
15614 "DATE_ADD".to_string(),
15615 vec![arg2, days],
15616 ),
15617 )))
15618 }
15619 "DAY" => Ok(Expression::Function(Box::new(
15620 Function::new(
15621 "DATE_ADD".to_string(),
15622 vec![arg2, arg1],
15623 ),
15624 ))),
15625 _ => {
15626 let unit = Expression::Identifier(
15627 Identifier::new(&unit_str),
15628 );
15629 Ok(Expression::Function(Box::new(
15630 Function::new(
15631 "DATE_ADD".to_string(),
15632 vec![unit, arg1, arg2],
15633 ),
15634 )))
15635 }
15636 }
15637 } else {
15638 // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
15639 let unit =
15640 Expression::Identifier(Identifier::new(&unit_str));
15641 Ok(Expression::Function(Box::new(Function::new(
15642 "DATE_ADD".to_string(),
15643 vec![unit, arg1, arg2],
15644 ))))
15645 }
15646 }
15647 DialectType::Hive => match unit_str.as_str() {
15648 "MONTH" => {
15649 Ok(Expression::Function(Box::new(Function::new(
15650 "ADD_MONTHS".to_string(),
15651 vec![arg2, arg1],
15652 ))))
15653 }
15654 _ => Ok(Expression::Function(Box::new(Function::new(
15655 "DATE_ADD".to_string(),
15656 vec![arg2, arg1],
15657 )))),
15658 },
15659 DialectType::Presto
15660 | DialectType::Trino
15661 | DialectType::Athena => {
15662 // Cast string literal date to TIMESTAMP
15663 let arg2 = if matches!(
15664 &arg2,
15665 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
15666 ) {
15667 Expression::Cast(Box::new(Cast {
15668 this: arg2,
15669 to: DataType::Timestamp {
15670 precision: None,
15671 timezone: false,
15672 },
15673 trailing_comments: Vec::new(),
15674 double_colon_syntax: false,
15675 format: None,
15676 default: None,
15677 inferred_type: None,
15678 }))
15679 } else {
15680 arg2
15681 };
15682 Ok(Expression::Function(Box::new(Function::new(
15683 "DATE_ADD".to_string(),
15684 vec![Expression::string(&unit_str), arg1, arg2],
15685 ))))
15686 }
15687 DialectType::MySQL => {
15688 let iu = Self::parse_interval_unit_static(&unit_str);
15689 Ok(Expression::DateAdd(Box::new(
15690 crate::expressions::DateAddFunc {
15691 this: arg2,
15692 interval: arg1,
15693 unit: iu,
15694 },
15695 )))
15696 }
15697 DialectType::PostgreSQL => {
15698 // Cast string literal date to TIMESTAMP
15699 let arg2 = if matches!(
15700 &arg2,
15701 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
15702 ) {
15703 Expression::Cast(Box::new(Cast {
15704 this: arg2,
15705 to: DataType::Timestamp {
15706 precision: None,
15707 timezone: false,
15708 },
15709 trailing_comments: Vec::new(),
15710 double_colon_syntax: false,
15711 format: None,
15712 default: None,
15713 inferred_type: None,
15714 }))
15715 } else {
15716 arg2
15717 };
15718 let interval = Expression::Interval(Box::new(
15719 crate::expressions::Interval {
15720 this: Some(Expression::string(&format!(
15721 "{} {}",
15722 Self::expr_to_string_static(&arg1),
15723 unit_str
15724 ))),
15725 unit: None,
15726 },
15727 ));
15728 Ok(Expression::Add(Box::new(
15729 crate::expressions::BinaryOp::new(arg2, interval),
15730 )))
15731 }
15732 DialectType::BigQuery => {
15733 let iu = Self::parse_interval_unit_static(&unit_str);
15734 let interval = Expression::Interval(Box::new(
15735 crate::expressions::Interval {
15736 this: Some(arg1),
15737 unit: Some(
15738 crate::expressions::IntervalUnitSpec::Simple {
15739 unit: iu,
15740 use_plural: false,
15741 },
15742 ),
15743 },
15744 ));
15745 // Non-TSQL sources: CAST string literal to DATETIME
15746 let arg2 = if !matches!(
15747 source,
15748 DialectType::TSQL | DialectType::Fabric
15749 ) && matches!(
15750 &arg2,
15751 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
15752 ) {
15753 Expression::Cast(Box::new(Cast {
15754 this: arg2,
15755 to: DataType::Custom {
15756 name: "DATETIME".to_string(),
15757 },
15758 trailing_comments: Vec::new(),
15759 double_colon_syntax: false,
15760 format: None,
15761 default: None,
15762 inferred_type: None,
15763 }))
15764 } else {
15765 arg2
15766 };
15767 Ok(Expression::Function(Box::new(Function::new(
15768 "DATE_ADD".to_string(),
15769 vec![arg2, interval],
15770 ))))
15771 }
15772 _ => {
15773 let unit =
15774 Expression::Identifier(Identifier::new(&unit_str));
15775 Ok(Expression::Function(Box::new(Function::new(
15776 "DATEADD".to_string(),
15777 vec![unit, arg1, arg2],
15778 ))))
15779 }
15780 }
15781 }
15782 // DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
15783 // or (date, val, 'UNIT') from Generic canonical form
15784 "DATE_ADD" if f.args.len() == 3 => {
15785 let mut args = f.args;
15786 let arg0 = args.remove(0);
15787 let arg1 = args.remove(0);
15788 let arg2 = args.remove(0);
15789 // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
15790 // where arg2 is a string literal matching a unit name
15791 let arg2_unit = match &arg2 {
15792 Expression::Literal(lit)
15793 if matches!(lit.as_ref(), Literal::String(_)) =>
15794 {
15795 let Literal::String(s) = lit.as_ref() else {
15796 unreachable!()
15797 };
15798 let u = s.to_ascii_uppercase();
15799 if matches!(
15800 u.as_str(),
15801 "DAY"
15802 | "MONTH"
15803 | "YEAR"
15804 | "HOUR"
15805 | "MINUTE"
15806 | "SECOND"
15807 | "WEEK"
15808 | "QUARTER"
15809 | "MILLISECOND"
15810 | "MICROSECOND"
15811 ) {
15812 Some(u)
15813 } else {
15814 None
15815 }
15816 }
15817 _ => None,
15818 };
15819 // Reorder: if arg2 is the unit, swap to (unit, val, date) form
15820 let (unit_str, val, date) = if let Some(u) = arg2_unit {
15821 (u, arg1, arg0)
15822 } else {
15823 (Self::get_unit_str_static(&arg0), arg1, arg2)
15824 };
15825 // Alias for backward compat with the rest of the match
15826 let arg1 = val;
15827 let arg2 = date;
15828
15829 match target {
15830 DialectType::Presto
15831 | DialectType::Trino
15832 | DialectType::Athena => {
15833 Ok(Expression::Function(Box::new(Function::new(
15834 "DATE_ADD".to_string(),
15835 vec![Expression::string(&unit_str), arg1, arg2],
15836 ))))
15837 }
15838 DialectType::DuckDB => {
15839 let iu = Self::parse_interval_unit_static(&unit_str);
15840 let interval = Expression::Interval(Box::new(
15841 crate::expressions::Interval {
15842 this: Some(arg1),
15843 unit: Some(
15844 crate::expressions::IntervalUnitSpec::Simple {
15845 unit: iu,
15846 use_plural: false,
15847 },
15848 ),
15849 },
15850 ));
15851 Ok(Expression::Add(Box::new(
15852 crate::expressions::BinaryOp::new(arg2, interval),
15853 )))
15854 }
15855 DialectType::PostgreSQL
15856 | DialectType::Materialize
15857 | DialectType::RisingWave => {
15858 // PostgreSQL: x + INTERVAL '1 DAY'
15859 let amount_str = Self::expr_to_string_static(&arg1);
15860 let interval = Expression::Interval(Box::new(
15861 crate::expressions::Interval {
15862 this: Some(Expression::string(&format!(
15863 "{} {}",
15864 amount_str, unit_str
15865 ))),
15866 unit: None,
15867 },
15868 ));
15869 Ok(Expression::Add(Box::new(
15870 crate::expressions::BinaryOp::new(arg2, interval),
15871 )))
15872 }
15873 DialectType::Snowflake
15874 | DialectType::TSQL
15875 | DialectType::Redshift => {
15876 let unit =
15877 Expression::Identifier(Identifier::new(&unit_str));
15878 Ok(Expression::Function(Box::new(Function::new(
15879 "DATEADD".to_string(),
15880 vec![unit, arg1, arg2],
15881 ))))
15882 }
15883 DialectType::BigQuery
15884 | DialectType::MySQL
15885 | DialectType::Doris
15886 | DialectType::StarRocks
15887 | DialectType::Drill => {
15888 // DATE_ADD(date, INTERVAL amount UNIT)
15889 let iu = Self::parse_interval_unit_static(&unit_str);
15890 let interval = Expression::Interval(Box::new(
15891 crate::expressions::Interval {
15892 this: Some(arg1),
15893 unit: Some(
15894 crate::expressions::IntervalUnitSpec::Simple {
15895 unit: iu,
15896 use_plural: false,
15897 },
15898 ),
15899 },
15900 ));
15901 Ok(Expression::Function(Box::new(Function::new(
15902 "DATE_ADD".to_string(),
15903 vec![arg2, interval],
15904 ))))
15905 }
15906 DialectType::SQLite => {
15907 // SQLite: DATE(x, '1 DAY')
15908 // Build the string '1 DAY' from amount and unit
15909 let amount_str = match &arg1 {
15910 Expression::Literal(lit)
15911 if matches!(lit.as_ref(), Literal::Number(_)) =>
15912 {
15913 let Literal::Number(n) = lit.as_ref() else {
15914 unreachable!()
15915 };
15916 n.clone()
15917 }
15918 _ => "1".to_string(),
15919 };
15920 Ok(Expression::Function(Box::new(Function::new(
15921 "DATE".to_string(),
15922 vec![
15923 arg2,
15924 Expression::string(format!(
15925 "{} {}",
15926 amount_str, unit_str
15927 )),
15928 ],
15929 ))))
15930 }
15931 DialectType::Dremio => {
15932 // Dremio: DATE_ADD(date, amount) - drops unit
15933 Ok(Expression::Function(Box::new(Function::new(
15934 "DATE_ADD".to_string(),
15935 vec![arg2, arg1],
15936 ))))
15937 }
15938 DialectType::Spark => {
15939 // Spark: DATE_ADD(date, val) for DAY, or DATEADD(UNIT, val, date)
15940 if unit_str == "DAY" {
15941 Ok(Expression::Function(Box::new(Function::new(
15942 "DATE_ADD".to_string(),
15943 vec![arg2, arg1],
15944 ))))
15945 } else {
15946 let unit =
15947 Expression::Identifier(Identifier::new(&unit_str));
15948 Ok(Expression::Function(Box::new(Function::new(
15949 "DATE_ADD".to_string(),
15950 vec![unit, arg1, arg2],
15951 ))))
15952 }
15953 }
15954 DialectType::Databricks => {
15955 let unit =
15956 Expression::Identifier(Identifier::new(&unit_str));
15957 Ok(Expression::Function(Box::new(Function::new(
15958 "DATE_ADD".to_string(),
15959 vec![unit, arg1, arg2],
15960 ))))
15961 }
15962 DialectType::Hive => {
15963 // Hive: DATE_ADD(date, val) for DAY
15964 Ok(Expression::Function(Box::new(Function::new(
15965 "DATE_ADD".to_string(),
15966 vec![arg2, arg1],
15967 ))))
15968 }
15969 _ => {
15970 let unit =
15971 Expression::Identifier(Identifier::new(&unit_str));
15972 Ok(Expression::Function(Box::new(Function::new(
15973 "DATE_ADD".to_string(),
15974 vec![unit, arg1, arg2],
15975 ))))
15976 }
15977 }
15978 }
15979 // DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days)
15980 "DATE_ADD"
15981 if f.args.len() == 2
15982 && matches!(
15983 source,
15984 DialectType::Hive
15985 | DialectType::Spark
15986 | DialectType::Databricks
15987 | DialectType::Generic
15988 ) =>
15989 {
15990 let mut args = f.args;
15991 let date = args.remove(0);
15992 let days = args.remove(0);
15993 match target {
15994 DialectType::Hive | DialectType::Spark => {
15995 // Keep as DATE_ADD(date, days) for Hive/Spark
15996 Ok(Expression::Function(Box::new(Function::new(
15997 "DATE_ADD".to_string(),
15998 vec![date, days],
15999 ))))
16000 }
16001 DialectType::Databricks => {
16002 // Databricks: DATEADD(DAY, days, date)
16003 Ok(Expression::Function(Box::new(Function::new(
16004 "DATEADD".to_string(),
16005 vec![
16006 Expression::Identifier(Identifier::new("DAY")),
16007 days,
16008 date,
16009 ],
16010 ))))
16011 }
16012 DialectType::DuckDB => {
16013 // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
16014 let cast_date = Self::ensure_cast_date(date);
16015 // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
16016 let interval_val = if matches!(
16017 days,
16018 Expression::Mul(_)
16019 | Expression::Sub(_)
16020 | Expression::Add(_)
16021 ) {
16022 Expression::Paren(Box::new(crate::expressions::Paren {
16023 this: days,
16024 trailing_comments: vec![],
16025 }))
16026 } else {
16027 days
16028 };
16029 let interval = Expression::Interval(Box::new(
16030 crate::expressions::Interval {
16031 this: Some(interval_val),
16032 unit: Some(
16033 crate::expressions::IntervalUnitSpec::Simple {
16034 unit: crate::expressions::IntervalUnit::Day,
16035 use_plural: false,
16036 },
16037 ),
16038 },
16039 ));
16040 Ok(Expression::Add(Box::new(
16041 crate::expressions::BinaryOp::new(cast_date, interval),
16042 )))
16043 }
16044 DialectType::Snowflake => {
16045 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
16046 let cast_date = if matches!(
16047 source,
16048 DialectType::Hive
16049 | DialectType::Spark
16050 | DialectType::Databricks
16051 ) {
16052 if matches!(
16053 date,
16054 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
16055 ) {
16056 Self::double_cast_timestamp_date(date)
16057 } else {
16058 date
16059 }
16060 } else {
16061 date
16062 };
16063 Ok(Expression::Function(Box::new(Function::new(
16064 "DATEADD".to_string(),
16065 vec![
16066 Expression::Identifier(Identifier::new("DAY")),
16067 days,
16068 cast_date,
16069 ],
16070 ))))
16071 }
16072 DialectType::Redshift => {
16073 Ok(Expression::Function(Box::new(Function::new(
16074 "DATEADD".to_string(),
16075 vec![
16076 Expression::Identifier(Identifier::new("DAY")),
16077 days,
16078 date,
16079 ],
16080 ))))
16081 }
16082 DialectType::TSQL | DialectType::Fabric => {
16083 // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
16084 // But Databricks DATE_ADD doesn't need this wrapping for TSQL
16085 let cast_date = if matches!(
16086 source,
16087 DialectType::Hive | DialectType::Spark
16088 ) {
16089 if matches!(
16090 date,
16091 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
16092 ) {
16093 Self::double_cast_datetime2_date(date)
16094 } else {
16095 date
16096 }
16097 } else {
16098 date
16099 };
16100 Ok(Expression::Function(Box::new(Function::new(
16101 "DATEADD".to_string(),
16102 vec![
16103 Expression::Identifier(Identifier::new("DAY")),
16104 days,
16105 cast_date,
16106 ],
16107 ))))
16108 }
16109 DialectType::Presto
16110 | DialectType::Trino
16111 | DialectType::Athena => {
16112 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
16113 let cast_date = if matches!(
16114 source,
16115 DialectType::Hive
16116 | DialectType::Spark
16117 | DialectType::Databricks
16118 ) {
16119 if matches!(
16120 date,
16121 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
16122 ) {
16123 Self::double_cast_timestamp_date(date)
16124 } else {
16125 date
16126 }
16127 } else {
16128 date
16129 };
16130 Ok(Expression::Function(Box::new(Function::new(
16131 "DATE_ADD".to_string(),
16132 vec![Expression::string("DAY"), days, cast_date],
16133 ))))
16134 }
16135 DialectType::BigQuery => {
16136 // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
16137 let cast_date = if matches!(
16138 source,
16139 DialectType::Hive
16140 | DialectType::Spark
16141 | DialectType::Databricks
16142 ) {
16143 Self::double_cast_datetime_date(date)
16144 } else {
16145 date
16146 };
16147 // Wrap complex expressions in Paren for interval
16148 let interval_val = if matches!(
16149 days,
16150 Expression::Mul(_)
16151 | Expression::Sub(_)
16152 | Expression::Add(_)
16153 ) {
16154 Expression::Paren(Box::new(crate::expressions::Paren {
16155 this: days,
16156 trailing_comments: vec![],
16157 }))
16158 } else {
16159 days
16160 };
16161 let interval = Expression::Interval(Box::new(
16162 crate::expressions::Interval {
16163 this: Some(interval_val),
16164 unit: Some(
16165 crate::expressions::IntervalUnitSpec::Simple {
16166 unit: crate::expressions::IntervalUnit::Day,
16167 use_plural: false,
16168 },
16169 ),
16170 },
16171 ));
16172 Ok(Expression::Function(Box::new(Function::new(
16173 "DATE_ADD".to_string(),
16174 vec![cast_date, interval],
16175 ))))
16176 }
16177 DialectType::MySQL => {
16178 let iu = crate::expressions::IntervalUnit::Day;
16179 Ok(Expression::DateAdd(Box::new(
16180 crate::expressions::DateAddFunc {
16181 this: date,
16182 interval: days,
16183 unit: iu,
16184 },
16185 )))
16186 }
16187 DialectType::PostgreSQL => {
16188 let interval = Expression::Interval(Box::new(
16189 crate::expressions::Interval {
16190 this: Some(Expression::string(&format!(
16191 "{} DAY",
16192 Self::expr_to_string_static(&days)
16193 ))),
16194 unit: None,
16195 },
16196 ));
16197 Ok(Expression::Add(Box::new(
16198 crate::expressions::BinaryOp::new(date, interval),
16199 )))
16200 }
16201 DialectType::Doris
16202 | DialectType::StarRocks
16203 | DialectType::Drill => {
16204 // DATE_ADD(date, INTERVAL days DAY)
16205 let interval = Expression::Interval(Box::new(
16206 crate::expressions::Interval {
16207 this: Some(days),
16208 unit: Some(
16209 crate::expressions::IntervalUnitSpec::Simple {
16210 unit: crate::expressions::IntervalUnit::Day,
16211 use_plural: false,
16212 },
16213 ),
16214 },
16215 ));
16216 Ok(Expression::Function(Box::new(Function::new(
16217 "DATE_ADD".to_string(),
16218 vec![date, interval],
16219 ))))
16220 }
16221 _ => Ok(Expression::Function(Box::new(Function::new(
16222 "DATE_ADD".to_string(),
16223 vec![date, days],
16224 )))),
16225 }
16226 }
16227 // DATE_ADD(date, INTERVAL val UNIT) - MySQL 2-arg form with INTERVAL as 2nd arg
16228 "DATE_ADD"
16229 if f.args.len() == 2
16230 && matches!(
16231 source,
16232 DialectType::MySQL | DialectType::SingleStore
16233 )
16234 && matches!(&f.args[1], Expression::Interval(_)) =>
16235 {
16236 let mut args = f.args;
16237 let date = args.remove(0);
16238 let interval_expr = args.remove(0);
16239 let (val, unit) = Self::extract_interval_parts(&interval_expr);
16240 let unit_str = Self::interval_unit_to_string(&unit);
16241 let is_literal = matches!(&val,
16242 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_) | Literal::String(_))
16243 );
16244
16245 match target {
16246 DialectType::MySQL | DialectType::SingleStore => {
16247 // Keep as DATE_ADD(date, INTERVAL val UNIT)
16248 Ok(Expression::Function(Box::new(Function::new(
16249 "DATE_ADD".to_string(),
16250 vec![date, interval_expr],
16251 ))))
16252 }
16253 DialectType::PostgreSQL => {
16254 if is_literal {
16255 // Literal: date + INTERVAL 'val UNIT'
16256 let interval = Expression::Interval(Box::new(
16257 crate::expressions::Interval {
16258 this: Some(Expression::Literal(Box::new(
16259 Literal::String(format!(
16260 "{} {}",
16261 Self::expr_to_string(&val),
16262 unit_str
16263 )),
16264 ))),
16265 unit: None,
16266 },
16267 ));
16268 Ok(Expression::Add(Box::new(
16269 crate::expressions::BinaryOp::new(date, interval),
16270 )))
16271 } else {
16272 // Non-literal (column ref): date + INTERVAL '1 UNIT' * val
16273 let interval_one = Expression::Interval(Box::new(
16274 crate::expressions::Interval {
16275 this: Some(Expression::Literal(Box::new(
16276 Literal::String(format!("1 {}", unit_str)),
16277 ))),
16278 unit: None,
16279 },
16280 ));
16281 let mul = Expression::Mul(Box::new(
16282 crate::expressions::BinaryOp::new(
16283 interval_one,
16284 val,
16285 ),
16286 ));
16287 Ok(Expression::Add(Box::new(
16288 crate::expressions::BinaryOp::new(date, mul),
16289 )))
16290 }
16291 }
16292 _ => {
16293 // Default: keep as DATE_ADD(date, interval)
16294 Ok(Expression::Function(Box::new(Function::new(
16295 "DATE_ADD".to_string(),
16296 vec![date, interval_expr],
16297 ))))
16298 }
16299 }
16300 }
16301 // DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days)
16302 "DATE_SUB"
16303 if f.args.len() == 2
16304 && matches!(
16305 source,
16306 DialectType::Hive
16307 | DialectType::Spark
16308 | DialectType::Databricks
16309 ) =>
16310 {
16311 let mut args = f.args;
16312 let date = args.remove(0);
16313 let days = args.remove(0);
16314 // Helper to create days * -1
16315 let make_neg_days = |d: Expression| -> Expression {
16316 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
16317 d,
16318 Expression::Literal(Box::new(Literal::Number(
16319 "-1".to_string(),
16320 ))),
16321 )))
16322 };
16323 let is_string_literal = matches!(date, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)));
16324 match target {
16325 DialectType::Hive
16326 | DialectType::Spark
16327 | DialectType::Databricks => {
16328 // Keep as DATE_SUB(date, days) for Hive/Spark
16329 Ok(Expression::Function(Box::new(Function::new(
16330 "DATE_SUB".to_string(),
16331 vec![date, days],
16332 ))))
16333 }
16334 DialectType::DuckDB => {
16335 let cast_date = Self::ensure_cast_date(date);
16336 let neg = make_neg_days(days);
16337 let interval = Expression::Interval(Box::new(
16338 crate::expressions::Interval {
16339 this: Some(Expression::Paren(Box::new(
16340 crate::expressions::Paren {
16341 this: neg,
16342 trailing_comments: vec![],
16343 },
16344 ))),
16345 unit: Some(
16346 crate::expressions::IntervalUnitSpec::Simple {
16347 unit: crate::expressions::IntervalUnit::Day,
16348 use_plural: false,
16349 },
16350 ),
16351 },
16352 ));
16353 Ok(Expression::Add(Box::new(
16354 crate::expressions::BinaryOp::new(cast_date, interval),
16355 )))
16356 }
16357 DialectType::Snowflake => {
16358 let cast_date = if is_string_literal {
16359 Self::double_cast_timestamp_date(date)
16360 } else {
16361 date
16362 };
16363 let neg = make_neg_days(days);
16364 Ok(Expression::Function(Box::new(Function::new(
16365 "DATEADD".to_string(),
16366 vec![
16367 Expression::Identifier(Identifier::new("DAY")),
16368 neg,
16369 cast_date,
16370 ],
16371 ))))
16372 }
16373 DialectType::Redshift => {
16374 let neg = make_neg_days(days);
16375 Ok(Expression::Function(Box::new(Function::new(
16376 "DATEADD".to_string(),
16377 vec![
16378 Expression::Identifier(Identifier::new("DAY")),
16379 neg,
16380 date,
16381 ],
16382 ))))
16383 }
16384 DialectType::TSQL | DialectType::Fabric => {
16385 let cast_date = if is_string_literal {
16386 Self::double_cast_datetime2_date(date)
16387 } else {
16388 date
16389 };
16390 let neg = make_neg_days(days);
16391 Ok(Expression::Function(Box::new(Function::new(
16392 "DATEADD".to_string(),
16393 vec![
16394 Expression::Identifier(Identifier::new("DAY")),
16395 neg,
16396 cast_date,
16397 ],
16398 ))))
16399 }
16400 DialectType::Presto
16401 | DialectType::Trino
16402 | DialectType::Athena => {
16403 let cast_date = if is_string_literal {
16404 Self::double_cast_timestamp_date(date)
16405 } else {
16406 date
16407 };
16408 let neg = make_neg_days(days);
16409 Ok(Expression::Function(Box::new(Function::new(
16410 "DATE_ADD".to_string(),
16411 vec![Expression::string("DAY"), neg, cast_date],
16412 ))))
16413 }
16414 DialectType::BigQuery => {
16415 let cast_date = if is_string_literal {
16416 Self::double_cast_datetime_date(date)
16417 } else {
16418 date
16419 };
16420 let neg = make_neg_days(days);
16421 let interval = Expression::Interval(Box::new(
16422 crate::expressions::Interval {
16423 this: Some(Expression::Paren(Box::new(
16424 crate::expressions::Paren {
16425 this: neg,
16426 trailing_comments: vec![],
16427 },
16428 ))),
16429 unit: Some(
16430 crate::expressions::IntervalUnitSpec::Simple {
16431 unit: crate::expressions::IntervalUnit::Day,
16432 use_plural: false,
16433 },
16434 ),
16435 },
16436 ));
16437 Ok(Expression::Function(Box::new(Function::new(
16438 "DATE_ADD".to_string(),
16439 vec![cast_date, interval],
16440 ))))
16441 }
16442 _ => Ok(Expression::Function(Box::new(Function::new(
16443 "DATE_SUB".to_string(),
16444 vec![date, days],
16445 )))),
16446 }
16447 }
16448 // ADD_MONTHS(date, val) -> target-specific
16449 "ADD_MONTHS" if f.args.len() == 2 => {
16450 let mut args = f.args;
16451 let date = args.remove(0);
16452 let val = args.remove(0);
16453 match target {
16454 DialectType::TSQL => {
16455 let cast_date = Self::ensure_cast_datetime2(date);
16456 Ok(Expression::Function(Box::new(Function::new(
16457 "DATEADD".to_string(),
16458 vec![
16459 Expression::Identifier(Identifier::new("MONTH")),
16460 val,
16461 cast_date,
16462 ],
16463 ))))
16464 }
16465 DialectType::DuckDB => {
16466 let interval = Expression::Interval(Box::new(
16467 crate::expressions::Interval {
16468 this: Some(val),
16469 unit: Some(
16470 crate::expressions::IntervalUnitSpec::Simple {
16471 unit:
16472 crate::expressions::IntervalUnit::Month,
16473 use_plural: false,
16474 },
16475 ),
16476 },
16477 ));
16478 Ok(Expression::Add(Box::new(
16479 crate::expressions::BinaryOp::new(date, interval),
16480 )))
16481 }
16482 DialectType::Snowflake => {
16483 // Keep ADD_MONTHS when source is Snowflake
16484 if matches!(source, DialectType::Snowflake) {
16485 Ok(Expression::Function(Box::new(Function::new(
16486 "ADD_MONTHS".to_string(),
16487 vec![date, val],
16488 ))))
16489 } else {
16490 Ok(Expression::Function(Box::new(Function::new(
16491 "DATEADD".to_string(),
16492 vec![
16493 Expression::Identifier(Identifier::new(
16494 "MONTH",
16495 )),
16496 val,
16497 date,
16498 ],
16499 ))))
16500 }
16501 }
16502 DialectType::Redshift => {
16503 Ok(Expression::Function(Box::new(Function::new(
16504 "DATEADD".to_string(),
16505 vec![
16506 Expression::Identifier(Identifier::new("MONTH")),
16507 val,
16508 date,
16509 ],
16510 ))))
16511 }
16512 DialectType::Presto
16513 | DialectType::Trino
16514 | DialectType::Athena => {
16515 Ok(Expression::Function(Box::new(Function::new(
16516 "DATE_ADD".to_string(),
16517 vec![Expression::string("MONTH"), val, date],
16518 ))))
16519 }
16520 DialectType::BigQuery => {
16521 let interval = Expression::Interval(Box::new(
16522 crate::expressions::Interval {
16523 this: Some(val),
16524 unit: Some(
16525 crate::expressions::IntervalUnitSpec::Simple {
16526 unit:
16527 crate::expressions::IntervalUnit::Month,
16528 use_plural: false,
16529 },
16530 ),
16531 },
16532 ));
16533 Ok(Expression::Function(Box::new(Function::new(
16534 "DATE_ADD".to_string(),
16535 vec![date, interval],
16536 ))))
16537 }
16538 _ => Ok(Expression::Function(Box::new(Function::new(
16539 "ADD_MONTHS".to_string(),
16540 vec![date, val],
16541 )))),
16542 }
16543 }
16544 // DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
16545 "DATETRUNC" if f.args.len() == 2 => {
16546 let mut args = f.args;
16547 let arg0 = args.remove(0);
16548 let arg1 = args.remove(0);
16549 let unit_str = Self::get_unit_str_static(&arg0);
16550 match target {
16551 DialectType::TSQL | DialectType::Fabric => {
16552 // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
16553 Ok(Expression::Function(Box::new(Function::new(
16554 "DATETRUNC".to_string(),
16555 vec![
16556 Expression::Identifier(Identifier::new(&unit_str)),
16557 arg1,
16558 ],
16559 ))))
16560 }
16561 DialectType::DuckDB => {
16562 // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
16563 let date = Self::ensure_cast_timestamp(arg1);
16564 Ok(Expression::Function(Box::new(Function::new(
16565 "DATE_TRUNC".to_string(),
16566 vec![Expression::string(&unit_str), date],
16567 ))))
16568 }
16569 DialectType::ClickHouse => {
16570 // ClickHouse: dateTrunc('UNIT', expr)
16571 Ok(Expression::Function(Box::new(Function::new(
16572 "dateTrunc".to_string(),
16573 vec![Expression::string(&unit_str), arg1],
16574 ))))
16575 }
16576 _ => {
16577 // Standard: DATE_TRUNC('UNIT', expr)
16578 let unit = Expression::string(&unit_str);
16579 Ok(Expression::Function(Box::new(Function::new(
16580 "DATE_TRUNC".to_string(),
16581 vec![unit, arg1],
16582 ))))
16583 }
16584 }
16585 }
16586 // GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
16587 "GETDATE" if f.args.is_empty() => match target {
16588 DialectType::TSQL => Ok(Expression::Function(f)),
16589 DialectType::Redshift => Ok(Expression::Function(Box::new(
16590 Function::new("GETDATE".to_string(), vec![]),
16591 ))),
16592 _ => Ok(Expression::CurrentTimestamp(
16593 crate::expressions::CurrentTimestamp {
16594 precision: None,
16595 sysdate: false,
16596 },
16597 )),
16598 },
16599 // TO_HEX(x) / HEX(x) -> target-specific hex function
16600 "TO_HEX" | "HEX" if f.args.len() == 1 => {
16601 let name = match target {
16602 DialectType::Presto | DialectType::Trino => "TO_HEX",
16603 DialectType::Spark
16604 | DialectType::Databricks
16605 | DialectType::Hive => "HEX",
16606 DialectType::DuckDB
16607 | DialectType::PostgreSQL
16608 | DialectType::Redshift => "TO_HEX",
16609 _ => &f.name,
16610 };
16611 Ok(Expression::Function(Box::new(Function::new(
16612 name.to_string(),
16613 f.args,
16614 ))))
16615 }
16616 // FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
16617 "FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
16618 match target {
16619 DialectType::BigQuery => {
16620 // BigQuery: UNHEX(x) -> FROM_HEX(x)
16621 // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
16622 // because BigQuery MD5 returns BYTES, not hex string
16623 let arg = &f.args[0];
16624 let wrapped_arg = match arg {
16625 Expression::Function(inner_f)
16626 if inner_f.name.eq_ignore_ascii_case("MD5")
16627 || inner_f
16628 .name
16629 .eq_ignore_ascii_case("SHA1")
16630 || inner_f
16631 .name
16632 .eq_ignore_ascii_case("SHA256")
16633 || inner_f
16634 .name
16635 .eq_ignore_ascii_case("SHA512") =>
16636 {
16637 // Wrap hash function in TO_HEX for BigQuery
16638 Expression::Function(Box::new(Function::new(
16639 "TO_HEX".to_string(),
16640 vec![arg.clone()],
16641 )))
16642 }
16643 _ => f.args.into_iter().next().unwrap(),
16644 };
16645 Ok(Expression::Function(Box::new(Function::new(
16646 "FROM_HEX".to_string(),
16647 vec![wrapped_arg],
16648 ))))
16649 }
16650 _ => {
16651 let name = match target {
16652 DialectType::Presto | DialectType::Trino => "FROM_HEX",
16653 DialectType::Spark
16654 | DialectType::Databricks
16655 | DialectType::Hive => "UNHEX",
16656 _ => &f.name,
16657 };
16658 Ok(Expression::Function(Box::new(Function::new(
16659 name.to_string(),
16660 f.args,
16661 ))))
16662 }
16663 }
16664 }
16665 // TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
16666 "TO_UTF8" if f.args.len() == 1 => match target {
16667 DialectType::Spark | DialectType::Databricks => {
16668 let mut args = f.args;
16669 args.push(Expression::string("utf-8"));
16670 Ok(Expression::Function(Box::new(Function::new(
16671 "ENCODE".to_string(),
16672 args,
16673 ))))
16674 }
16675 _ => Ok(Expression::Function(f)),
16676 },
16677 // FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
16678 "FROM_UTF8" if f.args.len() == 1 => match target {
16679 DialectType::Spark | DialectType::Databricks => {
16680 let mut args = f.args;
16681 args.push(Expression::string("utf-8"));
16682 Ok(Expression::Function(Box::new(Function::new(
16683 "DECODE".to_string(),
16684 args,
16685 ))))
16686 }
16687 _ => Ok(Expression::Function(f)),
16688 },
16689 // STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
16690 "STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
16691 let name = match target {
16692 DialectType::Spark | DialectType::Databricks => "STARTSWITH",
16693 DialectType::Presto | DialectType::Trino => "STARTS_WITH",
16694 DialectType::PostgreSQL | DialectType::Redshift => {
16695 "STARTS_WITH"
16696 }
16697 _ => &f.name,
16698 };
16699 Ok(Expression::Function(Box::new(Function::new(
16700 name.to_string(),
16701 f.args,
16702 ))))
16703 }
16704 // APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
16705 "APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
16706 let name = match target {
16707 DialectType::Presto
16708 | DialectType::Trino
16709 | DialectType::Athena => "APPROX_DISTINCT",
16710 _ => "APPROX_COUNT_DISTINCT",
16711 };
16712 Ok(Expression::Function(Box::new(Function::new(
16713 name.to_string(),
16714 f.args,
16715 ))))
16716 }
// JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
// BigQuery sources are excluded because BigQuery's JSON_EXTRACT has its own
// path semantics handled elsewhere.
"JSON_EXTRACT"
    if f.args.len() == 2
        && !matches!(source, DialectType::BigQuery)
        && matches!(
            target,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    Ok(Expression::Function(Box::new(Function::new(
        "GET_JSON_OBJECT".to_string(),
        f.args,
    ))))
}
// JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
// Rewrites the call into the dedicated JsonExtract AST node so the SQLite
// generator emits `x -> path` instead of a function call.
"JSON_EXTRACT"
    if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
{
    let mut args = f.args;
    // Remove the path first (index 1) so index 0 is still the subject.
    let path = args.remove(1);
    let this = args.remove(0);
    Ok(Expression::JsonExtract(Box::new(
        crate::expressions::JsonExtractFunc {
            this,
            path,
            returning: None,
            arrow_syntax: true,
            hash_arrow_syntax: false,
            wrapper_option: None,
            quotes_option: None,
            on_scalar_string: false,
            on_error: None,
        },
    )))
}
// JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
"JSON_FORMAT" if f.args.len() == 1 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
            // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
            // The literal is wrapped in [...] so SCHEMA_OF_JSON can infer a
            // schema even for scalars, then the surrounding brackets are
            // stripped back off by the REGEXP_EXTRACT pattern.
            if matches!(
                source,
                DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena
            ) {
                // This path only fires for a ParseJson wrapping a string
                // literal (i.e. JSON '...'); anything else falls through.
                if let Some(Expression::ParseJson(pj)) = f.args.first()
                {
                    if let Expression::Literal(lit) = &pj.this {
                        if let Literal::String(s) = lit.as_ref() {
                            let wrapped =
                                Expression::Literal(Box::new(
                                    Literal::String(format!("[{}]", s)),
                                ));
                            let schema_of_json = Expression::Function(
                                Box::new(Function::new(
                                    "SCHEMA_OF_JSON".to_string(),
                                    vec![wrapped.clone()],
                                )),
                            );
                            let from_json = Expression::Function(
                                Box::new(Function::new(
                                    "FROM_JSON".to_string(),
                                    vec![wrapped, schema_of_json],
                                )),
                            );
                            let to_json = Expression::Function(
                                Box::new(Function::new(
                                    "TO_JSON".to_string(),
                                    vec![from_json],
                                )),
                            );
                            return Ok(Expression::Function(Box::new(
                                Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![
                                        to_json,
                                        Expression::Literal(Box::new(
                                            Literal::String(
                                                "^.(.*).$".to_string(),
                                            ),
                                        )),
                                        Expression::Literal(Box::new(
                                            Literal::Number(
                                                "1".to_string(),
                                            ),
                                        )),
                                    ],
                                ),
                            )));
                        }
                    }
                }
            }

            // Strip inner CAST(... AS JSON) or TO_JSON() if present
            // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
            let mut args = f.args;
            if let Some(Expression::Cast(ref c)) = args.first() {
                if matches!(&c.to, DataType::Json | DataType::JsonB) {
                    args = vec![c.this.clone()];
                }
            } else if let Some(Expression::Function(ref inner_f)) =
                args.first()
            {
                if inner_f.name.eq_ignore_ascii_case("TO_JSON")
                    && inner_f.args.len() == 1
                {
                    // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
                    args = inner_f.args.clone();
                }
            }
            Ok(Expression::Function(Box::new(Function::new(
                "TO_JSON".to_string(),
                args,
            ))))
        }
        DialectType::BigQuery => Ok(Expression::Function(Box::new(
            Function::new("TO_JSON_STRING".to_string(), f.args),
        ))),
        DialectType::DuckDB => {
            // CAST(TO_JSON(x) AS TEXT)
            let to_json = Expression::Function(Box::new(
                Function::new("TO_JSON".to_string(), f.args),
            ));
            Ok(Expression::Cast(Box::new(Cast {
                this: to_json,
                to: DataType::Text,
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        // Other targets keep JSON_FORMAT verbatim.
        _ => Ok(Expression::Function(f)),
    }
}
// SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
"SYSDATE" if f.args.is_empty() => {
    match target {
        // Oracle and Redshift support bare SYSDATE natively.
        DialectType::Oracle | DialectType::Redshift => {
            Ok(Expression::Function(f))
        }
        DialectType::Snowflake => {
            // Snowflake uses SYSDATE() with parens
            let mut f = *f;
            f.no_parens = false;
            Ok(Expression::Function(Box::new(f)))
        }
        DialectType::DuckDB => {
            // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
            Ok(Expression::AtTimeZone(Box::new(
                crate::expressions::AtTimeZone {
                    this: Expression::CurrentTimestamp(
                        crate::expressions::CurrentTimestamp {
                            precision: None,
                            sysdate: false,
                        },
                    ),
                    zone: Expression::Literal(Box::new(
                        Literal::String("UTC".to_string()),
                    )),
                },
            )))
        }
        // Everyone else: CURRENT_TIMESTAMP, tagged with sysdate=true so
        // generators that care about provenance can tell it apart.
        _ => Ok(Expression::CurrentTimestamp(
            crate::expressions::CurrentTimestamp {
                precision: None,
                sysdate: true,
            },
        )),
    }
}
// LOGICAL_OR(x) -> BOOL_OR(x)
// Spark/Databricks spell the boolean aggregate BOOL_OR; others keep the name.
"LOGICAL_OR" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "BOOL_OR",
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// LOGICAL_AND(x) -> BOOL_AND(x)
// Mirror image of the LOGICAL_OR arm above.
"LOGICAL_AND" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "BOOL_AND",
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
"MONTHS_ADD" if f.args.len() == 2 => match target {
    DialectType::Oracle => Ok(Expression::Function(Box::new(
        Function::new("ADD_MONTHS".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
"ARRAY_JOIN" if f.args.len() >= 2 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
            Ok(Expression::Function(f))
        }
        DialectType::Hive => {
            // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
            // Note the argument order flip: CONCAT_WS takes the separator first.
            let mut args = f.args;
            let arr = args.remove(0);
            let sep = args.remove(0);
            // Drop any remaining args (null_replacement)
            Ok(Expression::Function(Box::new(Function::new(
                "CONCAT_WS".to_string(),
                vec![sep, arr],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            Ok(Expression::Function(f))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// LOCATE(substr, str, pos) 3-arg -> target-specific
// For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
"LOCATE"
    if f.args.len() == 3
        && matches!(
            target,
            DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
                | DialectType::DuckDB
        ) =>
{
    let mut args = f.args;
    let substr = args.remove(0);
    let string = args.remove(0);
    let pos = args.remove(0);
    // STRPOS(SUBSTRING(string, pos), substr)
    let substring_call = Expression::Function(Box::new(Function::new(
        "SUBSTRING".to_string(),
        vec![string.clone(), pos.clone()],
    )));
    let strpos_call = Expression::Function(Box::new(Function::new(
        "STRPOS".to_string(),
        vec![substring_call, substr.clone()],
    )));
    // STRPOS(...) + pos - 1  (re-bases the match position from the
    // substring back onto the original string)
    let pos_adjusted =
        Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
            Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(
                    strpos_call.clone(),
                    pos.clone(),
                ),
            )),
            Expression::number(1),
        )));
    // STRPOS(...) = 0  (not-found sentinel; keeps LOCATE's 0 result)
    let is_zero =
        Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
            strpos_call.clone(),
            Expression::number(0),
        )));

    match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
            Ok(Expression::Function(Box::new(Function::new(
                "IF".to_string(),
                vec![is_zero, Expression::number(0), pos_adjusted],
            ))))
        }
        DialectType::DuckDB => {
            // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
            Ok(Expression::Case(Box::new(crate::expressions::Case {
                operand: None,
                whens: vec![(is_zero, Expression::number(0))],
                else_: Some(pos_adjusted),
                comments: Vec::new(),
                inferred_type: None,
            })))
        }
        // Unreachable given the arm guard (target is one of the four
        // dialects above), but kept as a safe fallback.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "LOCATE".to_string(),
            vec![substr, string, pos],
        )))),
    }
}
// STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
// INSTR's third argument (start position) is pinned to 1; only the
// occurrence count is carried over.
"STRPOS"
    if f.args.len() == 3
        && matches!(
            target,
            DialectType::BigQuery
                | DialectType::Oracle
                | DialectType::Teradata
        ) =>
{
    let mut args = f.args;
    let haystack = args.remove(0);
    let needle = args.remove(0);
    let occurrence = args.remove(0);
    Ok(Expression::Function(Box::new(Function::new(
        "INSTR".to_string(),
        vec![haystack, needle, Expression::number(1), occurrence],
    ))))
}
// SCHEMA_NAME(id) -> target-specific
// Note: any id argument is discarded for the rewritten targets.
"SCHEMA_NAME" if f.args.len() <= 1 => match target {
    DialectType::MySQL | DialectType::SingleStore => {
        Ok(Expression::Function(Box::new(Function::new(
            "SCHEMA".to_string(),
            vec![],
        ))))
    }
    DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
        crate::expressions::CurrentSchema { this: None },
    ))),
    // SQLite's primary schema is always literally named 'main'.
    DialectType::SQLite => Ok(Expression::string("main")),
    _ => Ok(Expression::Function(f)),
},
// STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
"STRTOL" if f.args.len() == 2 => match target {
    DialectType::Presto | DialectType::Trino => {
        Ok(Expression::Function(Box::new(Function::new(
            "FROM_BASE".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
"EDITDIST3" if f.args.len() == 2 => match target {
    DialectType::Spark | DialectType::Databricks => {
        Ok(Expression::Function(Box::new(Function::new(
            "LEVENSHTEIN".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
"FORMAT"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::MySQL | DialectType::SingleStore
        )
        && matches!(target, DialectType::DuckDB) =>
{
    let mut args = f.args;
    let num_expr = args.remove(0);
    let decimals_expr = args.remove(0);
    // Extract decimal count
    // Non-literal decimal counts fall back to 0 decimal places.
    let dec_count = match &decimals_expr {
        Expression::Literal(lit)
            if matches!(lit.as_ref(), Literal::Number(_)) =>
        {
            let Literal::Number(n) = lit.as_ref() else {
                unreachable!()
            };
            n.clone()
        }
        _ => "0".to_string(),
    };
    // Builds a DuckDB fmt-style spec, e.g. decimals=2 -> "{:,.2f}".
    let fmt_str = format!("{{:,.{}f}}", dec_count);
    Ok(Expression::Function(Box::new(Function::new(
        "FORMAT".to_string(),
        vec![Expression::string(&fmt_str), num_expr],
    ))))
}
// FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
"FORMAT"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::TSQL | DialectType::Fabric
        ) =>
{
    let val_expr = f.args[0].clone();
    let fmt_expr = f.args[1].clone();
    // Expand unambiguous .NET single-char date format shortcodes to full patterns.
    // Only expand shortcodes that are NOT also valid numeric format specifiers.
    // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
    // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
    let (expanded_fmt, is_shortcode) = match &fmt_expr {
        Expression::Literal(lit)
            if matches!(
                lit.as_ref(),
                crate::expressions::Literal::String(_)
            ) =>
        {
            let crate::expressions::Literal::String(s) = lit.as_ref()
            else {
                unreachable!()
            };
            match s.as_str() {
                "m" | "M" => (Expression::string("MMMM d"), true),
                "t" => (Expression::string("h:mm tt"), true),
                "T" => (Expression::string("h:mm:ss tt"), true),
                "y" | "Y" => (Expression::string("MMMM yyyy"), true),
                _ => (fmt_expr.clone(), false),
            }
        }
        _ => (fmt_expr.clone(), false),
    };
    // Check if the format looks like a date format
    // (heuristic substring scan; shortcodes expanded above always count).
    let is_date_format = is_shortcode
        || match &expanded_fmt {
            Expression::Literal(lit)
                if matches!(
                    lit.as_ref(),
                    crate::expressions::Literal::String(_)
                ) =>
            {
                let crate::expressions::Literal::String(s) =
                    lit.as_ref()
                else {
                    unreachable!()
                };
                // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
                s.contains("yyyy")
                    || s.contains("YYYY")
                    || s.contains("MM")
                    || s.contains("dd")
                    || s.contains("MMMM")
                    || s.contains("HH")
                    || s.contains("hh")
                    || s.contains("ss")
            }
            _ => false,
        };
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Dates go to DATE_FORMAT, everything else to FORMAT_NUMBER.
            let func_name = if is_date_format {
                "DATE_FORMAT"
            } else {
                "FORMAT_NUMBER"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![val_expr, expanded_fmt],
            ))))
        }
        _ => {
            // For TSQL and other targets, expand shortcodes but keep FORMAT
            if is_shortcode {
                Ok(Expression::Function(Box::new(Function::new(
                    "FORMAT".to_string(),
                    vec![val_expr, expanded_fmt],
                ))))
            } else {
                Ok(Expression::Function(f))
            }
        }
    }
}
// FORMAT('%s', x) from Trino/Presto -> target-specific
"FORMAT"
    if f.args.len() >= 2
        && matches!(
            source,
            DialectType::Trino
                | DialectType::Presto
                | DialectType::Athena
        ) =>
{
    let fmt_expr = f.args[0].clone();
    let value_args: Vec<Expression> = f.args[1..].to_vec();
    match target {
        // DuckDB: replace %s with {} in format string
        DialectType::DuckDB => {
            // Non-literal format strings pass through unchanged.
            let new_fmt = match &fmt_expr {
                Expression::Literal(lit)
                    if matches!(lit.as_ref(), Literal::String(_)) =>
                {
                    let Literal::String(s) = lit.as_ref() else {
                        unreachable!()
                    };
                    Expression::Literal(Box::new(Literal::String(
                        s.replace("%s", "{}"),
                    )))
                }
                _ => fmt_expr,
            };
            let mut args = vec![new_fmt];
            args.extend(value_args);
            Ok(Expression::Function(Box::new(Function::new(
                "FORMAT".to_string(),
                args,
            ))))
        }
        // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
        DialectType::Snowflake => match &fmt_expr {
            // Guard enforces: literal format string equal to "%s" with
            // exactly one value argument.
            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "%s" && value_args.len() == 1) =>
            {
                // NOTE(review): this let-else binds nothing and is a
                // no-op given the guard above — looks removable; verify.
                let Literal::String(_) = lit.as_ref() else {
                    unreachable!()
                };
                Ok(Expression::Function(Box::new(Function::new(
                    "TO_CHAR".to_string(),
                    value_args,
                ))))
            }
            _ => Ok(Expression::Function(f)),
        },
        // Default: keep FORMAT as-is
        _ => Ok(Expression::Function(f)),
    }
}
// LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
"LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
    if f.args.len() == 2 =>
{
    // When coming from Snowflake source: ARRAY_CONTAINS(value, array)
    // args[0]=value, args[1]=array. For DuckDB target, swap and add NULL-aware CASE.
    if matches!(target, DialectType::DuckDB)
        && matches!(source, DialectType::Snowflake)
        && f.name.eq_ignore_ascii_case("ARRAY_CONTAINS")
    {
        let value = f.args[0].clone();
        let array = f.args[1].clone();

        // value IS NULL
        let value_is_null =
            Expression::IsNull(Box::new(crate::expressions::IsNull {
                this: value.clone(),
                not: false,
                postfix_form: false,
            }));

        // ARRAY_LENGTH(array)
        let array_length =
            Expression::Function(Box::new(Function::new(
                "ARRAY_LENGTH".to_string(),
                vec![array.clone()],
            )));
        // LIST_COUNT(array)
        // LIST_COUNT skips NULL elements, so length != count detects
        // that the array contains at least one NULL.
        let list_count = Expression::Function(Box::new(Function::new(
            "LIST_COUNT".to_string(),
            vec![array.clone()],
        )));
        // ARRAY_LENGTH(array) <> LIST_COUNT(array)
        let neq =
            Expression::Neq(Box::new(crate::expressions::BinaryOp {
                left: array_length,
                right: list_count,
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }));
        // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
        // Yields TRUE when the array holds a NULL, otherwise SQL NULL.
        let nullif =
            Expression::Nullif(Box::new(crate::expressions::Nullif {
                this: Box::new(neq),
                expression: Box::new(Expression::Boolean(
                    crate::expressions::BooleanLiteral { value: false },
                )),
            }));

        // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
        let array_contains =
            Expression::Function(Box::new(Function::new(
                "ARRAY_CONTAINS".to_string(),
                vec![array, value],
            )));

        // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
        return Ok(Expression::Case(Box::new(Case {
            operand: None,
            whens: vec![(value_is_null, nullif)],
            else_: Some(array_contains),
            comments: Vec::new(),
            inferred_type: None,
        })));
    }
    match target {
        DialectType::PostgreSQL | DialectType::Redshift => {
            // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
            let arr = f.args[0].clone();
            let needle = f.args[1].clone();
            // Convert [] to ARRAY[] for PostgreSQL
            let pg_arr = match arr {
                Expression::Array(a) => Expression::ArrayFunc(
                    Box::new(crate::expressions::ArrayConstructor {
                        expressions: a.expressions,
                        bracket_notation: false,
                        use_list_keyword: false,
                    }),
                ),
                _ => arr,
            };
            // needle = ANY(arr) using the Any quantified expression
            let any_expr = Expression::Any(Box::new(
                crate::expressions::QuantifiedExpr {
                    this: needle.clone(),
                    subquery: pg_arr,
                    op: Some(crate::expressions::QuantifiedOp::Eq),
                },
            ));
            // COALESCE(..., FALSE) maps "no match among NULLs" to FALSE.
            let coalesce = Expression::Coalesce(Box::new(
                crate::expressions::VarArgFunc {
                    expressions: vec![
                        any_expr,
                        Expression::Boolean(
                            crate::expressions::BooleanLiteral {
                                value: false,
                            },
                        ),
                    ],
                    original_name: None,
                    inferred_type: None,
                },
            ));
            let is_null_check = Expression::IsNull(Box::new(
                crate::expressions::IsNull {
                    this: needle,
                    not: false,
                    postfix_form: false,
                },
            ));
            Ok(Expression::Case(Box::new(Case {
                operand: None,
                whens: vec![(
                    is_null_check,
                    Expression::Null(crate::expressions::Null),
                )],
                else_: Some(coalesce),
                comments: Vec::new(),
                inferred_type: None,
            })))
        }
        // All other targets: normalize the name to ARRAY_CONTAINS.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ARRAY_CONTAINS".to_string(),
            f.args,
        )))),
    }
}
// LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
"LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
    match target {
        DialectType::PostgreSQL | DialectType::Redshift => {
            // arr1 && arr2 with ARRAY[] syntax
            let mut args = f.args;
            let arr1 = args.remove(0);
            let arr2 = args.remove(0);
            // Bracketed array literals are rewritten to ARRAY[...] form.
            let pg_arr1 = match arr1 {
                Expression::Array(a) => Expression::ArrayFunc(
                    Box::new(crate::expressions::ArrayConstructor {
                        expressions: a.expressions,
                        bracket_notation: false,
                        use_list_keyword: false,
                    }),
                ),
                _ => arr1,
            };
            let pg_arr2 = match arr2 {
                Expression::Array(a) => Expression::ArrayFunc(
                    Box::new(crate::expressions::ArrayConstructor {
                        expressions: a.expressions,
                        bracket_notation: false,
                        use_list_keyword: false,
                    }),
                ),
                _ => arr2,
            };
            Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
                pg_arr1, pg_arr2,
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: arr1 && arr2 (native support)
            let mut args = f.args;
            let arr1 = args.remove(0);
            let arr2 = args.remove(0);
            Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
                arr1, arr2,
            ))))
        }
        // Other targets: normalize the name to LIST_HAS_ANY.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "LIST_HAS_ANY".to_string(),
            f.args,
        )))),
    }
}
// APPROX_QUANTILE(x, q) -> target-specific
"APPROX_QUANTILE" if f.args.len() == 2 => match target {
    DialectType::Snowflake => Ok(Expression::Function(Box::new(
        Function::new("APPROX_PERCENTILE".to_string(), f.args),
    ))),
    // DuckDB supports APPROX_QUANTILE natively (same as the fallback).
    DialectType::DuckDB => Ok(Expression::Function(f)),
    _ => Ok(Expression::Function(f)),
},
// MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
"MAKE_DATE" if f.args.len() == 3 => match target {
    DialectType::BigQuery => Ok(Expression::Function(Box::new(
        Function::new("DATE".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
17429 // RANGE(start, end[, step]) -> target-specific
17430 "RANGE"
17431 if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
17432 {
17433 let start = f.args[0].clone();
17434 let end = f.args[1].clone();
17435 let step = f.args.get(2).cloned();
17436 match target {
17437 // Snowflake ARRAY_GENERATE_RANGE uses exclusive end (same as DuckDB RANGE),
17438 // so just rename without adjusting the end argument.
17439 DialectType::Snowflake => {
17440 let mut args = vec![start, end];
17441 if let Some(s) = step {
17442 args.push(s);
17443 }
17444 Ok(Expression::Function(Box::new(Function::new(
17445 "ARRAY_GENERATE_RANGE".to_string(),
17446 args,
17447 ))))
17448 }
17449 DialectType::Spark | DialectType::Databricks => {
17450 // RANGE(start, end) -> SEQUENCE(start, end-1)
17451 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
17452 // RANGE(start, start) -> ARRAY() (empty)
17453 // RANGE(start, end, 0) -> ARRAY() (empty)
17454 // When end is variable: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
17455
17456 // Check for constant args
17457 fn extract_i64(e: &Expression) -> Option<i64> {
17458 match e {
17459 Expression::Literal(lit)
17460 if matches!(
17461 lit.as_ref(),
17462 Literal::Number(_)
17463 ) =>
17464 {
17465 let Literal::Number(n) = lit.as_ref() else {
17466 unreachable!()
17467 };
17468 n.parse::<i64>().ok()
17469 }
17470 Expression::Neg(u) => {
17471 if let Expression::Literal(lit) = &u.this {
17472 if let Literal::Number(n) = lit.as_ref() {
17473 n.parse::<i64>().ok().map(|v| -v)
17474 } else {
17475 None
17476 }
17477 } else {
17478 None
17479 }
17480 }
17481 _ => None,
17482 }
17483 }
17484 let start_val = extract_i64(&start);
17485 let end_val = extract_i64(&end);
17486 let step_val = step.as_ref().and_then(|s| extract_i64(s));
17487
17488 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
17489 if step_val == Some(0) {
17490 return Ok(Expression::Function(Box::new(
17491 Function::new("ARRAY".to_string(), vec![]),
17492 )));
17493 }
17494 if let (Some(s), Some(e_val)) = (start_val, end_val) {
17495 if s == e_val {
17496 return Ok(Expression::Function(Box::new(
17497 Function::new("ARRAY".to_string(), vec![]),
17498 )));
17499 }
17500 }
17501
17502 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
17503 // All constants - compute new end = end - step (if step provided) or end - 1
17504 match step_val {
17505 Some(st) if st < 0 => {
17506 // Negative step: SEQUENCE(start, end - step, step)
17507 let new_end = e_val - st; // end - step (= end + |step|)
17508 let mut args =
17509 vec![start, Expression::number(new_end)];
17510 if let Some(s) = step {
17511 args.push(s);
17512 }
17513 Ok(Expression::Function(Box::new(
17514 Function::new("SEQUENCE".to_string(), args),
17515 )))
17516 }
17517 Some(st) => {
17518 let new_end = e_val - st;
17519 let mut args =
17520 vec![start, Expression::number(new_end)];
17521 if let Some(s) = step {
17522 args.push(s);
17523 }
17524 Ok(Expression::Function(Box::new(
17525 Function::new("SEQUENCE".to_string(), args),
17526 )))
17527 }
17528 None => {
17529 // No step: SEQUENCE(start, end - 1)
17530 let new_end = e_val - 1;
17531 Ok(Expression::Function(Box::new(
17532 Function::new(
17533 "SEQUENCE".to_string(),
17534 vec![
17535 start,
17536 Expression::number(new_end),
17537 ],
17538 ),
17539 )))
17540 }
17541 }
17542 } else {
17543 // Variable end: IF((end - 1) < start, ARRAY(), SEQUENCE(start, (end - 1)))
17544 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
17545 end.clone(),
17546 Expression::number(1),
17547 )));
17548 let cond = Expression::Lt(Box::new(BinaryOp::new(
17549 Expression::Paren(Box::new(Paren {
17550 this: end_m1.clone(),
17551 trailing_comments: Vec::new(),
17552 })),
17553 start.clone(),
17554 )));
17555 let empty = Expression::Function(Box::new(
17556 Function::new("ARRAY".to_string(), vec![]),
17557 ));
17558 let mut seq_args = vec![
17559 start,
17560 Expression::Paren(Box::new(Paren {
17561 this: end_m1,
17562 trailing_comments: Vec::new(),
17563 })),
17564 ];
17565 if let Some(s) = step {
17566 seq_args.push(s);
17567 }
17568 let seq = Expression::Function(Box::new(
17569 Function::new("SEQUENCE".to_string(), seq_args),
17570 ));
17571 Ok(Expression::IfFunc(Box::new(
17572 crate::expressions::IfFunc {
17573 condition: cond,
17574 true_value: empty,
17575 false_value: Some(seq),
17576 original_name: None,
17577 inferred_type: None,
17578 },
17579 )))
17580 }
17581 }
17582 DialectType::SQLite => {
17583 // RANGE(start, end) -> GENERATE_SERIES(start, end)
17584 // The subquery wrapping is handled at the Alias level
17585 let mut args = vec![start, end];
17586 if let Some(s) = step {
17587 args.push(s);
17588 }
17589 Ok(Expression::Function(Box::new(Function::new(
17590 "GENERATE_SERIES".to_string(),
17591 args,
17592 ))))
17593 }
17594 _ => Ok(Expression::Function(f)),
17595 }
17596 }
// ARRAY_REVERSE_SORT -> target-specific
// (handled above as well, but also need DuckDB self-normalization)
// MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
"MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
    DialectType::Snowflake => Ok(Expression::Function(Box::new(
        Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
    ))),
    // Spark/Databricks keep MAP_FROM_ARRAYS (rebuilt to normalize the name).
    DialectType::Spark | DialectType::Databricks => {
        Ok(Expression::Function(Box::new(Function::new(
            "MAP_FROM_ARRAYS".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(Box::new(Function::new(
        "MAP".to_string(),
        f.args,
    )))),
},
// VARIANCE(x) -> varSamp(x) for ClickHouse
"VARIANCE" if f.args.len() == 1 => match target {
    DialectType::ClickHouse => Ok(Expression::Function(Box::new(
        Function::new("varSamp".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// STDDEV(x) -> stddevSamp(x) for ClickHouse
"STDDEV" if f.args.len() == 1 => match target {
    DialectType::ClickHouse => Ok(Expression::Function(Box::new(
        Function::new("stddevSamp".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// ISINF(x) -> IS_INF(x) for BigQuery
"ISINF" if f.args.len() == 1 => match target {
    DialectType::BigQuery => Ok(Expression::Function(Box::new(
        Function::new("IS_INF".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
"CONTAINS" if f.args.len() == 2 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CONTAINS".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto
// NOTE(review): this arm looks unreachable — the earlier combined
// "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS" arm also guards on
// f.args.len() == 2 and matches first. Verify intended arm ordering.
"ARRAY_CONTAINS" if f.args.len() == 2 => match target {
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        Ok(Expression::Function(Box::new(Function::new(
            "CONTAINS".to_string(),
            f.args,
        ))))
    }
    DialectType::DuckDB => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CONTAINS".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
"TO_UNIXTIME" if f.args.len() == 1 => match target {
    DialectType::Hive
    | DialectType::Spark
    | DialectType::Databricks => Ok(Expression::Function(Box::new(
        Function::new("UNIX_TIMESTAMP".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// FROM_UNIXTIME(x) -> target-specific
"FROM_UNIXTIME" if f.args.len() == 1 => {
    match target {
        // These dialects support FROM_UNIXTIME natively.
        DialectType::Hive
        | DialectType::Spark
        | DialectType::Databricks
        | DialectType::Presto
        | DialectType::Trino => Ok(Expression::Function(f)),
        DialectType::DuckDB => {
            // DuckDB: TO_TIMESTAMP(x)
            // Guard guarantees exactly one argument, so unwrap is safe.
            let arg = f.args.into_iter().next().unwrap();
            Ok(Expression::Function(Box::new(Function::new(
                "TO_TIMESTAMP".to_string(),
                vec![arg],
            ))))
        }
        DialectType::PostgreSQL => {
            // PG: TO_TIMESTAMP(col)
            let arg = f.args.into_iter().next().unwrap();
            Ok(Expression::Function(Box::new(Function::new(
                "TO_TIMESTAMP".to_string(),
                vec![arg],
            ))))
        }
        DialectType::Redshift => {
            // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
            let arg = f.args.into_iter().next().unwrap();
            let epoch_ts = Expression::Literal(Box::new(
                Literal::Timestamp("epoch".to_string()),
            ));
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string("1 SECOND")),
                    unit: None,
                },
            ));
            let mul =
                Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
            let add =
                Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
            // Parenthesized so operator precedence survives generation.
            Ok(Expression::Paren(Box::new(crate::expressions::Paren {
                this: add,
                trailing_comments: Vec::new(),
            })))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
"FROM_UNIXTIME"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
        ) =>
{
    let mut args = f.args;
    let unix_ts = args.remove(0);
    let fmt_expr = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
            let to_ts = Expression::Function(Box::new(Function::new(
                "TO_TIMESTAMP".to_string(),
                vec![unix_ts],
            )));
            // Literal format strings are translated from Hive patterns
            // to C strftime patterns; dynamic formats pass through.
            if let Expression::Literal(lit) = &fmt_expr {
                if let crate::expressions::Literal::String(s) =
                    lit.as_ref()
                {
                    let c_fmt = Self::hive_format_to_c_format(s);
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRFTIME".to_string(),
                        vec![to_ts, Expression::string(&c_fmt)],
                    ))))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRFTIME".to_string(),
                        vec![to_ts, fmt_expr],
                    ))))
                }
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "STRFTIME".to_string(),
                    vec![to_ts, fmt_expr],
                ))))
            }
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
            let from_unix =
                Expression::Function(Box::new(Function::new(
                    "FROM_UNIXTIME".to_string(),
                    vec![unix_ts],
                )));
            // Same literal-vs-dynamic split as the DuckDB branch, but
            // using the Presto/MySQL-style format translation.
            if let Expression::Literal(lit) = &fmt_expr {
                if let crate::expressions::Literal::String(s) =
                    lit.as_ref()
                {
                    let p_fmt = Self::hive_format_to_presto_format(s);
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_FORMAT".to_string(),
                        vec![from_unix, Expression::string(&p_fmt)],
                    ))))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_FORMAT".to_string(),
                        vec![from_unix, fmt_expr],
                    ))))
                }
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![from_unix, fmt_expr],
                ))))
            }
        }
        _ => {
            // Keep as FROM_UNIXTIME(x, fmt) for other targets
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_UNIXTIME".to_string(),
                vec![unix_ts, fmt_expr],
            ))))
        }
    }
}
17797 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
17798 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
17799 let unit_str = Self::get_unit_str_static(&f.args[0]);
17800 // Get the raw unit text preserving original case
17801 let raw_unit = match &f.args[0] {
17802 Expression::Identifier(id) => id.name.clone(),
17803 Expression::Var(v) => v.this.clone(),
17804 Expression::Literal(lit)
17805 if matches!(
17806 lit.as_ref(),
17807 crate::expressions::Literal::String(_)
17808 ) =>
17809 {
17810 let crate::expressions::Literal::String(s) = lit.as_ref()
17811 else {
17812 unreachable!()
17813 };
17814 s.clone()
17815 }
17816 Expression::Column(col) => col.name.name.clone(),
17817 _ => unit_str.clone(),
17818 };
17819 match target {
17820 DialectType::TSQL | DialectType::Fabric => {
17821 // Preserve original case of unit for TSQL
17822 let unit_name = match unit_str.as_str() {
17823 "YY" | "YYYY" => "YEAR".to_string(),
17824 "QQ" | "Q" => "QUARTER".to_string(),
17825 "MM" | "M" => "MONTH".to_string(),
17826 "WK" | "WW" => "WEEK".to_string(),
17827 "DD" | "D" | "DY" => "DAY".to_string(),
17828 "HH" => "HOUR".to_string(),
17829 "MI" | "N" => "MINUTE".to_string(),
17830 "SS" | "S" => "SECOND".to_string(),
17831 _ => raw_unit.clone(), // preserve original case
17832 };
17833 let mut args = f.args;
17834 args[0] =
17835 Expression::Identifier(Identifier::new(&unit_name));
17836 Ok(Expression::Function(Box::new(Function::new(
17837 "DATEPART".to_string(),
17838 args,
17839 ))))
17840 }
17841 DialectType::Spark | DialectType::Databricks => {
17842 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
17843 // Preserve original case for non-abbreviation units
17844 let unit = match unit_str.as_str() {
17845 "YY" | "YYYY" => "YEAR".to_string(),
17846 "QQ" | "Q" => "QUARTER".to_string(),
17847 "MM" | "M" => "MONTH".to_string(),
17848 "WK" | "WW" => "WEEK".to_string(),
17849 "DD" | "D" | "DY" => "DAY".to_string(),
17850 "HH" => "HOUR".to_string(),
17851 "MI" | "N" => "MINUTE".to_string(),
17852 "SS" | "S" => "SECOND".to_string(),
17853 _ => raw_unit, // preserve original case
17854 };
17855 Ok(Expression::Extract(Box::new(
17856 crate::expressions::ExtractFunc {
17857 this: f.args[1].clone(),
17858 field: crate::expressions::DateTimeField::Custom(
17859 unit,
17860 ),
17861 },
17862 )))
17863 }
17864 _ => Ok(Expression::Function(Box::new(Function::new(
17865 "DATE_PART".to_string(),
17866 f.args,
17867 )))),
17868 }
17869 }
17870 // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
17871 // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
17872 // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
17873 // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
17874 "DATENAME" if f.args.len() == 2 => {
17875 let unit_str = Self::get_unit_str_static(&f.args[0]);
17876 let date_expr = f.args[1].clone();
17877 match unit_str.as_str() {
17878 "MM" | "M" | "MONTH" => match target {
17879 DialectType::TSQL => {
17880 let cast_date = Expression::Cast(Box::new(
17881 crate::expressions::Cast {
17882 this: date_expr,
17883 to: DataType::Custom {
17884 name: "DATETIME2".to_string(),
17885 },
17886 trailing_comments: Vec::new(),
17887 double_colon_syntax: false,
17888 format: None,
17889 default: None,
17890 inferred_type: None,
17891 },
17892 ));
17893 Ok(Expression::Function(Box::new(Function::new(
17894 "FORMAT".to_string(),
17895 vec![cast_date, Expression::string("MMMM")],
17896 ))))
17897 }
17898 DialectType::Spark | DialectType::Databricks => {
17899 let cast_date = Expression::Cast(Box::new(
17900 crate::expressions::Cast {
17901 this: date_expr,
17902 to: DataType::Timestamp {
17903 timezone: false,
17904 precision: None,
17905 },
17906 trailing_comments: Vec::new(),
17907 double_colon_syntax: false,
17908 format: None,
17909 default: None,
17910 inferred_type: None,
17911 },
17912 ));
17913 Ok(Expression::Function(Box::new(Function::new(
17914 "DATE_FORMAT".to_string(),
17915 vec![cast_date, Expression::string("MMMM")],
17916 ))))
17917 }
17918 _ => Ok(Expression::Function(f)),
17919 },
17920 "DW" | "WEEKDAY" => match target {
17921 DialectType::TSQL => {
17922 let cast_date = Expression::Cast(Box::new(
17923 crate::expressions::Cast {
17924 this: date_expr,
17925 to: DataType::Custom {
17926 name: "DATETIME2".to_string(),
17927 },
17928 trailing_comments: Vec::new(),
17929 double_colon_syntax: false,
17930 format: None,
17931 default: None,
17932 inferred_type: None,
17933 },
17934 ));
17935 Ok(Expression::Function(Box::new(Function::new(
17936 "FORMAT".to_string(),
17937 vec![cast_date, Expression::string("dddd")],
17938 ))))
17939 }
17940 DialectType::Spark | DialectType::Databricks => {
17941 let cast_date = Expression::Cast(Box::new(
17942 crate::expressions::Cast {
17943 this: date_expr,
17944 to: DataType::Timestamp {
17945 timezone: false,
17946 precision: None,
17947 },
17948 trailing_comments: Vec::new(),
17949 double_colon_syntax: false,
17950 format: None,
17951 default: None,
17952 inferred_type: None,
17953 },
17954 ));
17955 Ok(Expression::Function(Box::new(Function::new(
17956 "DATE_FORMAT".to_string(),
17957 vec![cast_date, Expression::string("EEEE")],
17958 ))))
17959 }
17960 _ => Ok(Expression::Function(f)),
17961 },
17962 _ => Ok(Expression::Function(f)),
17963 }
17964 }
17965 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
17966 "STRING_AGG" if f.args.len() >= 2 => {
17967 let x = f.args[0].clone();
17968 let sep = f.args[1].clone();
17969 match target {
17970 DialectType::MySQL
17971 | DialectType::SingleStore
17972 | DialectType::Doris
17973 | DialectType::StarRocks => Ok(Expression::GroupConcat(
17974 Box::new(crate::expressions::GroupConcatFunc {
17975 this: x,
17976 separator: Some(sep),
17977 order_by: None,
17978 distinct: false,
17979 filter: None,
17980 limit: None,
17981 inferred_type: None,
17982 }),
17983 )),
17984 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
17985 crate::expressions::GroupConcatFunc {
17986 this: x,
17987 separator: Some(sep),
17988 order_by: None,
17989 distinct: false,
17990 filter: None,
17991 limit: None,
17992 inferred_type: None,
17993 },
17994 ))),
17995 DialectType::PostgreSQL | DialectType::Redshift => {
17996 Ok(Expression::StringAgg(Box::new(
17997 crate::expressions::StringAggFunc {
17998 this: x,
17999 separator: Some(sep),
18000 order_by: None,
18001 distinct: false,
18002 filter: None,
18003 limit: None,
18004 inferred_type: None,
18005 },
18006 )))
18007 }
18008 _ => Ok(Expression::Function(f)),
18009 }
18010 }
            // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
            "JSON_ARRAYAGG" => match target {
                DialectType::PostgreSQL => {
                    // Rename only: struct-update syntax carries over the
                    // argument list and every other Function field intact.
                    Ok(Expression::Function(Box::new(Function {
                        name: "JSON_AGG".to_string(),
                        ..(*f)
                    })))
                }
                // All other targets keep JSON_ARRAYAGG unchanged.
                _ => Ok(Expression::Function(f)),
            },
            // SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
            // NOTE(review): any schema-id argument is dropped for the two
            // rewritten targets — confirm callers only rely on the 0-arg form.
            "SCHEMA_NAME" => match target {
                DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
                    crate::expressions::CurrentSchema { this: None },
                ))),
                // SQLite's primary attached database is always named 'main'.
                DialectType::SQLite => Ok(Expression::string("main")),
                _ => Ok(Expression::Function(f)),
            },
18029 // TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
18030 "TO_TIMESTAMP"
18031 if f.args.len() == 2
18032 && matches!(
18033 source,
18034 DialectType::Spark
18035 | DialectType::Databricks
18036 | DialectType::Hive
18037 )
18038 && matches!(target, DialectType::DuckDB) =>
18039 {
18040 let mut args = f.args;
18041 let val = args.remove(0);
18042 let fmt_expr = args.remove(0);
18043 if let Expression::Literal(ref lit) = fmt_expr {
18044 if let Literal::String(ref s) = lit.as_ref() {
18045 // Convert Java/Spark format to C strptime format
18046 fn java_to_c_fmt(fmt: &str) -> String {
18047 let result = fmt
18048 .replace("yyyy", "%Y")
18049 .replace("SSSSSS", "%f")
18050 .replace("EEEE", "%W")
18051 .replace("MM", "%m")
18052 .replace("dd", "%d")
18053 .replace("HH", "%H")
18054 .replace("mm", "%M")
18055 .replace("ss", "%S")
18056 .replace("yy", "%y");
18057 let mut out = String::new();
18058 let chars: Vec<char> = result.chars().collect();
18059 let mut i = 0;
18060 while i < chars.len() {
18061 if chars[i] == '%' && i + 1 < chars.len() {
18062 out.push(chars[i]);
18063 out.push(chars[i + 1]);
18064 i += 2;
18065 } else if chars[i] == 'z' {
18066 out.push_str("%Z");
18067 i += 1;
18068 } else if chars[i] == 'Z' {
18069 out.push_str("%z");
18070 i += 1;
18071 } else {
18072 out.push(chars[i]);
18073 i += 1;
18074 }
18075 }
18076 out
18077 }
18078 let c_fmt = java_to_c_fmt(s);
18079 Ok(Expression::Function(Box::new(Function::new(
18080 "STRPTIME".to_string(),
18081 vec![val, Expression::string(&c_fmt)],
18082 ))))
18083 } else {
18084 Ok(Expression::Function(Box::new(Function::new(
18085 "STRPTIME".to_string(),
18086 vec![val, fmt_expr],
18087 ))))
18088 }
18089 } else {
18090 Ok(Expression::Function(Box::new(Function::new(
18091 "STRPTIME".to_string(),
18092 vec![val, fmt_expr],
18093 ))))
18094 }
18095 }
            // TO_DATE(x) 1-arg from Doris: date conversion
            // Doris/StarRocks TO_DATE truncates a datetime to its date part;
            // targets without that function get an equivalent CAST or DATE().
            "TO_DATE"
                if f.args.len() == 1
                    && matches!(
                        source,
                        DialectType::Doris | DialectType::StarRocks
                    ) =>
            {
                let arg = f.args.into_iter().next().unwrap();
                match target {
                    DialectType::Oracle
                    | DialectType::DuckDB
                    | DialectType::TSQL => {
                        // CAST(x AS DATE)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Date,
                            double_colon_syntax: false,
                            trailing_comments: vec![],
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::MySQL | DialectType::SingleStore => {
                        // DATE(x)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE".to_string(),
                            vec![arg],
                        ))))
                    }
                    _ => {
                        // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_DATE".to_string(),
                            vec![arg],
                        ))))
                    }
                }
            }
            // TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
            // Spark's TO_DATE returns NULL on unparseable input rather than
            // erroring, so targets are chosen to preserve that leniency.
            "TO_DATE"
                if f.args.len() == 1
                    && matches!(
                        source,
                        DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Hive
                    ) =>
            {
                let arg = f.args.into_iter().next().unwrap();
                match target {
                    DialectType::DuckDB => {
                        // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
                        Ok(Expression::TryCast(Box::new(Cast {
                            this: arg,
                            to: DataType::Date,
                            double_colon_syntax: false,
                            trailing_comments: vec![],
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // CAST(CAST(x AS TIMESTAMP) AS DATE)
                        Ok(Self::double_cast_timestamp_date(arg))
                    }
                    DialectType::Snowflake => {
                        // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
                        // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
                        // (Snowflake format elements are case-insensitive, so
                        // 'mm' is month and 'DD' is day-of-month here.)
                        Ok(Expression::Function(Box::new(Function::new(
                            "TRY_TO_DATE".to_string(),
                            vec![arg, Expression::string("yyyy-mm-DD")],
                        ))))
                    }
                    _ => {
                        // Default: keep as TO_DATE(x)
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_DATE".to_string(),
                            vec![arg],
                        ))))
                    }
                }
            }
18183 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
18184 "TO_DATE"
18185 if f.args.len() == 2
18186 && matches!(
18187 source,
18188 DialectType::Spark
18189 | DialectType::Databricks
18190 | DialectType::Hive
18191 ) =>
18192 {
18193 let mut args = f.args;
18194 let val = args.remove(0);
18195 let fmt_expr = args.remove(0);
18196 let is_default_format = matches!(&fmt_expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "yyyy-MM-dd"));
18197
18198 if is_default_format {
18199 // Default format: same as 1-arg form
18200 match target {
18201 DialectType::DuckDB => {
18202 Ok(Expression::TryCast(Box::new(Cast {
18203 this: val,
18204 to: DataType::Date,
18205 double_colon_syntax: false,
18206 trailing_comments: vec![],
18207 format: None,
18208 default: None,
18209 inferred_type: None,
18210 })))
18211 }
18212 DialectType::Presto
18213 | DialectType::Trino
18214 | DialectType::Athena => {
18215 Ok(Self::double_cast_timestamp_date(val))
18216 }
18217 DialectType::Snowflake => {
18218 // TRY_TO_DATE(x, format) with Snowflake format mapping
18219 let sf_fmt = "yyyy-MM-dd"
18220 .replace("yyyy", "yyyy")
18221 .replace("MM", "mm")
18222 .replace("dd", "DD");
18223 Ok(Expression::Function(Box::new(Function::new(
18224 "TRY_TO_DATE".to_string(),
18225 vec![val, Expression::string(&sf_fmt)],
18226 ))))
18227 }
18228 _ => Ok(Expression::Function(Box::new(Function::new(
18229 "TO_DATE".to_string(),
18230 vec![val],
18231 )))),
18232 }
18233 } else {
18234 // Non-default format: use format-based parsing
18235 if let Expression::Literal(ref lit) = fmt_expr {
18236 if let Literal::String(ref s) = lit.as_ref() {
18237 match target {
18238 DialectType::DuckDB => {
18239 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
18240 fn java_to_c_fmt_todate(fmt: &str) -> String {
18241 let result = fmt
18242 .replace("yyyy", "%Y")
18243 .replace("SSSSSS", "%f")
18244 .replace("EEEE", "%W")
18245 .replace("MM", "%m")
18246 .replace("dd", "%d")
18247 .replace("HH", "%H")
18248 .replace("mm", "%M")
18249 .replace("ss", "%S")
18250 .replace("yy", "%y");
18251 let mut out = String::new();
18252 let chars: Vec<char> =
18253 result.chars().collect();
18254 let mut i = 0;
18255 while i < chars.len() {
18256 if chars[i] == '%'
18257 && i + 1 < chars.len()
18258 {
18259 out.push(chars[i]);
18260 out.push(chars[i + 1]);
18261 i += 2;
18262 } else if chars[i] == 'z' {
18263 out.push_str("%Z");
18264 i += 1;
18265 } else if chars[i] == 'Z' {
18266 out.push_str("%z");
18267 i += 1;
18268 } else {
18269 out.push(chars[i]);
18270 i += 1;
18271 }
18272 }
18273 out
18274 }
18275 let c_fmt = java_to_c_fmt_todate(s);
18276 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
18277 let try_strptime = Expression::Function(
18278 Box::new(Function::new(
18279 "TRY_STRPTIME".to_string(),
18280 vec![val, Expression::string(&c_fmt)],
18281 )),
18282 );
18283 let cast_ts =
18284 Expression::Cast(Box::new(Cast {
18285 this: try_strptime,
18286 to: DataType::Timestamp {
18287 precision: None,
18288 timezone: false,
18289 },
18290 double_colon_syntax: false,
18291 trailing_comments: vec![],
18292 format: None,
18293 default: None,
18294 inferred_type: None,
18295 }));
18296 Ok(Expression::Cast(Box::new(Cast {
18297 this: cast_ts,
18298 to: DataType::Date,
18299 double_colon_syntax: false,
18300 trailing_comments: vec![],
18301 format: None,
18302 default: None,
18303 inferred_type: None,
18304 })))
18305 }
18306 DialectType::Presto
18307 | DialectType::Trino
18308 | DialectType::Athena => {
18309 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
18310 let p_fmt = s
18311 .replace("yyyy", "%Y")
18312 .replace("SSSSSS", "%f")
18313 .replace("MM", "%m")
18314 .replace("dd", "%d")
18315 .replace("HH", "%H")
18316 .replace("mm", "%M")
18317 .replace("ss", "%S")
18318 .replace("yy", "%y");
18319 let date_parse = Expression::Function(
18320 Box::new(Function::new(
18321 "DATE_PARSE".to_string(),
18322 vec![val, Expression::string(&p_fmt)],
18323 )),
18324 );
18325 Ok(Expression::Cast(Box::new(Cast {
18326 this: date_parse,
18327 to: DataType::Date,
18328 double_colon_syntax: false,
18329 trailing_comments: vec![],
18330 format: None,
18331 default: None,
18332 inferred_type: None,
18333 })))
18334 }
18335 DialectType::Snowflake => {
18336 // TRY_TO_DATE(x, snowflake_fmt)
18337 Ok(Expression::Function(Box::new(
18338 Function::new(
18339 "TRY_TO_DATE".to_string(),
18340 vec![val, Expression::string(s)],
18341 ),
18342 )))
18343 }
18344 _ => Ok(Expression::Function(Box::new(
18345 Function::new(
18346 "TO_DATE".to_string(),
18347 vec![val, fmt_expr],
18348 ),
18349 ))),
18350 }
18351 } else {
18352 Ok(Expression::Function(Box::new(Function::new(
18353 "TO_DATE".to_string(),
18354 vec![val, fmt_expr],
18355 ))))
18356 }
18357 } else {
18358 Ok(Expression::Function(Box::new(Function::new(
18359 "TO_DATE".to_string(),
18360 vec![val, fmt_expr],
18361 ))))
18362 }
18363 }
18364 }
            // TO_TIMESTAMP(x) 1-arg: epoch conversion
            // DuckDB's 1-arg TO_TIMESTAMP interprets its argument as epoch
            // seconds; map it to the target's epoch-seconds constructor.
            "TO_TIMESTAMP"
                if f.args.len() == 1
                    && matches!(source, DialectType::DuckDB)
                    && matches!(
                        target,
                        DialectType::BigQuery
                            | DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Athena
                    ) =>
            {
                let arg = f.args.into_iter().next().unwrap();
                let func_name = match target {
                    DialectType::BigQuery => "TIMESTAMP_SECONDS",
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena
                    | DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks => "FROM_UNIXTIME",
                    // Unreachable given the arm's target guard, but kept so
                    // the match stays exhaustive.
                    _ => "TO_TIMESTAMP",
                };
                Ok(Expression::Function(Box::new(Function::new(
                    func_name.to_string(),
                    vec![arg],
                ))))
            }
18396 // CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
18397 "CONCAT" if f.args.len() == 1 => {
18398 let arg = f.args.into_iter().next().unwrap();
18399 match target {
18400 DialectType::Presto
18401 | DialectType::Trino
18402 | DialectType::Athena => {
18403 // CONCAT(a) -> CAST(a AS VARCHAR)
18404 Ok(Expression::Cast(Box::new(Cast {
18405 this: arg,
18406 to: DataType::VarChar {
18407 length: None,
18408 parenthesized_length: false,
18409 },
18410 trailing_comments: vec![],
18411 double_colon_syntax: false,
18412 format: None,
18413 default: None,
18414 inferred_type: None,
18415 })))
18416 }
18417 DialectType::TSQL => {
18418 // CONCAT(a) -> a
18419 Ok(arg)
18420 }
18421 DialectType::DuckDB => {
18422 // Keep CONCAT(a) for DuckDB (native support)
18423 Ok(Expression::Function(Box::new(Function::new(
18424 "CONCAT".to_string(),
18425 vec![arg],
18426 ))))
18427 }
18428 DialectType::Spark | DialectType::Databricks => {
18429 let coalesced = Expression::Coalesce(Box::new(
18430 crate::expressions::VarArgFunc {
18431 expressions: vec![arg, Expression::string("")],
18432 original_name: None,
18433 inferred_type: None,
18434 },
18435 ));
18436 Ok(Expression::Function(Box::new(Function::new(
18437 "CONCAT".to_string(),
18438 vec![coalesced],
18439 ))))
18440 }
18441 _ => Ok(Expression::Function(Box::new(Function::new(
18442 "CONCAT".to_string(),
18443 vec![arg],
18444 )))),
18445 }
18446 }
            // REGEXP_EXTRACT(a, p) 2-arg: BigQuery default group is 0 (no 3rd arg needed)
            "REGEXP_EXTRACT"
                if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
            {
                // If group_index is 0, drop it: BigQuery's REGEXP_EXTRACT
                // returns the whole match by default, so an explicit group 0
                // is redundant.
                let drop_group = match &f.args[2] {
                    Expression::Literal(lit)
                        if matches!(lit.as_ref(), Literal::Number(_)) =>
                    {
                        let Literal::Number(n) = lit.as_ref() else {
                            unreachable!()
                        };
                        n == "0"
                    }
                    // Non-numeric or non-literal group arguments are kept.
                    _ => false,
                };
                if drop_group {
                    let mut args = f.args;
                    args.truncate(2);
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        args,
                    ))))
                } else {
                    Ok(Expression::Function(f))
                }
            }
            // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
            "REGEXP_EXTRACT"
                if f.args.len() == 4
                    && matches!(target, DialectType::Snowflake) =>
            {
                // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
                // Snowflake's REGEXP_SUBSTR signature is
                // (subject, pattern, position, occurrence, parameters, group);
                // position and occurrence default to 1 here, and the source's
                // flags/group arguments swap places at the end.
                let mut args = f.args;
                let this = args.remove(0);
                let pattern = args.remove(0);
                let group = args.remove(0);
                let flags = args.remove(0);
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_SUBSTR".to_string(),
                    vec![
                        this,
                        pattern,
                        Expression::number(1),
                        Expression::number(1),
                        flags,
                        group,
                    ],
                ))))
            }
18497 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
18498 "REGEXP_SUBSTR"
18499 if f.args.len() == 3
18500 && matches!(
18501 target,
18502 DialectType::DuckDB
18503 | DialectType::Presto
18504 | DialectType::Trino
18505 | DialectType::Spark
18506 | DialectType::Databricks
18507 ) =>
18508 {
18509 let mut args = f.args;
18510 let this = args.remove(0);
18511 let pattern = args.remove(0);
18512 let position = args.remove(0);
18513 // Wrap subject in SUBSTRING(this, position) to apply the offset
18514 let substring_expr = Expression::Function(Box::new(Function::new(
18515 "SUBSTRING".to_string(),
18516 vec![this, position],
18517 )));
18518 let target_name = match target {
18519 DialectType::DuckDB => "REGEXP_EXTRACT",
18520 _ => "REGEXP_EXTRACT",
18521 };
18522 Ok(Expression::Function(Box::new(Function::new(
18523 target_name.to_string(),
18524 vec![substring_expr, pattern],
18525 ))))
18526 }
18527 // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
18528 "TO_DAYS" if f.args.len() == 1 => {
18529 let x = f.args.into_iter().next().unwrap();
18530 let epoch = Expression::string("0000-01-01");
18531 // Build the final target-specific expression directly
18532 let datediff_expr = match target {
18533 DialectType::MySQL | DialectType::SingleStore => {
18534 // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
18535 Expression::Function(Box::new(Function::new(
18536 "DATEDIFF".to_string(),
18537 vec![x, epoch],
18538 )))
18539 }
18540 DialectType::DuckDB => {
18541 // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
18542 let cast_epoch = Expression::Cast(Box::new(Cast {
18543 this: epoch,
18544 to: DataType::Date,
18545 trailing_comments: Vec::new(),
18546 double_colon_syntax: false,
18547 format: None,
18548 default: None,
18549 inferred_type: None,
18550 }));
18551 let cast_x = Expression::Cast(Box::new(Cast {
18552 this: x,
18553 to: DataType::Date,
18554 trailing_comments: Vec::new(),
18555 double_colon_syntax: false,
18556 format: None,
18557 default: None,
18558 inferred_type: None,
18559 }));
18560 Expression::Function(Box::new(Function::new(
18561 "DATE_DIFF".to_string(),
18562 vec![Expression::string("DAY"), cast_epoch, cast_x],
18563 )))
18564 }
18565 DialectType::Presto
18566 | DialectType::Trino
18567 | DialectType::Athena => {
18568 // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
18569 let cast_epoch = Self::double_cast_timestamp_date(epoch);
18570 let cast_x = Self::double_cast_timestamp_date(x);
18571 Expression::Function(Box::new(Function::new(
18572 "DATE_DIFF".to_string(),
18573 vec![Expression::string("DAY"), cast_epoch, cast_x],
18574 )))
18575 }
18576 _ => {
18577 // Default: (DATEDIFF(x, '0000-01-01') + 1)
18578 Expression::Function(Box::new(Function::new(
18579 "DATEDIFF".to_string(),
18580 vec![x, epoch],
18581 )))
18582 }
18583 };
18584 let add_one = Expression::Add(Box::new(BinaryOp::new(
18585 datediff_expr,
18586 Expression::number(1),
18587 )));
18588 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
18589 this: add_one,
18590 trailing_comments: Vec::new(),
18591 })))
18592 }
18593 // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
18594 "STR_TO_DATE"
18595 if f.args.len() == 2
18596 && matches!(
18597 target,
18598 DialectType::Presto | DialectType::Trino
18599 ) =>
18600 {
18601 let mut args = f.args;
18602 let x = args.remove(0);
18603 let format_expr = args.remove(0);
18604 // Check if the format contains time components
18605 let has_time = if let Expression::Literal(ref lit) = format_expr {
18606 if let Literal::String(ref fmt) = lit.as_ref() {
18607 fmt.contains("%H")
18608 || fmt.contains("%T")
18609 || fmt.contains("%M")
18610 || fmt.contains("%S")
18611 || fmt.contains("%I")
18612 || fmt.contains("%p")
18613 } else {
18614 false
18615 }
18616 } else {
18617 false
18618 };
18619 let date_parse = Expression::Function(Box::new(Function::new(
18620 "DATE_PARSE".to_string(),
18621 vec![x, format_expr],
18622 )));
18623 if has_time {
18624 // Has time components: just DATE_PARSE
18625 Ok(date_parse)
18626 } else {
18627 // Date-only: CAST(DATE_PARSE(...) AS DATE)
18628 Ok(Expression::Cast(Box::new(Cast {
18629 this: date_parse,
18630 to: DataType::Date,
18631 trailing_comments: Vec::new(),
18632 double_colon_syntax: false,
18633 format: None,
18634 default: None,
18635 inferred_type: None,
18636 })))
18637 }
18638 }
18639 "STR_TO_DATE"
18640 if f.args.len() == 2
18641 && matches!(
18642 target,
18643 DialectType::PostgreSQL | DialectType::Redshift
18644 ) =>
18645 {
18646 let mut args = f.args;
18647 let x = args.remove(0);
18648 let fmt = args.remove(0);
18649 let pg_fmt = match fmt {
18650 Expression::Literal(lit)
18651 if matches!(lit.as_ref(), Literal::String(_)) =>
18652 {
18653 let Literal::String(s) = lit.as_ref() else {
18654 unreachable!()
18655 };
18656 Expression::string(
18657 &s.replace("%Y", "YYYY")
18658 .replace("%m", "MM")
18659 .replace("%d", "DD")
18660 .replace("%H", "HH24")
18661 .replace("%M", "MI")
18662 .replace("%S", "SS"),
18663 )
18664 }
18665 other => other,
18666 };
18667 let to_date = Expression::Function(Box::new(Function::new(
18668 "TO_DATE".to_string(),
18669 vec![x, pg_fmt],
18670 )));
18671 Ok(Expression::Cast(Box::new(Cast {
18672 this: to_date,
18673 to: DataType::Timestamp {
18674 timezone: false,
18675 precision: None,
18676 },
18677 trailing_comments: Vec::new(),
18678 double_colon_syntax: false,
18679 format: None,
18680 default: None,
18681 inferred_type: None,
18682 })))
18683 }
            // RANGE(start, end) -> GENERATE_SERIES for SQLite
            "RANGE"
                if (f.args.len() == 1 || f.args.len() == 2)
                    && matches!(target, DialectType::SQLite) =>
            {
                if f.args.len() == 2 {
                    // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
                    // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
                    // NOTE(review): the end bound is passed through unchanged,
                    // so the exclusive->inclusive difference noted above is
                    // not compensated here — confirm whether an `end - 1`
                    // adjustment is intended.
                    let mut args = f.args;
                    let start = args.remove(0);
                    let end = args.remove(0);
                    Ok(Expression::Function(Box::new(Function::new(
                        "GENERATE_SERIES".to_string(),
                        vec![start, end],
                    ))))
                } else {
                    // 1-arg RANGE(n): kept as-is for SQLite.
                    Ok(Expression::Function(f))
                }
            }
            // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
            // When source is Snowflake, keep as-is (args already in correct form)
            "UNIFORM"
                if matches!(target, DialectType::Snowflake)
                    && (f.args.len() == 2 || f.args.len() == 3) =>
            {
                if matches!(source, DialectType::Snowflake) {
                    // Snowflake -> Snowflake: keep as-is
                    Ok(Expression::Function(f))
                } else {
                    let mut args = f.args;
                    let low = args.remove(0);
                    let high = args.remove(0);
                    // Snowflake's UNIFORM requires a generator expression as
                    // its third argument: wrap an optional seed in
                    // RANDOM(seed), or use a bare RANDOM() when no seed was
                    // supplied by the source dialect.
                    let random = if !args.is_empty() {
                        let seed = args.remove(0);
                        Expression::Function(Box::new(Function::new(
                            "RANDOM".to_string(),
                            vec![seed],
                        )))
                    } else {
                        Expression::Function(Box::new(Function::new(
                            "RANDOM".to_string(),
                            vec![],
                        )))
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        "UNIFORM".to_string(),
                        vec![low, high, random],
                    ))))
                }
            }
18734 // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
18735 "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
18736 let mut args = f.args;
18737 let ts_arg = args.remove(0);
18738 let tz_arg = args.remove(0);
18739 // Cast string literal to TIMESTAMP for all targets
18740 let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
18741 {
18742 Expression::Cast(Box::new(Cast {
18743 this: ts_arg,
18744 to: DataType::Timestamp {
18745 timezone: false,
18746 precision: None,
18747 },
18748 trailing_comments: vec![],
18749 double_colon_syntax: false,
18750 format: None,
18751 default: None,
18752 inferred_type: None,
18753 }))
18754 } else {
18755 ts_arg
18756 };
18757 match target {
18758 DialectType::Spark | DialectType::Databricks => {
18759 Ok(Expression::Function(Box::new(Function::new(
18760 "TO_UTC_TIMESTAMP".to_string(),
18761 vec![ts_cast, tz_arg],
18762 ))))
18763 }
18764 DialectType::Snowflake => {
18765 // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
18766 Ok(Expression::Function(Box::new(Function::new(
18767 "CONVERT_TIMEZONE".to_string(),
18768 vec![tz_arg, Expression::string("UTC"), ts_cast],
18769 ))))
18770 }
18771 DialectType::Presto
18772 | DialectType::Trino
18773 | DialectType::Athena => {
18774 // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
18775 let wtz = Expression::Function(Box::new(Function::new(
18776 "WITH_TIMEZONE".to_string(),
18777 vec![ts_cast, tz_arg],
18778 )));
18779 Ok(Expression::AtTimeZone(Box::new(
18780 crate::expressions::AtTimeZone {
18781 this: wtz,
18782 zone: Expression::string("UTC"),
18783 },
18784 )))
18785 }
18786 DialectType::BigQuery => {
18787 // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
18788 let cast_dt = Expression::Cast(Box::new(Cast {
18789 this: if let Expression::Cast(c) = ts_cast {
18790 c.this
18791 } else {
18792 ts_cast.clone()
18793 },
18794 to: DataType::Custom {
18795 name: "DATETIME".to_string(),
18796 },
18797 trailing_comments: vec![],
18798 double_colon_syntax: false,
18799 format: None,
18800 default: None,
18801 inferred_type: None,
18802 }));
18803 let ts_func =
18804 Expression::Function(Box::new(Function::new(
18805 "TIMESTAMP".to_string(),
18806 vec![cast_dt, tz_arg],
18807 )));
18808 Ok(Expression::Function(Box::new(Function::new(
18809 "DATETIME".to_string(),
18810 vec![ts_func, Expression::string("UTC")],
18811 ))))
18812 }
18813 _ => {
18814 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
18815 let atz1 = Expression::AtTimeZone(Box::new(
18816 crate::expressions::AtTimeZone {
18817 this: ts_cast,
18818 zone: tz_arg,
18819 },
18820 ));
18821 Ok(Expression::AtTimeZone(Box::new(
18822 crate::expressions::AtTimeZone {
18823 this: atz1,
18824 zone: Expression::string("UTC"),
18825 },
18826 )))
18827 }
18828 }
18829 }
// FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
"FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
    let mut args = f.args;
    let ts_arg = args.remove(0);
    let tz_arg = args.remove(0);
    // Cast string literal to TIMESTAMP so targets treat the first
    // argument as a timestamp value rather than plain text.
    let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
    {
        Expression::Cast(Box::new(Cast {
            this: ts_arg,
            to: DataType::Timestamp {
                timezone: false,
                precision: None,
            },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }))
    } else {
        ts_arg
    };
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Native support: keep FROM_UTC_TIMESTAMP, just with the
            // (possibly cast) timestamp argument.
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_UTC_TIMESTAMP".to_string(),
                vec![ts_cast, tz_arg],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
            Ok(Expression::Function(Box::new(Function::new(
                "AT_TIMEZONE".to_string(),
                vec![ts_cast, tz_arg],
            ))))
        }
        DialectType::Snowflake => {
            // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
            Ok(Expression::Function(Box::new(Function::new(
                "CONVERT_TIMEZONE".to_string(),
                vec![Expression::string("UTC"), tz_arg, ts_cast],
            ))))
        }
        _ => {
            // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
            Ok(Expression::AtTimeZone(Box::new(
                crate::expressions::AtTimeZone {
                    this: ts_cast,
                    zone: tz_arg,
                },
            )))
        }
    }
}
18887 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
18888 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
18889 let name = match target {
18890 DialectType::Snowflake => "OBJECT_CONSTRUCT",
18891 _ => "MAP",
18892 };
18893 Ok(Expression::Function(Box::new(Function::new(
18894 name.to_string(),
18895 f.args,
18896 ))))
18897 }
18898 // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
18899 "STR_TO_MAP" if f.args.len() >= 1 => match target {
18900 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18901 Ok(Expression::Function(Box::new(Function::new(
18902 "SPLIT_TO_MAP".to_string(),
18903 f.args,
18904 ))))
18905 }
18906 _ => Ok(Expression::Function(f)),
18907 },
18908 // TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
18909 "TIME_TO_STR" if f.args.len() == 2 => {
18910 let mut args = f.args;
18911 let this = args.remove(0);
18912 let fmt_expr = args.remove(0);
18913 let format = if let Expression::Literal(lit) = fmt_expr {
18914 if let Literal::String(s) = lit.as_ref() {
18915 s.clone()
18916 } else {
18917 String::new()
18918 }
18919 } else {
18920 "%Y-%m-%d %H:%M:%S".to_string()
18921 };
18922 Ok(Expression::TimeToStr(Box::new(
18923 crate::expressions::TimeToStr {
18924 this: Box::new(this),
18925 format,
18926 culture: None,
18927 zone: None,
18928 },
18929 )))
18930 }
18931 // STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation
18932 "STR_TO_TIME" if f.args.len() == 2 => {
18933 let mut args = f.args;
18934 let this = args.remove(0);
18935 let fmt_expr = args.remove(0);
18936 let format = if let Expression::Literal(lit) = fmt_expr {
18937 if let Literal::String(s) = lit.as_ref() {
18938 s.clone()
18939 } else {
18940 String::new()
18941 }
18942 } else {
18943 "%Y-%m-%d %H:%M:%S".to_string()
18944 };
18945 Ok(Expression::StrToTime(Box::new(
18946 crate::expressions::StrToTime {
18947 this: Box::new(this),
18948 format,
18949 zone: None,
18950 safe: None,
18951 target_type: None,
18952 },
18953 )))
18954 }
18955 // STR_TO_UNIX(x, fmt) -> Expression::StrToUnix for proper generation
18956 "STR_TO_UNIX" if f.args.len() >= 1 => {
18957 let mut args = f.args;
18958 let this = args.remove(0);
18959 let format = if !args.is_empty() {
18960 if let Expression::Literal(lit) = args.remove(0) {
18961 if let Literal::String(s) = lit.as_ref() {
18962 Some(s.clone())
18963 } else {
18964 None
18965 }
18966 } else {
18967 None
18968 }
18969 } else {
18970 None
18971 };
18972 Ok(Expression::StrToUnix(Box::new(
18973 crate::expressions::StrToUnix {
18974 this: Some(Box::new(this)),
18975 format,
18976 },
18977 )))
18978 }
18979 // TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation
18980 "TIME_TO_UNIX" if f.args.len() == 1 => {
18981 let mut args = f.args;
18982 let this = args.remove(0);
18983 Ok(Expression::TimeToUnix(Box::new(
18984 crate::expressions::UnaryFunc {
18985 this,
18986 original_name: None,
18987 inferred_type: None,
18988 },
18989 )))
18990 }
18991 // UNIX_TO_STR(x, fmt) -> Expression::UnixToStr for proper generation
18992 "UNIX_TO_STR" if f.args.len() >= 1 => {
18993 let mut args = f.args;
18994 let this = args.remove(0);
18995 let format = if !args.is_empty() {
18996 if let Expression::Literal(lit) = args.remove(0) {
18997 if let Literal::String(s) = lit.as_ref() {
18998 Some(s.clone())
18999 } else {
19000 None
19001 }
19002 } else {
19003 None
19004 }
19005 } else {
19006 None
19007 };
19008 Ok(Expression::UnixToStr(Box::new(
19009 crate::expressions::UnixToStr {
19010 this: Box::new(this),
19011 format,
19012 },
19013 )))
19014 }
19015 // UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation
19016 "UNIX_TO_TIME" if f.args.len() == 1 => {
19017 let mut args = f.args;
19018 let this = args.remove(0);
19019 Ok(Expression::UnixToTime(Box::new(
19020 crate::expressions::UnixToTime {
19021 this: Box::new(this),
19022 scale: None,
19023 zone: None,
19024 hours: None,
19025 minutes: None,
19026 format: None,
19027 target_type: None,
19028 },
19029 )))
19030 }
19031 // TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation
19032 "TIME_STR_TO_DATE" if f.args.len() == 1 => {
19033 let mut args = f.args;
19034 let this = args.remove(0);
19035 Ok(Expression::TimeStrToDate(Box::new(
19036 crate::expressions::UnaryFunc {
19037 this,
19038 original_name: None,
19039 inferred_type: None,
19040 },
19041 )))
19042 }
19043 // TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation
19044 "TIME_STR_TO_TIME" if f.args.len() == 1 => {
19045 let mut args = f.args;
19046 let this = args.remove(0);
19047 Ok(Expression::TimeStrToTime(Box::new(
19048 crate::expressions::TimeStrToTime {
19049 this: Box::new(this),
19050 zone: None,
19051 },
19052 )))
19053 }
// MONTHS_BETWEEN(end, start) -> DuckDB complex expansion
"MONTHS_BETWEEN" if f.args.len() == 2 => {
    match target {
        DialectType::DuckDB => {
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            // Wrap both operands in a DATE cast when needed so DAY() /
            // LAST_DAY() receive date-typed inputs.
            let cast_end = Self::ensure_cast_date(end_date);
            let cast_start = Self::ensure_cast_date(start_date);
            // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
            let dd = Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![
                    Expression::string("MONTH"),
                    cast_start.clone(),
                    cast_end.clone(),
                ],
            )));
            let day_end = Expression::Function(Box::new(Function::new(
                "DAY".to_string(),
                vec![cast_end.clone()],
            )));
            let day_start = Expression::Function(Box::new(Function::new(
                "DAY".to_string(),
                vec![cast_start.clone()],
            )));
            let last_day_end = Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![cast_end.clone()],
            )));
            let last_day_start = Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![cast_start.clone()],
            )));
            let day_last_end = Expression::Function(Box::new(
                Function::new("DAY".to_string(), vec![last_day_end]),
            ));
            let day_last_start = Expression::Function(Box::new(
                Function::new("DAY".to_string(), vec![last_day_start]),
            ));
            // When both dates fall on the last day of their months the
            // result is a whole number of months (fraction = 0).
            let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                day_end.clone(),
                day_last_end,
            )));
            let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                day_start.clone(),
                day_last_start,
            )));
            let both_cond =
                Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
            // Fractional remainder: (DAY(end) - DAY(start)) / 31.0,
            // parenthesized so the division applies to the difference.
            let day_diff = Expression::Sub(Box::new(BinaryOp::new(
                day_end, day_start,
            )));
            let day_diff_paren = Expression::Paren(Box::new(
                crate::expressions::Paren {
                    this: day_diff,
                    trailing_comments: Vec::new(),
                },
            ));
            let frac = Expression::Div(Box::new(BinaryOp::new(
                day_diff_paren,
                Expression::Literal(Box::new(Literal::Number(
                    "31.0".to_string(),
                ))),
            )));
            let case_expr = Expression::Case(Box::new(Case {
                operand: None,
                whens: vec![(both_cond, Expression::number(0))],
                else_: Some(frac),
                comments: Vec::new(),
                inferred_type: None,
            }));
            Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
        }
        DialectType::Snowflake | DialectType::Redshift => {
            // DATEDIFF(MONTH, start, end) — unit passed as a bare identifier.
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            let unit = Expression::Identifier(Identifier::new("MONTH"));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, start_date, end_date],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // DATE_DIFF('MONTH', start, end) — unit passed as a string literal.
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string("MONTH"), start_date, end_date],
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
19156 // MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
19157 // Drop the roundOff arg for non-Spark targets, keep it for Spark
19158 "MONTHS_BETWEEN" if f.args.len() == 3 => {
19159 match target {
19160 DialectType::Spark | DialectType::Databricks => {
19161 Ok(Expression::Function(f))
19162 }
19163 _ => {
19164 // Drop the 3rd arg and delegate to the 2-arg logic
19165 let mut args = f.args;
19166 let end_date = args.remove(0);
19167 let start_date = args.remove(0);
19168 // Re-create as 2-arg and process
19169 let f2 = Function::new(
19170 "MONTHS_BETWEEN".to_string(),
19171 vec![end_date, start_date],
19172 );
19173 let e2 = Expression::Function(Box::new(f2));
19174 Self::cross_dialect_normalize(e2, source, target)
19175 }
19176 }
19177 }
// TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP) for most targets.
// Only applied when the source dialect's single-arg TO_TIMESTAMP is a
// plain conversion (Spark/Databricks/Hive).
"TO_TIMESTAMP"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    // Guard guarantees exactly one argument.
    let arg = f.args.into_iter().next().unwrap();
    Ok(Expression::Cast(Box::new(Cast {
        this: arg,
        to: DataType::Timestamp {
            timezone: false,
            precision: None,
        },
        trailing_comments: vec![],
        double_colon_syntax: false,
        format: None,
        default: None,
        inferred_type: None,
    })))
}
// STRING(x) -> CAST(x AS STRING) for Spark-family targets, CAST(x AS TEXT)
// elsewhere. Only applied when the source is Spark/Databricks, where
// STRING(x) is a cast shorthand.
"STRING"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Spark | DialectType::Databricks
        ) =>
{
    // Guard guarantees exactly one argument.
    let arg = f.args.into_iter().next().unwrap();
    let dt = match target {
        // Spark-family keeps the STRING type name verbatim.
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => DataType::Custom {
            name: "STRING".to_string(),
        },
        _ => DataType::Text,
    };
    Ok(Expression::Cast(Box::new(Cast {
        this: arg,
        to: dt,
        trailing_comments: vec![],
        double_colon_syntax: false,
        format: None,
        default: None,
        inferred_type: None,
    })))
}
19229 // LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
19230 "LOGICAL_OR" if f.args.len() == 1 => {
19231 let name = match target {
19232 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
19233 _ => "LOGICAL_OR",
19234 };
19235 Ok(Expression::Function(Box::new(Function::new(
19236 name.to_string(),
19237 f.args,
19238 ))))
19239 }
19240 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
19241 "SPLIT"
19242 if f.args.len() == 2
19243 && matches!(
19244 source,
19245 DialectType::Spark
19246 | DialectType::Databricks
19247 | DialectType::Hive
19248 ) =>
19249 {
19250 let name = match target {
19251 DialectType::DuckDB => "STR_SPLIT_REGEX",
19252 DialectType::Presto
19253 | DialectType::Trino
19254 | DialectType::Athena => "REGEXP_SPLIT",
19255 DialectType::Spark
19256 | DialectType::Databricks
19257 | DialectType::Hive => "SPLIT",
19258 _ => "SPLIT",
19259 };
19260 Ok(Expression::Function(Box::new(Function::new(
19261 name.to_string(),
19262 f.args,
19263 ))))
19264 }
// TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
"TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        // Presto-family spelling; same (array, index) argument order.
        Ok(Expression::Function(Box::new(Function::new(
            "ELEMENT_AT".to_string(),
            f.args,
        ))))
    }
    DialectType::DuckDB => {
        // Rewrite to subscript syntax: arr[idx].
        let mut args = f.args;
        let arr = args.remove(0);
        let idx = args.remove(0);
        Ok(Expression::Subscript(Box::new(
            crate::expressions::Subscript {
                this: arr,
                index: idx,
            },
        )))
    }
    _ => Ok(Expression::Function(f)),
},
19286 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, LIST_FILTER for DuckDB
19287 "ARRAY_FILTER" if f.args.len() == 2 => {
19288 let name = match target {
19289 DialectType::DuckDB => "LIST_FILTER",
19290 DialectType::StarRocks => "ARRAY_FILTER",
19291 _ => "FILTER",
19292 };
19293 Ok(Expression::Function(Box::new(Function::new(
19294 name.to_string(),
19295 f.args,
19296 ))))
19297 }
19298 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
19299 "FILTER" if f.args.len() == 2 => {
19300 let name = match target {
19301 DialectType::DuckDB => "LIST_FILTER",
19302 DialectType::StarRocks => "ARRAY_FILTER",
19303 _ => "FILTER",
19304 };
19305 Ok(Expression::Function(Box::new(Function::new(
19306 name.to_string(),
19307 f.args,
19308 ))))
19309 }
19310 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
19311 "REDUCE" if f.args.len() >= 3 => {
19312 let name = match target {
19313 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
19314 _ => "REDUCE",
19315 };
19316 Ok(Expression::Function(Box::new(Function::new(
19317 name.to_string(),
19318 f.args,
19319 ))))
19320 }
// CURRENT_SCHEMA() -> dialect-specific
"CURRENT_SCHEMA" => {
    match target {
        DialectType::PostgreSQL => {
            // PostgreSQL: CURRENT_SCHEMA (no parens)
            Ok(Expression::Function(Box::new(Function {
                name: "CURRENT_SCHEMA".to_string(),
                args: vec![],
                distinct: false,
                trailing_comments: vec![],
                use_bracket_syntax: false,
                // Renders the call without a trailing "()".
                no_parens: true,
                quoted: false,
                span: None,
                inferred_type: None,
            })))
        }
        // MySQL family: SCHEMA()
        DialectType::MySQL
        | DialectType::Doris
        | DialectType::StarRocks => Ok(Expression::Function(Box::new(
            Function::new("SCHEMA".to_string(), vec![]),
        ))),
        // SQL Server: SCHEMA_NAME()
        DialectType::TSQL => Ok(Expression::Function(Box::new(
            Function::new("SCHEMA_NAME".to_string(), vec![]),
        ))),
        // SQLite: emit the literal 'main' (its primary database name).
        DialectType::SQLite => Ok(Expression::Literal(Box::new(
            Literal::String("main".to_string()),
        ))),
        _ => Ok(Expression::Function(f)),
    }
}
// LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for Spark/Hive/Databricks/ClickHouse
"LTRIM" if f.args.len() == 2 => match target {
    DialectType::Spark
    | DialectType::Hive
    | DialectType::Databricks
    | DialectType::ClickHouse => {
        let mut args = f.args;
        let str_expr = args.remove(0);
        let chars = args.remove(0);
        // Emit the SQL-standard TRIM(LEADING chars FROM str) form with an
        // explicit position keyword.
        Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
            this: str_expr,
            characters: Some(chars),
            position: crate::expressions::TrimPosition::Leading,
            sql_standard_syntax: true,
            position_explicit: true,
        })))
    }
    _ => Ok(Expression::Function(f)),
},
// RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str) for Spark/Hive/Databricks/ClickHouse
"RTRIM" if f.args.len() == 2 => match target {
    DialectType::Spark
    | DialectType::Hive
    | DialectType::Databricks
    | DialectType::ClickHouse => {
        let mut args = f.args;
        let str_expr = args.remove(0);
        let chars = args.remove(0);
        // Mirrors the LTRIM rewrite, trimming from the end instead.
        Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
            this: str_expr,
            characters: Some(chars),
            position: crate::expressions::TrimPosition::Trailing,
            sql_standard_syntax: true,
            position_explicit: true,
        })))
    }
    _ => Ok(Expression::Function(f)),
},
19390 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
19391 "ARRAY_REVERSE" if f.args.len() == 1 => match target {
19392 DialectType::ClickHouse => {
19393 let mut new_f = *f;
19394 new_f.name = "arrayReverse".to_string();
19395 Ok(Expression::Function(Box::new(new_f)))
19396 }
19397 _ => Ok(Expression::Function(f)),
19398 },
19399 // UUID() -> NEWID() for TSQL
19400 "UUID" if f.args.is_empty() => match target {
19401 DialectType::TSQL | DialectType::Fabric => {
19402 Ok(Expression::Function(Box::new(Function::new(
19403 "NEWID".to_string(),
19404 vec![],
19405 ))))
19406 }
19407 _ => Ok(Expression::Function(f)),
19408 },
19409 // FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse, FARMFINGERPRINT64(x) for Redshift
19410 "FARM_FINGERPRINT" if f.args.len() == 1 => match target {
19411 DialectType::ClickHouse => {
19412 let mut new_f = *f;
19413 new_f.name = "farmFingerprint64".to_string();
19414 Ok(Expression::Function(Box::new(new_f)))
19415 }
19416 DialectType::Redshift => {
19417 let mut new_f = *f;
19418 new_f.name = "FARMFINGERPRINT64".to_string();
19419 Ok(Expression::Function(Box::new(new_f)))
19420 }
19421 _ => Ok(Expression::Function(f)),
19422 },
19423 // JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark, OBJECT_KEYS(x) for Snowflake
19424 "JSON_KEYS" => match target {
19425 DialectType::Databricks | DialectType::Spark => {
19426 let mut new_f = *f;
19427 new_f.name = "JSON_OBJECT_KEYS".to_string();
19428 Ok(Expression::Function(Box::new(new_f)))
19429 }
19430 DialectType::Snowflake => {
19431 let mut new_f = *f;
19432 new_f.name = "OBJECT_KEYS".to_string();
19433 Ok(Expression::Function(Box::new(new_f)))
19434 }
19435 _ => Ok(Expression::Function(f)),
19436 },
19437 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake
19438 "WEEKOFYEAR" => match target {
19439 DialectType::Snowflake => {
19440 let mut new_f = *f;
19441 new_f.name = "WEEKISO".to_string();
19442 Ok(Expression::Function(Box::new(new_f)))
19443 }
19444 _ => Ok(Expression::Function(f)),
19445 },
19446 // FORMAT(fmt, args...) -> FORMAT_STRING(fmt, args...) for Databricks
19447 "FORMAT"
19448 if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
19449 {
19450 match target {
19451 DialectType::Databricks | DialectType::Spark => {
19452 let mut new_f = *f;
19453 new_f.name = "FORMAT_STRING".to_string();
19454 Ok(Expression::Function(Box::new(new_f)))
19455 }
19456 _ => Ok(Expression::Function(f)),
19457 }
19458 }
// CONCAT_WS('-', args...) -> CONCAT_WS('-', CAST(arg AS VARCHAR), ...) for Presto/Trino
"CONCAT_WS" if f.args.len() >= 2 => match target {
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        let mut args = f.args;
        // Keep the separator untouched; wrap every remaining argument
        // in CAST(... AS VARCHAR).
        let sep = args.remove(0);
        let cast_args: Vec<Expression> = args
            .into_iter()
            .map(|a| {
                Expression::Cast(Box::new(Cast {
                    this: a,
                    to: DataType::VarChar {
                        length: None,
                        parenthesized_length: false,
                    },
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            })
            .collect();
        let mut new_args = vec![sep];
        new_args.extend(cast_args);
        Ok(Expression::Function(Box::new(Function::new(
            "CONCAT_WS".to_string(),
            new_args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// ARRAY_SLICE(x, start, end) -> SLICE(x, start, end) for Presto/Trino/Databricks, arraySlice for ClickHouse
"ARRAY_SLICE" if f.args.len() >= 2 => match target {
    DialectType::DuckDB
        if f.args.len() == 3
            && matches!(source, DialectType::Snowflake) =>
    {
        // Snowflake ARRAY_SLICE (0-indexed, exclusive end)
        // -> DuckDB ARRAY_SLICE (1-indexed, inclusive end)
        let mut args = f.args;
        let arr = args.remove(0);
        let start = args.remove(0);
        let end = args.remove(0);

        // CASE WHEN start >= 0 THEN start + 1 ELSE start END
        // Non-negative starts shift to 1-based; negative starts keep
        // their count-from-the-end meaning.
        let adjusted_start = Expression::Case(Box::new(Case {
            operand: None,
            whens: vec![(
                Expression::Gte(Box::new(BinaryOp {
                    left: start.clone(),
                    right: Expression::number(0),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
                Expression::Add(Box::new(BinaryOp {
                    left: start.clone(),
                    right: Expression::number(1),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
            )],
            else_: Some(start),
            comments: vec![],
            inferred_type: None,
        }));

        // CASE WHEN end < 0 THEN end - 1 ELSE end END
        // A non-negative exclusive 0-based end equals the inclusive
        // 1-based end; negative ends move one element further back.
        let adjusted_end = Expression::Case(Box::new(Case {
            operand: None,
            whens: vec![(
                Expression::Lt(Box::new(BinaryOp {
                    left: end.clone(),
                    right: Expression::number(0),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
                Expression::Sub(Box::new(BinaryOp {
                    left: end.clone(),
                    right: Expression::number(1),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
            )],
            else_: Some(end),
            comments: vec![],
            inferred_type: None,
        }));

        Ok(Expression::Function(Box::new(Function::new(
            "ARRAY_SLICE".to_string(),
            vec![arr, adjusted_start, adjusted_end],
        ))))
    }
    DialectType::Presto
    | DialectType::Trino
    | DialectType::Athena
    | DialectType::Databricks
    | DialectType::Spark => {
        let mut new_f = *f;
        new_f.name = "SLICE".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    DialectType::ClickHouse => {
        let mut new_f = *f;
        new_f.name = "arraySlice".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    _ => Ok(Expression::Function(f)),
},
19576 // ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB (swap args)
19577 "ARRAY_PREPEND" if f.args.len() == 2 => match target {
19578 DialectType::DuckDB => {
19579 let mut args = f.args;
19580 let arr = args.remove(0);
19581 let val = args.remove(0);
19582 Ok(Expression::Function(Box::new(Function::new(
19583 "LIST_PREPEND".to_string(),
19584 vec![val, arr],
19585 ))))
19586 }
19587 _ => Ok(Expression::Function(f)),
19588 },
19589 // ARRAY_REMOVE(arr, target) -> dialect-specific
19590 "ARRAY_REMOVE" if f.args.len() == 2 => {
19591 match target {
19592 DialectType::DuckDB => {
19593 let mut args = f.args;
19594 let arr = args.remove(0);
19595 let target_val = args.remove(0);
19596 let u_id = crate::expressions::Identifier::new("_u");
19597 // LIST_FILTER(arr, _u -> _u <> target)
19598 let lambda = Expression::Lambda(Box::new(
19599 crate::expressions::LambdaExpr {
19600 parameters: vec![u_id.clone()],
19601 body: Expression::Neq(Box::new(BinaryOp {
19602 left: Expression::Identifier(u_id),
19603 right: target_val,
19604 left_comments: Vec::new(),
19605 operator_comments: Vec::new(),
19606 trailing_comments: Vec::new(),
19607 inferred_type: None,
19608 })),
19609 colon: false,
19610 parameter_types: Vec::new(),
19611 },
19612 ));
19613 Ok(Expression::Function(Box::new(Function::new(
19614 "LIST_FILTER".to_string(),
19615 vec![arr, lambda],
19616 ))))
19617 }
19618 DialectType::ClickHouse => {
19619 let mut args = f.args;
19620 let arr = args.remove(0);
19621 let target_val = args.remove(0);
19622 let u_id = crate::expressions::Identifier::new("_u");
19623 // arrayFilter(_u -> _u <> target, arr)
19624 let lambda = Expression::Lambda(Box::new(
19625 crate::expressions::LambdaExpr {
19626 parameters: vec![u_id.clone()],
19627 body: Expression::Neq(Box::new(BinaryOp {
19628 left: Expression::Identifier(u_id),
19629 right: target_val,
19630 left_comments: Vec::new(),
19631 operator_comments: Vec::new(),
19632 trailing_comments: Vec::new(),
19633 inferred_type: None,
19634 })),
19635 colon: false,
19636 parameter_types: Vec::new(),
19637 },
19638 ));
19639 Ok(Expression::Function(Box::new(Function::new(
19640 "arrayFilter".to_string(),
19641 vec![lambda, arr],
19642 ))))
19643 }
19644 DialectType::BigQuery => {
19645 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
19646 let mut args = f.args;
19647 let arr = args.remove(0);
19648 let target_val = args.remove(0);
19649 let u_id = crate::expressions::Identifier::new("_u");
19650 let u_col = Expression::Column(Box::new(
19651 crate::expressions::Column {
19652 name: u_id.clone(),
19653 table: None,
19654 join_mark: false,
19655 trailing_comments: Vec::new(),
19656 span: None,
19657 inferred_type: None,
19658 },
19659 ));
19660 // UNNEST(the_array) AS _u
19661 let unnest_expr = Expression::Unnest(Box::new(
19662 crate::expressions::UnnestFunc {
19663 this: arr,
19664 expressions: Vec::new(),
19665 with_ordinality: false,
19666 alias: None,
19667 offset_alias: None,
19668 },
19669 ));
19670 let aliased_unnest = Expression::Alias(Box::new(
19671 crate::expressions::Alias {
19672 this: unnest_expr,
19673 alias: u_id.clone(),
19674 column_aliases: Vec::new(),
19675 pre_alias_comments: Vec::new(),
19676 trailing_comments: Vec::new(),
19677 inferred_type: None,
19678 },
19679 ));
19680 // _u <> target
19681 let where_cond = Expression::Neq(Box::new(BinaryOp {
19682 left: u_col.clone(),
19683 right: target_val,
19684 left_comments: Vec::new(),
19685 operator_comments: Vec::new(),
19686 trailing_comments: Vec::new(),
19687 inferred_type: None,
19688 }));
19689 // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
19690 let subquery = Expression::Select(Box::new(
19691 crate::expressions::Select::new()
19692 .column(u_col)
19693 .from(aliased_unnest)
19694 .where_(where_cond),
19695 ));
19696 // ARRAY(subquery) -- use ArrayFunc with subquery as single element
19697 Ok(Expression::ArrayFunc(Box::new(
19698 crate::expressions::ArrayConstructor {
19699 expressions: vec![subquery],
19700 bracket_notation: false,
19701 use_list_keyword: false,
19702 },
19703 )))
19704 }
19705 _ => Ok(Expression::Function(f)),
19706 }
19707 }
// PARSE_JSON(str) -> remove for SQLite/Doris/MySQL/StarRocks (just use the
// string argument directly; these targets take JSON as text).
"PARSE_JSON" if f.args.len() == 1 => {
    match target {
        DialectType::SQLite
        | DialectType::Doris
        | DialectType::MySQL
        | DialectType::StarRocks => {
            // Strip PARSE_JSON, return the inner argument
            // (guard guarantees exactly one).
            Ok(f.args.into_iter().next().unwrap())
        }
        _ => Ok(Expression::Function(f)),
    }
}
// JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON
// This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through
"JSON_REMOVE" => Ok(Expression::Function(f)),
// JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON
// This is handled by PARSE_JSON stripping above; JSON_SET is passed through
"JSON_SET" => Ok(Expression::Function(f)),
19727 // DECODE(x, search1, result1, ..., default) -> CASE WHEN
19728 // Behavior per search value type:
19729 // NULL literal -> CASE WHEN x IS NULL THEN result
19730 // Literal (number, string, bool) -> CASE WHEN x = literal THEN result
19731 // Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
19732 "DECODE" if f.args.len() >= 3 => {
19733 // Keep as DECODE for targets that support it natively
19734 let keep_as_decode = matches!(
19735 target,
19736 DialectType::Oracle
19737 | DialectType::Snowflake
19738 | DialectType::Redshift
19739 | DialectType::Teradata
19740 | DialectType::Spark
19741 | DialectType::Databricks
19742 );
19743 if keep_as_decode {
19744 return Ok(Expression::Function(f));
19745 }
19746
19747 let mut args = f.args;
19748 let this_expr = args.remove(0);
19749 let mut pairs = Vec::new();
19750 let mut default = None;
19751 let mut i = 0;
19752 while i + 1 < args.len() {
19753 pairs.push((args[i].clone(), args[i + 1].clone()));
19754 i += 2;
19755 }
19756 if i < args.len() {
19757 default = Some(args[i].clone());
19758 }
19759 // Helper: check if expression is a literal value
19760 fn is_literal(e: &Expression) -> bool {
19761 matches!(
19762 e,
19763 Expression::Literal(_)
19764 | Expression::Boolean(_)
19765 | Expression::Neg(_)
19766 )
19767 }
19768 let whens: Vec<(Expression, Expression)> = pairs
19769 .into_iter()
19770 .map(|(search, result)| {
19771 if matches!(&search, Expression::Null(_)) {
19772 // NULL search -> IS NULL
19773 let condition = Expression::Is(Box::new(BinaryOp {
19774 left: this_expr.clone(),
19775 right: Expression::Null(crate::expressions::Null),
19776 left_comments: Vec::new(),
19777 operator_comments: Vec::new(),
19778 trailing_comments: Vec::new(),
19779 inferred_type: None,
19780 }));
19781 (condition, result)
19782 } else if is_literal(&search) {
19783 // Literal search -> simple equality
19784 let eq = Expression::Eq(Box::new(BinaryOp {
19785 left: this_expr.clone(),
19786 right: search,
19787 left_comments: Vec::new(),
19788 operator_comments: Vec::new(),
19789 trailing_comments: Vec::new(),
19790 inferred_type: None,
19791 }));
19792 (eq, result)
19793 } else {
19794 // Non-literal (column ref, expression) -> null-safe comparison
19795 let needs_paren = matches!(
19796 &search,
19797 Expression::Eq(_)
19798 | Expression::Neq(_)
19799 | Expression::Gt(_)
19800 | Expression::Gte(_)
19801 | Expression::Lt(_)
19802 | Expression::Lte(_)
19803 );
19804 let search_for_eq = if needs_paren {
19805 Expression::Paren(Box::new(
19806 crate::expressions::Paren {
19807 this: search.clone(),
19808 trailing_comments: Vec::new(),
19809 },
19810 ))
19811 } else {
19812 search.clone()
19813 };
19814 let eq = Expression::Eq(Box::new(BinaryOp {
19815 left: this_expr.clone(),
19816 right: search_for_eq,
19817 left_comments: Vec::new(),
19818 operator_comments: Vec::new(),
19819 trailing_comments: Vec::new(),
19820 inferred_type: None,
19821 }));
19822 let search_for_null = if needs_paren {
19823 Expression::Paren(Box::new(
19824 crate::expressions::Paren {
19825 this: search.clone(),
19826 trailing_comments: Vec::new(),
19827 },
19828 ))
19829 } else {
19830 search.clone()
19831 };
19832 let x_is_null = Expression::Is(Box::new(BinaryOp {
19833 left: this_expr.clone(),
19834 right: Expression::Null(crate::expressions::Null),
19835 left_comments: Vec::new(),
19836 operator_comments: Vec::new(),
19837 trailing_comments: Vec::new(),
19838 inferred_type: None,
19839 }));
19840 let s_is_null = Expression::Is(Box::new(BinaryOp {
19841 left: search_for_null,
19842 right: Expression::Null(crate::expressions::Null),
19843 left_comments: Vec::new(),
19844 operator_comments: Vec::new(),
19845 trailing_comments: Vec::new(),
19846 inferred_type: None,
19847 }));
19848 let both_null = Expression::And(Box::new(BinaryOp {
19849 left: x_is_null,
19850 right: s_is_null,
19851 left_comments: Vec::new(),
19852 operator_comments: Vec::new(),
19853 trailing_comments: Vec::new(),
19854 inferred_type: None,
19855 }));
19856 let condition = Expression::Or(Box::new(BinaryOp {
19857 left: eq,
19858 right: Expression::Paren(Box::new(
19859 crate::expressions::Paren {
19860 this: both_null,
19861 trailing_comments: Vec::new(),
19862 },
19863 )),
19864 left_comments: Vec::new(),
19865 operator_comments: Vec::new(),
19866 trailing_comments: Vec::new(),
19867 inferred_type: None,
19868 }));
19869 (condition, result)
19870 }
19871 })
19872 .collect();
19873 Ok(Expression::Case(Box::new(Case {
19874 operand: None,
19875 whens,
19876 else_: default,
19877 comments: Vec::new(),
19878 inferred_type: None,
19879 })))
19880 }
// LEVENSHTEIN(a, b, ...) -> dialect-specific
"LEVENSHTEIN" => {
    match target {
        DialectType::BigQuery => {
            // BigQuery's equivalent is EDIT_DISTANCE.
            let mut new_f = *f;
            new_f.name = "EDIT_DISTANCE".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
        DialectType::Drill => {
            let mut new_f = *f;
            new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
        DialectType::PostgreSQL if f.args.len() == 6 => {
            // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
            // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
            // (only the 6-arg form needs the rename; 2- and 5-arg forms
            // exist natively as LEVENSHTEIN and pass through below).
            let mut new_f = *f;
            new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
        _ => Ok(Expression::Function(f)),
    }
}
19904 // ARRAY_MAX(x) -> arrayMax(x) for ClickHouse, LIST_MAX(x) for DuckDB
19905 "ARRAY_MAX" => {
19906 let name = match target {
19907 DialectType::ClickHouse => "arrayMax",
19908 DialectType::DuckDB => "LIST_MAX",
19909 _ => "ARRAY_MAX",
19910 };
19911 let mut new_f = *f;
19912 new_f.name = name.to_string();
19913 Ok(Expression::Function(Box::new(new_f)))
19914 }
19915 // ARRAY_MIN(x) -> arrayMin(x) for ClickHouse, LIST_MIN(x) for DuckDB
19916 "ARRAY_MIN" => {
19917 let name = match target {
19918 DialectType::ClickHouse => "arrayMin",
19919 DialectType::DuckDB => "LIST_MIN",
19920 _ => "ARRAY_MIN",
19921 };
19922 let mut new_f = *f;
19923 new_f.name = name.to_string();
19924 Ok(Expression::Function(Box::new(new_f)))
19925 }
19926 // JAROWINKLER_SIMILARITY(a, b) -> jaroWinklerSimilarity(UPPER(a), UPPER(b)) for ClickHouse
19927 // -> JARO_WINKLER_SIMILARITY(UPPER(a), UPPER(b)) for DuckDB
19928 "JAROWINKLER_SIMILARITY" if f.args.len() == 2 => {
19929 let mut args = f.args;
19930 let b = args.pop().unwrap();
19931 let a = args.pop().unwrap();
19932 match target {
19933 DialectType::ClickHouse => {
19934 let upper_a = Expression::Upper(Box::new(
19935 crate::expressions::UnaryFunc::new(a),
19936 ));
19937 let upper_b = Expression::Upper(Box::new(
19938 crate::expressions::UnaryFunc::new(b),
19939 ));
19940 Ok(Expression::Function(Box::new(Function::new(
19941 "jaroWinklerSimilarity".to_string(),
19942 vec![upper_a, upper_b],
19943 ))))
19944 }
19945 DialectType::DuckDB => {
19946 let upper_a = Expression::Upper(Box::new(
19947 crate::expressions::UnaryFunc::new(a),
19948 ));
19949 let upper_b = Expression::Upper(Box::new(
19950 crate::expressions::UnaryFunc::new(b),
19951 ));
19952 Ok(Expression::Function(Box::new(Function::new(
19953 "JARO_WINKLER_SIMILARITY".to_string(),
19954 vec![upper_a, upper_b],
19955 ))))
19956 }
19957 _ => Ok(Expression::Function(Box::new(Function::new(
19958 "JAROWINKLER_SIMILARITY".to_string(),
19959 vec![a, b],
19960 )))),
19961 }
19962 }
19963 // CURRENT_SCHEMAS(x) -> CURRENT_SCHEMAS() for Snowflake (drop arg)
19964 "CURRENT_SCHEMAS" => match target {
19965 DialectType::Snowflake => Ok(Expression::Function(Box::new(
19966 Function::new("CURRENT_SCHEMAS".to_string(), vec![]),
19967 ))),
19968 _ => Ok(Expression::Function(f)),
19969 },
// TRUNC/TRUNCATE (numeric) -> dialect-specific
// Only the 0-2 argument (numeric) forms are handled here; longer argument
// lists fall through to other handling.
"TRUNC" | "TRUNCATE" if f.args.len() <= 2 => {
    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // ROUND(x, decimals, 1) - the 1 flag means truncation
            let mut args = f.args;
            let this = if args.is_empty() {
                // No args: nothing to rewrite. Note this `return` exits the
                // enclosing function, not just this match arm.
                return Ok(Expression::Function(Box::new(
                    Function::new("TRUNC".to_string(), args),
                )));
            } else {
                args.remove(0)
            };
            let decimals = if args.is_empty() {
                // Scale defaults to 0 when the caller omitted it.
                Expression::Literal(Box::new(Literal::Number(
                    "0".to_string(),
                )))
            } else {
                args.remove(0)
            };
            Ok(Expression::Function(Box::new(Function::new(
                "ROUND".to_string(),
                vec![
                    this,
                    decimals,
                    Expression::Literal(Box::new(Literal::Number(
                        "1".to_string(),
                    ))),
                ],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // TRUNCATE(x, decimals)
            let mut new_f = *f;
            new_f.name = "TRUNCATE".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
        DialectType::MySQL
        | DialectType::SingleStore
        | DialectType::TiDB => {
            // TRUNCATE(x, decimals)
            let mut new_f = *f;
            new_f.name = "TRUNCATE".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
        DialectType::DuckDB => {
            // DuckDB supports TRUNC(x, decimals) — preserve both args
            let mut args = f.args;
            // Snowflake fractions_supported: wrap non-INT decimals in CAST(... AS INT)
            // — presumably because DuckDB wants an integral scale; confirm.
            if args.len() == 2 && matches!(source, DialectType::Snowflake) {
                let decimals = args.remove(1);
                // NOTE(review): any Number literal passes this check, including
                // fractional literal text — verify that is intended.
                let is_int = matches!(&decimals, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)))
                    || matches!(&decimals, Expression::Cast(c) if matches!(c.to, DataType::Int { .. } | DataType::SmallInt { .. } | DataType::BigInt { .. } | DataType::TinyInt { .. }));
                let wrapped = if !is_int {
                    Expression::Cast(Box::new(crate::expressions::Cast {
                        this: decimals,
                        to: DataType::Int { length: None, integer_spelling: false },
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    }))
                } else {
                    decimals
                };
                args.push(wrapped);
            }
            Ok(Expression::Function(Box::new(Function::new(
                "TRUNC".to_string(),
                args,
            ))))
        }
        DialectType::ClickHouse => {
            // trunc(x, decimals) - lowercase
            let mut new_f = *f;
            new_f.name = "trunc".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Spark: TRUNC is date-only; numeric TRUNC → CAST(x AS BIGINT)
            // NOTE(review): a caller-supplied `decimals` argument is dropped
            // here, so TRUNC(x, 2) loses its scale — confirm this is intended.
            let this = f.args.into_iter().next().unwrap_or(
                Expression::Literal(Box::new(Literal::Number(
                    "0".to_string(),
                ))),
            );
            Ok(Expression::Cast(Box::new(crate::expressions::Cast {
                this,
                to: crate::expressions::DataType::BigInt {
                    length: None,
                },
                double_colon_syntax: false,
                trailing_comments: Vec::new(),
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        _ => {
            // TRUNC(x, decimals) for PostgreSQL, Oracle, Snowflake, etc.
            let mut new_f = *f;
            new_f.name = "TRUNC".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
    }
}
20078 // CURRENT_VERSION() -> VERSION() for most dialects
20079 "CURRENT_VERSION" => match target {
20080 DialectType::Snowflake
20081 | DialectType::Databricks
20082 | DialectType::StarRocks => Ok(Expression::Function(f)),
20083 DialectType::SQLite => {
20084 let mut new_f = *f;
20085 new_f.name = "SQLITE_VERSION".to_string();
20086 Ok(Expression::Function(Box::new(new_f)))
20087 }
20088 _ => {
20089 let mut new_f = *f;
20090 new_f.name = "VERSION".to_string();
20091 Ok(Expression::Function(Box::new(new_f)))
20092 }
20093 },
20094 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
20095 "ARRAY_REVERSE" => match target {
20096 DialectType::ClickHouse => {
20097 let mut new_f = *f;
20098 new_f.name = "arrayReverse".to_string();
20099 Ok(Expression::Function(Box::new(new_f)))
20100 }
20101 _ => Ok(Expression::Function(f)),
20102 },
20103 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
20104 "GENERATE_DATE_ARRAY" => {
20105 let mut args = f.args;
20106 if matches!(target, DialectType::BigQuery) {
20107 // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
20108 if args.len() == 2 {
20109 let default_interval = Expression::Interval(Box::new(
20110 crate::expressions::Interval {
20111 this: Some(Expression::Literal(Box::new(
20112 Literal::String("1".to_string()),
20113 ))),
20114 unit: Some(
20115 crate::expressions::IntervalUnitSpec::Simple {
20116 unit: crate::expressions::IntervalUnit::Day,
20117 use_plural: false,
20118 },
20119 ),
20120 },
20121 ));
20122 args.push(default_interval);
20123 }
20124 Ok(Expression::Function(Box::new(Function::new(
20125 "GENERATE_DATE_ARRAY".to_string(),
20126 args,
20127 ))))
20128 } else if matches!(target, DialectType::DuckDB) {
20129 // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
20130 let start = args.get(0).cloned();
20131 let end = args.get(1).cloned();
20132 let step = args.get(2).cloned().or_else(|| {
20133 Some(Expression::Interval(Box::new(
20134 crate::expressions::Interval {
20135 this: Some(Expression::Literal(Box::new(
20136 Literal::String("1".to_string()),
20137 ))),
20138 unit: Some(
20139 crate::expressions::IntervalUnitSpec::Simple {
20140 unit: crate::expressions::IntervalUnit::Day,
20141 use_plural: false,
20142 },
20143 ),
20144 },
20145 )))
20146 });
20147 let gen_series = Expression::GenerateSeries(Box::new(
20148 crate::expressions::GenerateSeries {
20149 start: start.map(Box::new),
20150 end: end.map(Box::new),
20151 step: step.map(Box::new),
20152 is_end_exclusive: None,
20153 },
20154 ));
20155 Ok(Expression::Cast(Box::new(Cast {
20156 this: gen_series,
20157 to: DataType::Array {
20158 element_type: Box::new(DataType::Date),
20159 dimension: None,
20160 },
20161 trailing_comments: vec![],
20162 double_colon_syntax: false,
20163 format: None,
20164 default: None,
20165 inferred_type: None,
20166 })))
20167 } else if matches!(
20168 target,
20169 DialectType::Presto | DialectType::Trino | DialectType::Athena
20170 ) {
20171 // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
20172 let start = args.get(0).cloned();
20173 let end = args.get(1).cloned();
20174 let step = args.get(2).cloned().or_else(|| {
20175 Some(Expression::Interval(Box::new(
20176 crate::expressions::Interval {
20177 this: Some(Expression::Literal(Box::new(
20178 Literal::String("1".to_string()),
20179 ))),
20180 unit: Some(
20181 crate::expressions::IntervalUnitSpec::Simple {
20182 unit: crate::expressions::IntervalUnit::Day,
20183 use_plural: false,
20184 },
20185 ),
20186 },
20187 )))
20188 });
20189 let gen_series = Expression::GenerateSeries(Box::new(
20190 crate::expressions::GenerateSeries {
20191 start: start.map(Box::new),
20192 end: end.map(Box::new),
20193 step: step.map(Box::new),
20194 is_end_exclusive: None,
20195 },
20196 ));
20197 Ok(gen_series)
20198 } else if matches!(
20199 target,
20200 DialectType::Spark | DialectType::Databricks
20201 ) {
20202 // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
20203 let start = args.get(0).cloned();
20204 let end = args.get(1).cloned();
20205 let step = args.get(2).cloned().or_else(|| {
20206 Some(Expression::Interval(Box::new(
20207 crate::expressions::Interval {
20208 this: Some(Expression::Literal(Box::new(
20209 Literal::String("1".to_string()),
20210 ))),
20211 unit: Some(
20212 crate::expressions::IntervalUnitSpec::Simple {
20213 unit: crate::expressions::IntervalUnit::Day,
20214 use_plural: false,
20215 },
20216 ),
20217 },
20218 )))
20219 });
20220 let gen_series = Expression::GenerateSeries(Box::new(
20221 crate::expressions::GenerateSeries {
20222 start: start.map(Box::new),
20223 end: end.map(Box::new),
20224 step: step.map(Box::new),
20225 is_end_exclusive: None,
20226 },
20227 ));
20228 Ok(gen_series)
20229 } else if matches!(target, DialectType::Snowflake) {
20230 // Snowflake: keep as GENERATE_DATE_ARRAY for later transform
20231 if args.len() == 2 {
20232 let default_interval = Expression::Interval(Box::new(
20233 crate::expressions::Interval {
20234 this: Some(Expression::Literal(Box::new(
20235 Literal::String("1".to_string()),
20236 ))),
20237 unit: Some(
20238 crate::expressions::IntervalUnitSpec::Simple {
20239 unit: crate::expressions::IntervalUnit::Day,
20240 use_plural: false,
20241 },
20242 ),
20243 },
20244 ));
20245 args.push(default_interval);
20246 }
20247 Ok(Expression::Function(Box::new(Function::new(
20248 "GENERATE_DATE_ARRAY".to_string(),
20249 args,
20250 ))))
20251 } else if matches!(
20252 target,
20253 DialectType::MySQL
20254 | DialectType::TSQL
20255 | DialectType::Fabric
20256 | DialectType::Redshift
20257 ) {
20258 // MySQL/TSQL/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
20259 // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
20260 Ok(Expression::Function(Box::new(Function::new(
20261 "GENERATE_DATE_ARRAY".to_string(),
20262 args,
20263 ))))
20264 } else {
20265 // PostgreSQL/others: convert to GenerateSeries
20266 let start = args.get(0).cloned();
20267 let end = args.get(1).cloned();
20268 let step = args.get(2).cloned().or_else(|| {
20269 Some(Expression::Interval(Box::new(
20270 crate::expressions::Interval {
20271 this: Some(Expression::Literal(Box::new(
20272 Literal::String("1".to_string()),
20273 ))),
20274 unit: Some(
20275 crate::expressions::IntervalUnitSpec::Simple {
20276 unit: crate::expressions::IntervalUnit::Day,
20277 use_plural: false,
20278 },
20279 ),
20280 },
20281 )))
20282 });
20283 Ok(Expression::GenerateSeries(Box::new(
20284 crate::expressions::GenerateSeries {
20285 start: start.map(Box::new),
20286 end: end.map(Box::new),
20287 step: step.map(Box::new),
20288 is_end_exclusive: None,
20289 },
20290 )))
20291 }
20292 }
20293 // ARRAYS_OVERLAP(arr1, arr2) from Snowflake -> DuckDB:
20294 // (arr1 && arr2) OR (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
20295 "ARRAYS_OVERLAP"
20296 if f.args.len() == 2
20297 && matches!(source, DialectType::Snowflake)
20298 && matches!(target, DialectType::DuckDB) =>
20299 {
20300 let mut args = f.args;
20301 let arr1 = args.remove(0);
20302 let arr2 = args.remove(0);
20303
20304 // (arr1 && arr2)
20305 let overlap = Expression::Paren(Box::new(Paren {
20306 this: Expression::ArrayOverlaps(Box::new(BinaryOp {
20307 left: arr1.clone(),
20308 right: arr2.clone(),
20309 left_comments: vec![],
20310 operator_comments: vec![],
20311 trailing_comments: vec![],
20312 inferred_type: None,
20313 })),
20314 trailing_comments: vec![],
20315 }));
20316
20317 // ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1)
20318 let arr1_has_null = Expression::Neq(Box::new(BinaryOp {
20319 left: Expression::Function(Box::new(Function::new(
20320 "ARRAY_LENGTH".to_string(),
20321 vec![arr1.clone()],
20322 ))),
20323 right: Expression::Function(Box::new(Function::new(
20324 "LIST_COUNT".to_string(),
20325 vec![arr1],
20326 ))),
20327 left_comments: vec![],
20328 operator_comments: vec![],
20329 trailing_comments: vec![],
20330 inferred_type: None,
20331 }));
20332
20333 // ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2)
20334 let arr2_has_null = Expression::Neq(Box::new(BinaryOp {
20335 left: Expression::Function(Box::new(Function::new(
20336 "ARRAY_LENGTH".to_string(),
20337 vec![arr2.clone()],
20338 ))),
20339 right: Expression::Function(Box::new(Function::new(
20340 "LIST_COUNT".to_string(),
20341 vec![arr2],
20342 ))),
20343 left_comments: vec![],
20344 operator_comments: vec![],
20345 trailing_comments: vec![],
20346 inferred_type: None,
20347 }));
20348
20349 // (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
20350 let null_check = Expression::Paren(Box::new(Paren {
20351 this: Expression::And(Box::new(BinaryOp {
20352 left: arr1_has_null,
20353 right: arr2_has_null,
20354 left_comments: vec![],
20355 operator_comments: vec![],
20356 trailing_comments: vec![],
20357 inferred_type: None,
20358 })),
20359 trailing_comments: vec![],
20360 }));
20361
20362 // (arr1 && arr2) OR (null_check)
20363 Ok(Expression::Or(Box::new(BinaryOp {
20364 left: overlap,
20365 right: null_check,
20366 left_comments: vec![],
20367 operator_comments: vec![],
20368 trailing_comments: vec![],
20369 inferred_type: None,
20370 })))
20371 }
// ARRAY_INTERSECTION([1, 2], [2, 3]) from Snowflake -> DuckDB:
// Bag semantics using LIST_TRANSFORM/LIST_FILTER with GENERATE_SERIES
//
// Overall shape of the emitted expression:
//   CASE WHEN arr1 IS NULL OR arr2 IS NULL THEN NULL
//        ELSE LIST_TRANSFORM(
//               LIST_FILTER(
//                 LIST_ZIP(arr1, GENERATE_SERIES(1, LENGTH(arr1))),
//                 pair -> (occurrences of pair[1] in arr1 up to pair[2])
//                         <= (occurrences of pair[1] in arr2)),
//               pair -> pair[1])
//   END
// An element of arr1 is kept only while arr2 still has an unmatched
// occurrence of it, which yields multiset (bag) intersection.
"ARRAY_INTERSECTION"
    if f.args.len() == 2
        && matches!(source, DialectType::Snowflake)
        && matches!(target, DialectType::DuckDB) =>
{
    let mut args = f.args;
    let arr1 = args.remove(0);
    let arr2 = args.remove(0);

    // Build: arr1 IS NULL
    let arr1_is_null = Expression::IsNull(Box::new(IsNull {
        this: arr1.clone(),
        not: false,
        postfix_form: false,
    }));
    let arr2_is_null = Expression::IsNull(Box::new(IsNull {
        this: arr2.clone(),
        not: false,
        postfix_form: false,
    }));
    let null_check = Expression::Or(Box::new(BinaryOp {
        left: arr1_is_null,
        right: arr2_is_null,
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
        inferred_type: None,
    }));

    // GENERATE_SERIES(1, LENGTH(arr1)) — 1-based positions into arr1
    let gen_series = Expression::Function(Box::new(Function::new(
        "GENERATE_SERIES".to_string(),
        vec![
            Expression::number(1),
            Expression::Function(Box::new(Function::new(
                "LENGTH".to_string(),
                vec![arr1.clone()],
            ))),
        ],
    )));

    // LIST_ZIP(arr1, GENERATE_SERIES(1, LENGTH(arr1)))
    let list_zip = Expression::Function(Box::new(Function::new(
        "LIST_ZIP".to_string(),
        vec![arr1.clone(), gen_series],
    )));

    // pair[1] (the element) and pair[2] (its position)
    let pair_col = Expression::column("pair");
    let pair_1 = Expression::Subscript(Box::new(
        crate::expressions::Subscript {
            this: pair_col.clone(),
            index: Expression::number(1),
        },
    ));
    let pair_2 = Expression::Subscript(Box::new(
        crate::expressions::Subscript {
            this: pair_col.clone(),
            index: Expression::number(2),
        },
    ));

    // arr1[1:pair[2]] — the prefix of arr1 up to the current position
    let arr1_slice = Expression::ArraySlice(Box::new(
        crate::expressions::ArraySlice {
            this: arr1.clone(),
            start: Some(Expression::number(1)),
            end: Some(pair_2),
        },
    ));

    // e IS NOT DISTINCT FROM pair[1] — NULL-safe equality
    let e_col = Expression::column("e");
    let is_not_distinct = Expression::NullSafeEq(Box::new(BinaryOp {
        left: e_col.clone(),
        right: pair_1.clone(),
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
        inferred_type: None,
    }));

    // e -> e IS NOT DISTINCT FROM pair[1]
    let inner_lambda1 =
        Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
            parameters: vec![crate::expressions::Identifier::new("e")],
            body: is_not_distinct,
            colon: false,
            parameter_types: vec![],
        }));

    // LIST_FILTER(arr1[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
    let inner_filter1 = Expression::Function(Box::new(Function::new(
        "LIST_FILTER".to_string(),
        vec![arr1_slice, inner_lambda1],
    )));

    // LENGTH(LIST_FILTER(arr1[1:pair[2]], ...)) — how many times pair[1]
    // has appeared in arr1 up to and including this position
    let len1 = Expression::Function(Box::new(Function::new(
        "LENGTH".to_string(),
        vec![inner_filter1],
    )));

    // e -> e IS NOT DISTINCT FROM pair[1]
    let inner_lambda2 =
        Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
            parameters: vec![crate::expressions::Identifier::new("e")],
            body: Expression::NullSafeEq(Box::new(BinaryOp {
                left: e_col,
                right: pair_1.clone(),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            })),
            colon: false,
            parameter_types: vec![],
        }));

    // LIST_FILTER(arr2, e -> e IS NOT DISTINCT FROM pair[1])
    let inner_filter2 = Expression::Function(Box::new(Function::new(
        "LIST_FILTER".to_string(),
        vec![arr2.clone(), inner_lambda2],
    )));

    // LENGTH(LIST_FILTER(arr2, ...)) — total occurrences of pair[1] in arr2
    let len2 = Expression::Function(Box::new(Function::new(
        "LENGTH".to_string(),
        vec![inner_filter2],
    )));

    // (LENGTH(...) <= LENGTH(...)) — keep while arr2 has an unmatched copy
    let cond = Expression::Paren(Box::new(Paren {
        this: Expression::Lte(Box::new(BinaryOp {
            left: len1,
            right: len2,
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        })),
        trailing_comments: vec![],
    }));

    // pair -> (condition)
    let filter_lambda =
        Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
            parameters: vec![crate::expressions::Identifier::new(
                "pair",
            )],
            body: cond,
            colon: false,
            parameter_types: vec![],
        }));

    // LIST_FILTER(LIST_ZIP(...), pair -> ...)
    let outer_filter = Expression::Function(Box::new(Function::new(
        "LIST_FILTER".to_string(),
        vec![list_zip, filter_lambda],
    )));

    // pair -> pair[1]
    let transform_lambda =
        Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
            parameters: vec![crate::expressions::Identifier::new(
                "pair",
            )],
            body: pair_1,
            colon: false,
            parameter_types: vec![],
        }));

    // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
    let list_transform = Expression::Function(Box::new(Function::new(
        "LIST_TRANSFORM".to_string(),
        vec![outer_filter, transform_lambda],
    )));

    // CASE WHEN arr1 IS NULL OR arr2 IS NULL THEN NULL
    //      ELSE LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
    // END
    Ok(Expression::Case(Box::new(Case {
        operand: None,
        whens: vec![(null_check, Expression::Null(Null))],
        else_: Some(list_transform),
        comments: vec![],
        inferred_type: None,
    })))
}
20563 // ARRAY_CONSTRUCT(args) -> Expression::Array for all targets
20564 "ARRAY_CONSTRUCT" => {
20565 Ok(Expression::Array(Box::new(crate::expressions::Array {
20566 expressions: f.args,
20567 })))
20568 }
20569 // ARRAY(args) function -> Expression::Array for DuckDB/Snowflake/Presto/Trino/Athena
20570 "ARRAY"
20571 if !f.args.iter().any(|a| {
20572 matches!(a, Expression::Select(_) | Expression::Subquery(_))
20573 }) =>
20574 {
20575 match target {
20576 DialectType::DuckDB
20577 | DialectType::Snowflake
20578 | DialectType::Presto
20579 | DialectType::Trino
20580 | DialectType::Athena => {
20581 Ok(Expression::Array(Box::new(crate::expressions::Array {
20582 expressions: f.args,
20583 })))
20584 }
20585 _ => Ok(Expression::Function(f)),
20586 }
20587 }
20588 _ => Ok(Expression::Function(f)),
20589 }
} else if let Expression::AggregateFunction(mut af) = e {
    // Aggregate-function fixups, dispatched on the upper-cased name.
    let name = af.name.to_ascii_uppercase();
    match name.as_str() {
        // ARBITRARY(x): delegate to the shared per-target conversion helper.
        "ARBITRARY" if af.args.len() == 1 => {
            let arg = af.args.into_iter().next().unwrap();
            Ok(convert_arbitrary(arg, target))
        }
        "JSON_ARRAYAGG" => {
            match target {
                DialectType::PostgreSQL => {
                    af.name = "JSON_AGG".to_string();
                    // Add NULLS FIRST to ORDER BY items for PostgreSQL
                    // (only where no explicit null ordering was given).
                    for ordered in af.order_by.iter_mut() {
                        if ordered.nulls_first.is_none() {
                            ordered.nulls_first = Some(true);
                        }
                    }
                    Ok(Expression::AggregateFunction(af))
                }
                _ => Ok(Expression::AggregateFunction(af)),
            }
        }
        _ => Ok(Expression::AggregateFunction(af)),
    }
} else if let Expression::JSONArrayAgg(ja) = e {
    // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
    match target {
        DialectType::PostgreSQL => {
            // Flatten the node's optional ORDER BY expression into the
            // aggregate's order_by list, defaulting nulls_first as above.
            let mut order_by = Vec::new();
            if let Some(order_expr) = ja.order {
                if let Expression::OrderBy(ob) = *order_expr {
                    for mut ordered in ob.expressions {
                        if ordered.nulls_first.is_none() {
                            ordered.nulls_first = Some(true);
                        }
                        order_by.push(ordered);
                    }
                }
            }
            Ok(Expression::AggregateFunction(Box::new(
                crate::expressions::AggregateFunction {
                    name: "JSON_AGG".to_string(),
                    args: vec![*ja.this],
                    distinct: false,
                    filter: None,
                    order_by,
                    limit: None,
                    ignore_nulls: None,
                    inferred_type: None,
                },
            )))
        }
        _ => Ok(Expression::JSONArrayAgg(ja)),
    }
} else if let Expression::ToNumber(tn) = e {
    // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
    let arg = *tn.this;
    Ok(Expression::Cast(Box::new(crate::expressions::Cast {
        this: arg,
        to: crate::expressions::DataType::Double {
            precision: None,
            scale: None,
        },
        double_colon_syntax: false,
        trailing_comments: Vec::new(),
        format: None,
        default: None,
        inferred_type: None,
    })))
} else {
    // Any other expression kind passes through unchanged.
    Ok(e)
}
20662 }
20663
20664 Action::RegexpLikeToDuckDB => {
20665 if let Expression::RegexpLike(f) = e {
20666 let mut args = vec![f.this, f.pattern];
20667 if let Some(flags) = f.flags {
20668 args.push(flags);
20669 }
20670 Ok(Expression::Function(Box::new(Function::new(
20671 "REGEXP_MATCHES".to_string(),
20672 args,
20673 ))))
20674 } else {
20675 Ok(e)
20676 }
20677 }
20678 Action::EpochConvert => {
20679 if let Expression::Epoch(f) = e {
20680 let arg = f.this;
20681 let name = match target {
20682 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
20683 "UNIX_TIMESTAMP"
20684 }
20685 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
20686 DialectType::BigQuery => "TIME_TO_UNIX",
20687 _ => "EPOCH",
20688 };
20689 Ok(Expression::Function(Box::new(Function::new(
20690 name.to_string(),
20691 vec![arg],
20692 ))))
20693 } else {
20694 Ok(e)
20695 }
20696 }
Action::EpochMsConvert => {
    // EpochMs(x): milliseconds-since-epoch -> timestamp, using each target's
    // native construct. Branches deliberately differ in cast strategy and in
    // the POW vs POWER spelling — preserve per branch.
    use crate::expressions::{BinaryOp, Cast};
    if let Expression::EpochMs(f) = e {
        let arg = f.this;
        match target {
            DialectType::Spark | DialectType::Databricks => {
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMP_MILLIS".to_string(),
                    vec![arg],
                ))))
            }
            DialectType::BigQuery => Ok(Expression::Function(Box::new(
                Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
            ))),
            DialectType::Presto | DialectType::Trino => {
                // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Double {
                        precision: None,
                        scale: None,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                let div = Expression::Div(Box::new(BinaryOp::new(
                    cast_arg,
                    Expression::Function(Box::new(Function::new(
                        "POW".to_string(),
                        vec![Expression::number(10), Expression::number(3)],
                    ))),
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "FROM_UNIXTIME".to_string(),
                    vec![div],
                ))))
            }
            DialectType::MySQL => {
                // FROM_UNIXTIME(x / POWER(10, 3))
                // NOTE(review): unlike Presto/PostgreSQL no cast is applied
                // here — presumed intentional; confirm.
                let div = Expression::Div(Box::new(BinaryOp::new(
                    arg,
                    Expression::Function(Box::new(Function::new(
                        "POWER".to_string(),
                        vec![Expression::number(10), Expression::number(3)],
                    ))),
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "FROM_UNIXTIME".to_string(),
                    vec![div],
                ))))
            }
            DialectType::PostgreSQL | DialectType::Redshift => {
                // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    // Custom type text: "DOUBLE PRECISION" (two-word spelling).
                    to: DataType::Custom {
                        name: "DOUBLE PRECISION".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                let div = Expression::Div(Box::new(BinaryOp::new(
                    cast_arg,
                    Expression::Function(Box::new(Function::new(
                        "POWER".to_string(),
                        vec![Expression::number(10), Expression::number(3)],
                    ))),
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "TO_TIMESTAMP".to_string(),
                    vec![div],
                ))))
            }
            DialectType::ClickHouse => {
                // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Nullable {
                        inner: Box::new(DataType::BigInt { length: None }),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                Ok(Expression::Function(Box::new(Function::new(
                    "fromUnixTimestamp64Milli".to_string(),
                    vec![cast_arg],
                ))))
            }
            // Fallback: keep the EPOCH_MS spelling (DuckDB and others).
            _ => Ok(Expression::Function(Box::new(Function::new(
                "EPOCH_MS".to_string(),
                vec![arg],
            )))),
        }
    } else {
        Ok(e)
    }
}
20803 Action::TSQLTypeNormalize => {
20804 if let Expression::DataType(dt) = e {
20805 let new_dt = match &dt {
20806 DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
20807 DataType::Decimal {
20808 precision: Some(15),
20809 scale: Some(4),
20810 }
20811 }
20812 DataType::Custom { name }
20813 if name.eq_ignore_ascii_case("SMALLMONEY") =>
20814 {
20815 DataType::Decimal {
20816 precision: Some(6),
20817 scale: Some(4),
20818 }
20819 }
20820 DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
20821 DataType::Timestamp {
20822 timezone: false,
20823 precision: None,
20824 }
20825 }
20826 DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
20827 DataType::Float {
20828 precision: None,
20829 scale: None,
20830 real_spelling: false,
20831 }
20832 }
20833 DataType::Float {
20834 real_spelling: true,
20835 ..
20836 } => DataType::Float {
20837 precision: None,
20838 scale: None,
20839 real_spelling: false,
20840 },
20841 DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
20842 DataType::Custom {
20843 name: "BLOB".to_string(),
20844 }
20845 }
20846 DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
20847 DataType::Boolean
20848 }
20849 DataType::Custom { name }
20850 if name.eq_ignore_ascii_case("ROWVERSION") =>
20851 {
20852 DataType::Custom {
20853 name: "BINARY".to_string(),
20854 }
20855 }
20856 DataType::Custom { name }
20857 if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
20858 {
20859 match target {
20860 DialectType::Spark
20861 | DialectType::Databricks
20862 | DialectType::Hive => DataType::Custom {
20863 name: "STRING".to_string(),
20864 },
20865 _ => DataType::VarChar {
20866 length: Some(36),
20867 parenthesized_length: true,
20868 },
20869 }
20870 }
20871 DataType::Custom { name }
20872 if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
20873 {
20874 match target {
20875 DialectType::Spark
20876 | DialectType::Databricks
20877 | DialectType::Hive => DataType::Timestamp {
20878 timezone: false,
20879 precision: None,
20880 },
20881 _ => DataType::Timestamp {
20882 timezone: true,
20883 precision: None,
20884 },
20885 }
20886 }
20887 DataType::Custom { ref name }
20888 if name.len() >= 10
20889 && name[..10].eq_ignore_ascii_case("DATETIME2(") =>
20890 {
20891 // DATETIME2(n) -> TIMESTAMP
20892 DataType::Timestamp {
20893 timezone: false,
20894 precision: None,
20895 }
20896 }
20897 DataType::Custom { ref name }
20898 if name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME(") =>
20899 {
20900 // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
20901 match target {
20902 DialectType::Spark
20903 | DialectType::Databricks
20904 | DialectType::Hive => DataType::Timestamp {
20905 timezone: false,
20906 precision: None,
20907 },
20908 _ => return Ok(Expression::DataType(dt)),
20909 }
20910 }
20911 DataType::Custom { ref name }
20912 if name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC") =>
20913 {
20914 // Parse NUMERIC(p,s) back to Decimal(p,s)
20915 let upper = name.to_ascii_uppercase();
20916 if let Some(inner) = upper
20917 .strip_prefix("NUMERIC(")
20918 .and_then(|s| s.strip_suffix(')'))
20919 {
20920 let parts: Vec<&str> = inner.split(',').collect();
20921 let precision =
20922 parts.first().and_then(|s| s.trim().parse::<u32>().ok());
20923 let scale =
20924 parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
20925 DataType::Decimal { precision, scale }
20926 } else if upper == "NUMERIC" {
20927 DataType::Decimal {
20928 precision: None,
20929 scale: None,
20930 }
20931 } else {
20932 return Ok(Expression::DataType(dt));
20933 }
20934 }
20935 DataType::Float {
20936 precision: Some(p), ..
20937 } => {
20938 // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
20939 // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
20940 let boundary = match target {
20941 DialectType::Hive
20942 | DialectType::Spark
20943 | DialectType::Databricks => 32,
20944 _ => 24,
20945 };
20946 if *p <= boundary {
20947 DataType::Float {
20948 precision: None,
20949 scale: None,
20950 real_spelling: false,
20951 }
20952 } else {
20953 DataType::Double {
20954 precision: None,
20955 scale: None,
20956 }
20957 }
20958 }
20959 DataType::TinyInt { .. } => match target {
20960 DialectType::DuckDB => DataType::Custom {
20961 name: "UTINYINT".to_string(),
20962 },
20963 DialectType::Hive
20964 | DialectType::Spark
20965 | DialectType::Databricks => DataType::SmallInt { length: None },
20966 _ => return Ok(Expression::DataType(dt)),
20967 },
20968 // INTEGER -> INT for Spark/Databricks
20969 DataType::Int {
20970 length,
20971 integer_spelling: true,
20972 } => DataType::Int {
20973 length: *length,
20974 integer_spelling: false,
20975 },
20976 _ => return Ok(Expression::DataType(dt)),
20977 };
20978 Ok(Expression::DataType(new_dt))
20979 } else {
20980 Ok(e)
20981 }
20982 }
            Action::MySQLSafeDivide => {
                // Rewrite `a / b` so the target matches MySQL's "safe divide"
                // semantics (division by zero yields NULL instead of an error):
                // the divisor is wrapped in NULLIF(b, 0), and for targets whose
                // `/` is integer division the dividend is cast to a floating
                // type so the result keeps its fractional part.
                use crate::expressions::{BinaryOp, Cast};
                if let Expression::Div(op) = e {
                    let left = op.left;
                    let right = op.right;
                    // For SQLite: CAST left as REAL but NO NULLIF wrapping
                    // (SQLite already returns NULL for division by zero).
                    if matches!(target, DialectType::SQLite) {
                        let new_left = Expression::Cast(Box::new(Cast {
                            this: left,
                            to: DataType::Float {
                                precision: None,
                                scale: None,
                                // `real_spelling: true` renders the type as REAL.
                                real_spelling: true,
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
                    }
                    // Wrap right in NULLIF(right, 0) so x / 0 becomes x / NULL -> NULL.
                    let nullif_right = Expression::Function(Box::new(Function::new(
                        "NULLIF".to_string(),
                        vec![right, Expression::number(0)],
                    )));
                    // For some dialects, also CAST the left side to force
                    // floating-point division; each group gets the spelling
                    // its generator expects.
                    let new_left = match target {
                        // These targets use the SQL-standard DOUBLE PRECISION spelling.
                        DialectType::PostgreSQL
                        | DialectType::Redshift
                        | DialectType::Teradata
                        | DialectType::Materialize
                        | DialectType::RisingWave => Expression::Cast(Box::new(Cast {
                            this: left,
                            to: DataType::Custom {
                                name: "DOUBLE PRECISION".to_string(),
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })),
                        // These targets spell the type DOUBLE.
                        DialectType::Drill
                        | DialectType::Trino
                        | DialectType::Presto
                        | DialectType::Athena => Expression::Cast(Box::new(Cast {
                            this: left,
                            to: DataType::Double {
                                precision: None,
                                scale: None,
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })),
                        // TSQL spells it FLOAT (real_spelling: false).
                        DialectType::TSQL => Expression::Cast(Box::new(Cast {
                            this: left,
                            to: DataType::Float {
                                precision: None,
                                scale: None,
                                real_spelling: false,
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })),
                        // Other targets keep the dividend untouched.
                        _ => left,
                    };
                    Ok(Expression::Div(Box::new(BinaryOp::new(
                        new_left,
                        nullif_right,
                    ))))
                } else {
                    // Not a division expression: pass through unchanged.
                    Ok(e)
                }
            }
21065 Action::AlterTableRenameStripSchema => {
21066 if let Expression::AlterTable(mut at) = e {
21067 if let Some(crate::expressions::AlterTableAction::RenameTable(
21068 ref mut new_tbl,
21069 )) = at.actions.first_mut()
21070 {
21071 new_tbl.schema = None;
21072 new_tbl.catalog = None;
21073 }
21074 Ok(Expression::AlterTable(at))
21075 } else {
21076 Ok(e)
21077 }
21078 }
21079 Action::NullsOrdering => {
21080 // Fill in the source dialect's implied null ordering default.
21081 // This makes implicit null ordering explicit so the target generator
21082 // can correctly strip or keep it.
21083 //
21084 // Dialect null ordering categories:
21085 // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
21086 // ASC -> NULLS LAST, DESC -> NULLS FIRST
21087 // nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, ClickHouse, etc.):
21088 // ASC -> NULLS FIRST, DESC -> NULLS LAST
21089 // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena):
21090 // NULLS LAST always (both ASC and DESC)
21091 if let Expression::Ordered(mut o) = e {
21092 let is_asc = !o.desc;
21093
21094 let is_source_nulls_large = matches!(
21095 source,
21096 DialectType::Oracle
21097 | DialectType::PostgreSQL
21098 | DialectType::Redshift
21099 | DialectType::Snowflake
21100 );
21101 let is_source_nulls_last = matches!(
21102 source,
21103 DialectType::DuckDB
21104 | DialectType::Presto
21105 | DialectType::Trino
21106 | DialectType::Dremio
21107 | DialectType::Athena
21108 | DialectType::ClickHouse
21109 | DialectType::Drill
21110 | DialectType::Exasol
21111 | DialectType::DataFusion
21112 );
21113
21114 // Determine target category to check if default matches
21115 let is_target_nulls_large = matches!(
21116 target,
21117 DialectType::Oracle
21118 | DialectType::PostgreSQL
21119 | DialectType::Redshift
21120 | DialectType::Snowflake
21121 );
21122 let is_target_nulls_last = matches!(
21123 target,
21124 DialectType::DuckDB
21125 | DialectType::Presto
21126 | DialectType::Trino
21127 | DialectType::Dremio
21128 | DialectType::Athena
21129 | DialectType::ClickHouse
21130 | DialectType::Drill
21131 | DialectType::Exasol
21132 | DialectType::DataFusion
21133 );
21134
21135 // Compute the implied nulls_first for source
21136 let source_nulls_first = if is_source_nulls_large {
21137 !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
21138 } else if is_source_nulls_last {
21139 false // NULLS LAST always
21140 } else {
21141 is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
21142 };
21143
21144 // Compute the target's default
21145 let target_nulls_first = if is_target_nulls_large {
21146 !is_asc
21147 } else if is_target_nulls_last {
21148 false
21149 } else {
21150 is_asc
21151 };
21152
21153 // Only add explicit nulls ordering if source and target defaults differ
21154 if source_nulls_first != target_nulls_first {
21155 o.nulls_first = Some(source_nulls_first);
21156 }
21157 // If they match, leave nulls_first as None so the generator won't output it
21158
21159 Ok(Expression::Ordered(o))
21160 } else {
21161 Ok(e)
21162 }
21163 }
            Action::StringAggConvert => {
                // Convert STRING_AGG (with or without a WITHIN GROUP clause)
                // into the target dialect's string-aggregation construct.
                match e {
                    Expression::WithinGroup(wg) => {
                        // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
                        // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
                        let (x_opt, sep_opt, distinct) = match wg.this {
                            Expression::AggregateFunction(ref af)
                                if af.name.eq_ignore_ascii_case("STRING_AGG")
                                    && af.args.len() >= 2 =>
                            {
                                (
                                    Some(af.args[0].clone()),
                                    Some(af.args[1].clone()),
                                    af.distinct,
                                )
                            }
                            Expression::Function(ref f)
                                if f.name.eq_ignore_ascii_case("STRING_AGG")
                                    && f.args.len() >= 2 =>
                            {
                                // Plain Function carries no distinct flag.
                                (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
                            }
                            Expression::StringAgg(ref sa) => {
                                (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
                            }
                            // Unrecognized inner expression: leave untouched below.
                            _ => (None, None, false),
                        };
                        if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
                            let order_by = wg.order_by;

                            match target {
                                DialectType::TSQL | DialectType::Fabric => {
                                    // Keep as WithinGroup(StringAgg) for TSQL
                                    Ok(Expression::WithinGroup(Box::new(
                                        crate::expressions::WithinGroup {
                                            this: Expression::StringAgg(Box::new(
                                                crate::expressions::StringAggFunc {
                                                    this: x,
                                                    separator: Some(sep),
                                                    order_by: None, // order_by goes in WithinGroup, not StringAgg
                                                    distinct,
                                                    filter: None,
                                                    limit: None,
                                                    inferred_type: None,
                                                },
                                            )),
                                            order_by,
                                        },
                                    )))
                                }
                                DialectType::MySQL
                                | DialectType::SingleStore
                                | DialectType::Doris
                                | DialectType::StarRocks => {
                                    // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
                                    Ok(Expression::GroupConcat(Box::new(
                                        crate::expressions::GroupConcatFunc {
                                            this: x,
                                            separator: Some(sep),
                                            order_by: Some(order_by),
                                            distinct,
                                            filter: None,
                                            limit: None,
                                            inferred_type: None,
                                        },
                                    )))
                                }
                                DialectType::SQLite => {
                                    // GROUP_CONCAT(x, sep) - no ORDER BY support,
                                    // so the ordering is dropped.
                                    Ok(Expression::GroupConcat(Box::new(
                                        crate::expressions::GroupConcatFunc {
                                            this: x,
                                            separator: Some(sep),
                                            order_by: None,
                                            distinct,
                                            filter: None,
                                            limit: None,
                                            inferred_type: None,
                                        },
                                    )))
                                }
                                DialectType::PostgreSQL | DialectType::Redshift => {
                                    // STRING_AGG(x, sep ORDER BY z)
                                    Ok(Expression::StringAgg(Box::new(
                                        crate::expressions::StringAggFunc {
                                            this: x,
                                            separator: Some(sep),
                                            order_by: Some(order_by),
                                            distinct,
                                            filter: None,
                                            limit: None,
                                            inferred_type: None,
                                        },
                                    )))
                                }
                                _ => {
                                    // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
                                    Ok(Expression::StringAgg(Box::new(
                                        crate::expressions::StringAggFunc {
                                            this: x,
                                            separator: Some(sep),
                                            order_by: Some(order_by),
                                            distinct,
                                            filter: None,
                                            limit: None,
                                            inferred_type: None,
                                        },
                                    )))
                                }
                            }
                        } else {
                            // Inner expression was not a STRING_AGG variant we
                            // recognize: keep the WITHIN GROUP wrapper as-is.
                            Ok(Expression::WithinGroup(wg))
                        }
                    }
                    Expression::StringAgg(sa) => {
                        // Bare STRING_AGG (no WITHIN GROUP) conversion.
                        match target {
                            DialectType::MySQL
                            | DialectType::SingleStore
                            | DialectType::Doris
                            | DialectType::StarRocks => {
                                // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
                                Ok(Expression::GroupConcat(Box::new(
                                    crate::expressions::GroupConcatFunc {
                                        this: sa.this,
                                        separator: sa.separator,
                                        order_by: sa.order_by,
                                        distinct: sa.distinct,
                                        filter: sa.filter,
                                        limit: None,
                                        inferred_type: None,
                                    },
                                )))
                            }
                            DialectType::SQLite => {
                                // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
                                Ok(Expression::GroupConcat(Box::new(
                                    crate::expressions::GroupConcatFunc {
                                        this: sa.this,
                                        separator: sa.separator,
                                        order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                                        distinct: sa.distinct,
                                        filter: sa.filter,
                                        limit: None,
                                        inferred_type: None,
                                    },
                                )))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                // STRING_AGG(x, sep) -> LISTAGG(x, sep)
                                Ok(Expression::ListAgg(Box::new(
                                    crate::expressions::ListAggFunc {
                                        this: sa.this,
                                        separator: sa.separator,
                                        on_overflow: None,
                                        order_by: sa.order_by,
                                        distinct: sa.distinct,
                                        filter: None,
                                        inferred_type: None,
                                    },
                                )))
                            }
                            _ => Ok(Expression::StringAgg(sa)),
                        }
                    }
                    _ => Ok(e),
                }
            }
            Action::GroupConcatConvert => {
                // Rewrite GROUP_CONCAT into the target dialect's equivalent
                // aggregation. The local helpers below rewrite a multi-argument
                // CONCAT argument into the concatenation form the target uses:
                // CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
                // or CONCAT(a, b, c) -> a + b + c (for TSQL)
21334 fn expand_concat_to_dpipe(expr: Expression) -> Expression {
21335 if let Expression::Function(ref f) = expr {
21336 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
21337 let mut result = f.args[0].clone();
21338 for arg in &f.args[1..] {
21339 result = Expression::Concat(Box::new(BinaryOp {
21340 left: result,
21341 right: arg.clone(),
21342 left_comments: vec![],
21343 operator_comments: vec![],
21344 trailing_comments: vec![],
21345 inferred_type: None,
21346 }));
21347 }
21348 return result;
21349 }
21350 }
21351 expr
21352 }
21353 fn expand_concat_to_plus(expr: Expression) -> Expression {
21354 if let Expression::Function(ref f) = expr {
21355 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
21356 let mut result = f.args[0].clone();
21357 for arg in &f.args[1..] {
21358 result = Expression::Add(Box::new(BinaryOp {
21359 left: result,
21360 right: arg.clone(),
21361 left_comments: vec![],
21362 operator_comments: vec![],
21363 trailing_comments: vec![],
21364 inferred_type: None,
21365 }));
21366 }
21367 return result;
21368 }
21369 }
21370 expr
21371 }
21372 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
21373 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
21374 if let Expression::Function(ref f) = expr {
21375 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
21376 let new_args: Vec<Expression> = f
21377 .args
21378 .iter()
21379 .map(|arg| {
21380 Expression::Cast(Box::new(crate::expressions::Cast {
21381 this: arg.clone(),
21382 to: crate::expressions::DataType::VarChar {
21383 length: None,
21384 parenthesized_length: false,
21385 },
21386 trailing_comments: Vec::new(),
21387 double_colon_syntax: false,
21388 format: None,
21389 default: None,
21390 inferred_type: None,
21391 }))
21392 })
21393 .collect();
21394 return Expression::Function(Box::new(
21395 crate::expressions::Function::new(
21396 "CONCAT".to_string(),
21397 new_args,
21398 ),
21399 ));
21400 }
21401 }
21402 expr
21403 }
                if let Expression::GroupConcat(gc) = e {
                    match target {
                        DialectType::Presto => {
                            // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
                            // (Presto has no LISTAGG; default separator is ','.)
                            let sep = gc.separator.unwrap_or(Expression::string(","));
                            // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
                            let this = wrap_concat_args_in_varchar_cast(gc.this);
                            let array_agg =
                                Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
                                    this,
                                    distinct: gc.distinct,
                                    filter: gc.filter,
                                    order_by: gc.order_by.unwrap_or_default(),
                                    name: None,
                                    ignore_nulls: None,
                                    having_max: None,
                                    limit: None,
                                    inferred_type: None,
                                }));
                            Ok(Expression::ArrayJoin(Box::new(
                                crate::expressions::ArrayJoinFunc {
                                    this: array_agg,
                                    separator: sep,
                                    null_replacement: None,
                                },
                            )))
                        }
                        DialectType::Trino => {
                            // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
                            let sep = gc.separator.unwrap_or(Expression::string(","));
                            // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
                            let this = wrap_concat_args_in_varchar_cast(gc.this);
                            Ok(Expression::ListAgg(Box::new(
                                crate::expressions::ListAggFunc {
                                    this,
                                    separator: Some(sep),
                                    on_overflow: None,
                                    order_by: gc.order_by,
                                    distinct: gc.distinct,
                                    filter: gc.filter,
                                    inferred_type: None,
                                },
                            )))
                        }
                        DialectType::PostgreSQL
                        | DialectType::Redshift
                        | DialectType::Snowflake
                        | DialectType::DuckDB
                        | DialectType::Hive
                        | DialectType::ClickHouse => {
                            // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
                            let sep = gc.separator.unwrap_or(Expression::string(","));
                            // Expand CONCAT(a,b,c) -> a || b || c for || dialects
                            let this = expand_concat_to_dpipe(gc.this);
                            // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
                            // so the MySQL-style implicit null ordering is preserved.
                            let order_by = if target == DialectType::PostgreSQL {
                                gc.order_by.map(|ords| {
                                    ords.into_iter()
                                        .map(|mut o| {
                                            if o.nulls_first.is_none() {
                                                if o.desc {
                                                    o.nulls_first = Some(false);
                                                    // NULLS LAST
                                                } else {
                                                    o.nulls_first = Some(true);
                                                    // NULLS FIRST
                                                }
                                            }
                                            o
                                        })
                                        .collect()
                                })
                            } else {
                                gc.order_by
                            };
                            Ok(Expression::StringAgg(Box::new(
                                crate::expressions::StringAggFunc {
                                    this,
                                    separator: Some(sep),
                                    order_by,
                                    distinct: gc.distinct,
                                    filter: gc.filter,
                                    limit: None,
                                    inferred_type: None,
                                },
                            )))
                        }
                        DialectType::TSQL => {
                            // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
                            // TSQL doesn't support DISTINCT in STRING_AGG
                            let sep = gc.separator.unwrap_or(Expression::string(","));
                            // Expand CONCAT(a,b,c) -> a + b + c for TSQL
                            let this = expand_concat_to_plus(gc.this);
                            Ok(Expression::StringAgg(Box::new(
                                crate::expressions::StringAggFunc {
                                    this,
                                    separator: Some(sep),
                                    order_by: gc.order_by,
                                    distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
                                    filter: gc.filter,
                                    limit: None,
                                    inferred_type: None,
                                },
                            )))
                        }
                        DialectType::SQLite => {
                            // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
                            // SQLite GROUP_CONCAT doesn't support ORDER BY
                            // Expand CONCAT(a,b,c) -> a || b || c
                            let this = expand_concat_to_dpipe(gc.this);
                            Ok(Expression::GroupConcat(Box::new(
                                crate::expressions::GroupConcatFunc {
                                    this,
                                    separator: gc.separator,
                                    order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                                    distinct: gc.distinct,
                                    filter: gc.filter,
                                    limit: None,
                                    inferred_type: None,
                                },
                            )))
                        }
                        DialectType::Spark | DialectType::Databricks => {
                            // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
                            let sep = gc.separator.unwrap_or(Expression::string(","));
                            Ok(Expression::ListAgg(Box::new(
                                crate::expressions::ListAggFunc {
                                    this: gc.this,
                                    separator: Some(sep),
                                    on_overflow: None,
                                    order_by: gc.order_by,
                                    distinct: gc.distinct,
                                    filter: None,
                                    inferred_type: None,
                                },
                            )))
                        }
                        DialectType::MySQL
                        | DialectType::SingleStore
                        | DialectType::StarRocks => {
                            // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
                            if gc.separator.is_none() {
                                let mut gc = gc;
                                gc.separator = Some(Expression::string(","));
                                Ok(Expression::GroupConcat(gc))
                            } else {
                                Ok(Expression::GroupConcat(gc))
                            }
                        }
                        // Other targets keep GROUP_CONCAT untouched.
                        _ => Ok(Expression::GroupConcat(gc)),
                    }
                } else {
                    Ok(e)
                }
            }
21559 Action::TempTableHash => {
21560 match e {
21561 Expression::CreateTable(mut ct) => {
21562 // TSQL #table -> TEMPORARY TABLE with # stripped from name
21563 let name = &ct.name.name.name;
21564 if name.starts_with('#') {
21565 ct.name.name.name = name.trim_start_matches('#').to_string();
21566 }
21567 // Set temporary flag
21568 ct.temporary = true;
21569 Ok(Expression::CreateTable(ct))
21570 }
21571 Expression::Table(mut tr) => {
21572 // Strip # from table references
21573 let name = &tr.name.name;
21574 if name.starts_with('#') {
21575 tr.name.name = name.trim_start_matches('#').to_string();
21576 }
21577 Ok(Expression::Table(tr))
21578 }
21579 Expression::DropTable(mut dt) => {
21580 // Strip # from DROP TABLE names
21581 for table_ref in &mut dt.names {
21582 if table_ref.name.name.starts_with('#') {
21583 table_ref.name.name =
21584 table_ref.name.name.trim_start_matches('#').to_string();
21585 }
21586 }
21587 Ok(Expression::DropTable(dt))
21588 }
21589 _ => Ok(e),
21590 }
21591 }
21592 Action::NvlClearOriginal => {
21593 if let Expression::Nvl(mut f) = e {
21594 f.original_name = None;
21595 Ok(Expression::Nvl(f))
21596 } else {
21597 Ok(e)
21598 }
21599 }
            Action::HiveCastToTryCast => {
                // Convert Hive/Spark CAST to TRY_CAST for targets that support it,
                // adjusting the target type along the way for two special cases.
                if let Expression::Cast(mut c) = e {
                    // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
                    // (Spark's TIMESTAMP is always timezone-aware)
                    if matches!(target, DialectType::DuckDB)
                        && matches!(source, DialectType::Spark | DialectType::Databricks)
                        && matches!(
                            c.to,
                            DataType::Timestamp {
                                timezone: false,
                                ..
                            }
                        )
                    {
                        c.to = DataType::Custom {
                            name: "TIMESTAMPTZ".to_string(),
                        };
                    }
                    // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
                    // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
                    if matches!(target, DialectType::Databricks | DialectType::Spark)
                        && matches!(
                            source,
                            DialectType::Spark | DialectType::Databricks | DialectType::Hive
                        )
                        && Self::has_varchar_char_type(&c.to)
                    {
                        c.to = Self::normalize_varchar_to_string(c.to);
                    }
                    // Same payload, TryCast variant: the generator renders TRY_CAST.
                    Ok(Expression::TryCast(c))
                } else {
                    Ok(e)
                }
            }
            Action::XorExpand => {
                // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
                // Snowflake: use BOOLXOR(a, b) instead
                if let Expression::Xor(xor) = e {
                    // Collect all XOR operands from the three possible slots
                    // (this / expression / trailing expressions) into one list.
                    let mut operands = Vec::new();
                    if let Some(this) = xor.this {
                        operands.push(*this);
                    }
                    if let Some(expr) = xor.expression {
                        operands.push(*expr);
                    }
                    operands.extend(xor.expressions);

                    // Snowflake: use BOOLXOR(a, b). Only the binary case maps;
                    // 3+ operands fall through to the generic expansion below.
                    if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
                        let a = operands.remove(0);
                        let b = operands.remove(0);
                        return Ok(Expression::Function(Box::new(Function::new(
                            "BOOLXOR".to_string(),
                            vec![a, b],
                        ))));
                    }

                    // Helper to build (a AND NOT b) OR (NOT a AND b).
                    // Operands are cloned because each appears twice in the expansion.
                    let make_xor = |a: Expression, b: Expression| -> Expression {
                        let not_b = Expression::Not(Box::new(
                            crate::expressions::UnaryOp::new(b.clone()),
                        ));
                        let not_a = Expression::Not(Box::new(
                            crate::expressions::UnaryOp::new(a.clone()),
                        ));
                        let left_and = Expression::And(Box::new(BinaryOp {
                            left: a,
                            right: Expression::Paren(Box::new(Paren {
                                this: not_b,
                                trailing_comments: Vec::new(),
                            })),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        let right_and = Expression::And(Box::new(BinaryOp {
                            left: Expression::Paren(Box::new(Paren {
                                this: not_a,
                                trailing_comments: Vec::new(),
                            })),
                            right: b,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        // Parenthesize both sides so the rendered SQL keeps the
                        // intended precedence.
                        Expression::Or(Box::new(BinaryOp {
                            left: Expression::Paren(Box::new(Paren {
                                this: left_and,
                                trailing_comments: Vec::new(),
                            })),
                            right: Expression::Paren(Box::new(Paren {
                                this: right_and,
                                trailing_comments: Vec::new(),
                            })),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }))
                    };

                    if operands.len() >= 2 {
                        // Left-fold: a XOR b XOR c => make_xor(make_xor(a, b), c).
                        let mut result = make_xor(operands.remove(0), operands.remove(0));
                        for operand in operands {
                            result = make_xor(result, operand);
                        }
                        Ok(result)
                    } else if operands.len() == 1 {
                        // Single operand: XOR of one value is the value itself.
                        Ok(operands.remove(0))
                    } else {
                        // No operands - return FALSE (shouldn't happen)
                        Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                            value: false,
                        }))
                    }
                } else {
                    Ok(e)
                }
            }
21723 Action::DatePartUnquote => {
21724 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
21725 // Convert the quoted string first arg to a bare Column/Identifier
21726 if let Expression::Function(mut f) = e {
21727 if let Some(Expression::Literal(lit)) = f.args.first() {
21728 if let crate::expressions::Literal::String(s) = lit.as_ref() {
21729 let bare_name = s.to_ascii_lowercase();
21730 f.args[0] =
21731 Expression::Column(Box::new(crate::expressions::Column {
21732 name: Identifier::new(bare_name),
21733 table: None,
21734 join_mark: false,
21735 trailing_comments: Vec::new(),
21736 span: None,
21737 inferred_type: None,
21738 }));
21739 }
21740 }
21741 Ok(Expression::Function(f))
21742 } else {
21743 Ok(e)
21744 }
21745 }
21746 Action::ArrayLengthConvert => {
21747 // Extract the argument from the expression
21748 let arg = match e {
21749 Expression::Cardinality(ref f) => f.this.clone(),
21750 Expression::ArrayLength(ref f) => f.this.clone(),
21751 Expression::ArraySize(ref f) => f.this.clone(),
21752 _ => return Ok(e),
21753 };
21754 match target {
21755 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
21756 Ok(Expression::Function(Box::new(Function::new(
21757 "SIZE".to_string(),
21758 vec![arg],
21759 ))))
21760 }
21761 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
21762 Ok(Expression::Cardinality(Box::new(
21763 crate::expressions::UnaryFunc::new(arg),
21764 )))
21765 }
21766 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
21767 crate::expressions::UnaryFunc::new(arg),
21768 ))),
21769 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
21770 crate::expressions::UnaryFunc::new(arg),
21771 ))),
21772 DialectType::PostgreSQL | DialectType::Redshift => {
21773 // PostgreSQL ARRAY_LENGTH requires dimension arg
21774 Ok(Expression::Function(Box::new(Function::new(
21775 "ARRAY_LENGTH".to_string(),
21776 vec![arg, Expression::number(1)],
21777 ))))
21778 }
21779 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
21780 crate::expressions::UnaryFunc::new(arg),
21781 ))),
21782 _ => Ok(e), // Keep original
21783 }
21784 }
21785
            Action::JsonExtractToArrow => {
                // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
                if let Expression::JsonExtract(mut f) = e {
                    f.arrow_syntax = true;
                    // Transform path: convert bracket notation to dot notation
                    // SQLite strips wildcards, DuckDB preserves them
                    if let Expression::Literal(ref lit) = f.path {
                        if let Literal::String(ref s) = lit.as_ref() {
                            let mut transformed = s.clone();
                            if matches!(target, DialectType::SQLite) {
                                transformed = Self::strip_json_wildcards(&transformed);
                            }
                            transformed = Self::bracket_to_dot_notation(&transformed);
                            // Only replace the literal when something changed,
                            // preserving the original node otherwise.
                            if transformed != *s {
                                f.path = Expression::string(&transformed);
                            }
                        }
                    }
                    Ok(Expression::JsonExtract(f))
                } else {
                    Ok(e)
                }
            }
21809
            Action::JsonExtractToGetJsonObject => {
                // Convert JSON_EXTRACT to the target's path-extraction function:
                // JSON_EXTRACT_PATH(_TEXT) for PostgreSQL/Redshift, otherwise
                // GET_JSON_OBJECT (Hive/Spark style).
                if let Expression::JsonExtract(f) = e {
                    if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
                        // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
                        // Use proper decomposition that handles brackets
                        let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path {
                            if let Literal::String(ref s) = lit.as_ref() {
                                let parts = Self::decompose_json_path(s);
                                parts.into_iter().map(|k| Expression::string(&k)).collect()
                            } else {
                                // NOTE(review): a non-string literal path yields no
                                // key arguments here — the path is dropped; confirm
                                // this case cannot occur in practice.
                                vec![]
                            }
                        } else {
                            // Dynamic path: pass it through as a single argument.
                            vec![f.path]
                        };
                        // Redshift wants the _TEXT variant (returns text, not JSON).
                        let func_name = if matches!(target, DialectType::Redshift) {
                            "JSON_EXTRACT_PATH_TEXT"
                        } else {
                            "JSON_EXTRACT_PATH"
                        };
                        let mut args = vec![f.this];
                        args.extend(keys);
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            args,
                        ))))
                    } else {
                        // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
                        // Convert bracket double quotes to single quotes
                        let path = if let Expression::Literal(ref lit) = f.path {
                            if let Literal::String(ref s) = lit.as_ref() {
                                let normalized = Self::bracket_to_single_quotes(s);
                                // Keep the original node when nothing changed.
                                if normalized != *s {
                                    Expression::string(&normalized)
                                } else {
                                    f.path.clone()
                                }
                            } else {
                                f.path.clone()
                            }
                        } else {
                            f.path.clone()
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_JSON_OBJECT".to_string(),
                            vec![f.this, path],
                        ))))
                    }
                } else {
                    Ok(e)
                }
            }
21862
21863 Action::JsonExtractScalarToGetJsonObject => {
21864 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
21865 if let Expression::JsonExtractScalar(f) = e {
21866 Ok(Expression::Function(Box::new(Function::new(
21867 "GET_JSON_OBJECT".to_string(),
21868 vec![f.this, f.path],
21869 ))))
21870 } else {
21871 Ok(e)
21872 }
21873 }
21874
            Action::JsonExtractToTsql => {
                // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY(x, path), JSON_VALUE(x, path)) for TSQL
                // (JSON_QUERY returns objects/arrays, JSON_VALUE returns scalars;
                // ISNULL picks whichever succeeded.)
                let (this, path) = match e {
                    Expression::JsonExtract(f) => (f.this, f.path),
                    Expression::JsonExtractScalar(f) => (f.this, f.path),
                    _ => return Ok(e),
                };
                // Transform path: strip wildcards, convert bracket notation to dot notation
                let transformed_path = if let Expression::Literal(ref lit) = path {
                    if let Literal::String(ref s) = lit.as_ref() {
                        let stripped = Self::strip_json_wildcards(s);
                        let dotted = Self::bracket_to_dot_notation(&stripped);
                        Expression::string(&dotted)
                    } else {
                        path.clone()
                    }
                } else {
                    path
                };
                // The path appears in both calls, hence the clones.
                let json_query = Expression::Function(Box::new(Function::new(
                    "JSON_QUERY".to_string(),
                    vec![this.clone(), transformed_path.clone()],
                )));
                let json_value = Expression::Function(Box::new(Function::new(
                    "JSON_VALUE".to_string(),
                    vec![this, transformed_path],
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "ISNULL".to_string(),
                    vec![json_query, json_value],
                ))))
            }
21907
21908 Action::JsonExtractToClickHouse => {
21909 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString(x, 'key1', idx, 'key2') for ClickHouse
21910 let (this, path) = match e {
21911 Expression::JsonExtract(f) => (f.this, f.path),
21912 Expression::JsonExtractScalar(f) => (f.this, f.path),
21913 _ => return Ok(e),
21914 };
21915 let args: Vec<Expression> = if let Expression::Literal(lit) = path {
21916 if let Literal::String(ref s) = lit.as_ref() {
21917 let parts = Self::decompose_json_path(s);
21918 let mut result = vec![this];
21919 for part in parts {
21920 // ClickHouse uses 1-based integer indices for array access
21921 if let Ok(idx) = part.parse::<i64>() {
21922 result.push(Expression::number(idx + 1));
21923 } else {
21924 result.push(Expression::string(&part));
21925 }
21926 }
21927 result
21928 } else {
21929 vec![]
21930 }
21931 } else {
21932 vec![this, path]
21933 };
21934 Ok(Expression::Function(Box::new(Function::new(
21935 "JSONExtractString".to_string(),
21936 args,
21937 ))))
21938 }
21939
            Action::JsonExtractScalarConvert => {
                // JSON_EXTRACT_SCALAR -> target-specific
                if let Expression::JsonExtractScalar(f) = e {
                    match target {
                        DialectType::PostgreSQL | DialectType::Redshift => {
                            // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'key1', 'key2')
                            let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path
                            {
                                if let Literal::String(ref s) = lit.as_ref() {
                                    let parts = Self::decompose_json_path(s);
                                    parts.into_iter().map(|k| Expression::string(&k)).collect()
                                } else {
                                    // NOTE(review): a non-string literal path
                                    // produces no key arguments — the path is
                                    // silently dropped; confirm this case is
                                    // unreachable in practice.
                                    vec![]
                                }
                            } else {
                                // Dynamic path: pass it through as one argument.
                                vec![f.path]
                            };
                            let mut args = vec![f.this];
                            args.extend(keys);
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_EXTRACT_PATH_TEXT".to_string(),
                                args,
                            ))))
                        }
                        DialectType::Snowflake => {
                            // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'stripped_path')
                            // Snowflake takes the path without the '$.' prefix.
                            let stripped_path = if let Expression::Literal(ref lit) = f.path {
                                if let Literal::String(ref s) = lit.as_ref() {
                                    let stripped = Self::strip_json_dollar_prefix(s);
                                    Expression::string(&stripped)
                                } else {
                                    f.path.clone()
                                }
                            } else {
                                f.path
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_EXTRACT_PATH_TEXT".to_string(),
                                vec![f.this, stripped_path],
                            ))))
                        }
                        DialectType::SQLite | DialectType::DuckDB => {
                            // JSON_EXTRACT_SCALAR(x, '$.path') -> x ->> '$.path'
                            // (arrow_syntax makes the generator render the operator form).
                            Ok(Expression::JsonExtractScalar(Box::new(
                                crate::expressions::JsonExtractFunc {
                                    this: f.this,
                                    path: f.path,
                                    returning: f.returning,
                                    arrow_syntax: true,
                                    hash_arrow_syntax: false,
                                    wrapper_option: None,
                                    quotes_option: None,
                                    on_scalar_string: false,
                                    on_error: None,
                                },
                            )))
                        }
                        // Other targets keep JSON_EXTRACT_SCALAR untouched.
                        _ => Ok(Expression::JsonExtractScalar(f)),
                    }
                } else {
                    Ok(e)
                }
            }
22003
22004 Action::JsonPathNormalize => {
22005 // Normalize JSON path format for BigQuery, MySQL, etc.
22006 if let Expression::JsonExtract(mut f) = e {
22007 if let Expression::Literal(ref lit) = f.path {
22008 if let Literal::String(ref s) = lit.as_ref() {
22009 let mut normalized = s.clone();
22010 // Convert bracket notation and handle wildcards per dialect
22011 match target {
22012 DialectType::BigQuery => {
22013 // BigQuery strips wildcards and uses single quotes in brackets
22014 normalized = Self::strip_json_wildcards(&normalized);
22015 normalized = Self::bracket_to_single_quotes(&normalized);
22016 }
22017 DialectType::MySQL => {
22018 // MySQL preserves wildcards, converts brackets to dot notation
22019 normalized = Self::bracket_to_dot_notation(&normalized);
22020 }
22021 _ => {}
22022 }
22023 if normalized != *s {
22024 f.path = Expression::string(&normalized);
22025 }
22026 }
22027 }
22028 Ok(Expression::JsonExtract(f))
22029 } else {
22030 Ok(e)
22031 }
22032 }
22033
            Action::JsonQueryValueConvert => {
                // JsonQuery/JsonValue -> target-specific rewrites. Both node kinds
                // carry the same fields; is_query only matters for the fallback arm.
                let (f, is_query) = match e {
                    Expression::JsonQuery(f) => (f, true),
                    Expression::JsonValue(f) => (f, false),
                    _ => return Ok(e),
                };
                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        // ISNULL(JSON_QUERY(...), JSON_VALUE(...))
                        // Emit both forms and coalesce: whichever returns non-NULL
                        // for the actual payload wins.
                        let json_query = Expression::Function(Box::new(Function::new(
                            "JSON_QUERY".to_string(),
                            vec![f.this.clone(), f.path.clone()],
                        )));
                        let json_value = Expression::Function(Box::new(Function::new(
                            "JSON_VALUE".to_string(),
                            vec![f.this, f.path],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "ISNULL".to_string(),
                            vec![json_query, json_value],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_JSON_OBJECT".to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_EXTRACT_PATH_TEXT".to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                    DialectType::DuckDB | DialectType::SQLite => {
                        // json -> path arrow syntax
                        Ok(Expression::JsonExtract(Box::new(
                            crate::expressions::JsonExtractFunc {
                                this: f.this,
                                path: f.path,
                                returning: f.returning,
                                arrow_syntax: true,
                                hash_arrow_syntax: false,
                                wrapper_option: f.wrapper_option,
                                quotes_option: f.quotes_option,
                                on_scalar_string: f.on_scalar_string,
                                on_error: f.on_error,
                            },
                        )))
                    }
                    DialectType::Snowflake => {
                        // GET_PATH(PARSE_JSON(json), 'path')
                        // Strip $. prefix from path
                        // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
                        let json_expr = match &f.this {
                            Expression::Function(ref inner_f)
                                if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                            {
                                f.this
                            }
                            Expression::ParseJson(_) => {
                                // Already a ParseJson expression, which generates as PARSE_JSON(...)
                                f.this
                            }
                            _ => Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                vec![f.this],
                            ))),
                        };
                        let path_str = match &f.path {
                            Expression::Literal(lit)
                                if matches!(lit.as_ref(), Literal::String(_)) =>
                            {
                                // The guard above guarantees the String variant here.
                                let Literal::String(s) = lit.as_ref() else {
                                    unreachable!()
                                };
                                let stripped = s.strip_prefix("$.").unwrap_or(s);
                                Expression::Literal(Box::new(Literal::String(
                                    stripped.to_string(),
                                )))
                            }
                            // Dynamic paths pass through unchanged.
                            other => other.clone(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_PATH".to_string(),
                            vec![json_expr, path_str],
                        ))))
                    }
                    _ => {
                        // Default: keep as JSON_QUERY/JSON_VALUE function
                        let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                }
            }
22133
22134 Action::JsonLiteralToJsonParse => {
22135 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
22136 // Also DuckDB CAST(x AS JSON) -> JSON_PARSE(x) for Trino/Presto/Athena
22137 if let Expression::Cast(c) = e {
22138 let func_name = if matches!(target, DialectType::Snowflake) {
22139 "PARSE_JSON"
22140 } else {
22141 "JSON_PARSE"
22142 };
22143 Ok(Expression::Function(Box::new(Function::new(
22144 func_name.to_string(),
22145 vec![c.this],
22146 ))))
22147 } else {
22148 Ok(e)
22149 }
22150 }
22151
22152 Action::DuckDBTryCastJsonToTryJsonParse => {
22153 // DuckDB TRY_CAST(x AS JSON) -> TRY(JSON_PARSE(x)) for Trino/Presto/Athena
22154 if let Expression::TryCast(c) = e {
22155 let json_parse = Expression::Function(Box::new(Function::new(
22156 "JSON_PARSE".to_string(),
22157 vec![c.this],
22158 )));
22159 Ok(Expression::Function(Box::new(Function::new(
22160 "TRY".to_string(),
22161 vec![json_parse],
22162 ))))
22163 } else {
22164 Ok(e)
22165 }
22166 }
22167
22168 Action::DuckDBJsonFuncToJsonParse => {
22169 // DuckDB json(x) -> JSON_PARSE(x) for Trino/Presto/Athena
22170 if let Expression::Function(f) = e {
22171 let args = f.args;
22172 Ok(Expression::Function(Box::new(Function::new(
22173 "JSON_PARSE".to_string(),
22174 args,
22175 ))))
22176 } else {
22177 Ok(e)
22178 }
22179 }
22180
22181 Action::DuckDBJsonValidToIsJson => {
22182 // DuckDB json_valid(x) -> x IS JSON (SQL:2016 predicate) for Trino/Presto/Athena
22183 if let Expression::Function(mut f) = e {
22184 let arg = f.args.remove(0);
22185 Ok(Expression::IsJson(Box::new(
22186 crate::expressions::IsJson {
22187 this: arg,
22188 json_type: None,
22189 unique_keys: None,
22190 negated: false,
22191 },
22192 )))
22193 } else {
22194 Ok(e)
22195 }
22196 }
22197
22198 Action::AtTimeZoneConvert => {
22199 // AT TIME ZONE -> target-specific conversion
22200 if let Expression::AtTimeZone(atz) = e {
22201 match target {
22202 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
22203 Ok(Expression::Function(Box::new(Function::new(
22204 "AT_TIMEZONE".to_string(),
22205 vec![atz.this, atz.zone],
22206 ))))
22207 }
22208 DialectType::Spark | DialectType::Databricks => {
22209 Ok(Expression::Function(Box::new(Function::new(
22210 "FROM_UTC_TIMESTAMP".to_string(),
22211 vec![atz.this, atz.zone],
22212 ))))
22213 }
22214 DialectType::Snowflake => {
22215 // CONVERT_TIMEZONE('zone', expr)
22216 Ok(Expression::Function(Box::new(Function::new(
22217 "CONVERT_TIMEZONE".to_string(),
22218 vec![atz.zone, atz.this],
22219 ))))
22220 }
22221 DialectType::BigQuery => {
22222 // TIMESTAMP(DATETIME(expr, 'zone'))
22223 let datetime_call = Expression::Function(Box::new(Function::new(
22224 "DATETIME".to_string(),
22225 vec![atz.this, atz.zone],
22226 )));
22227 Ok(Expression::Function(Box::new(Function::new(
22228 "TIMESTAMP".to_string(),
22229 vec![datetime_call],
22230 ))))
22231 }
22232 _ => Ok(Expression::Function(Box::new(Function::new(
22233 "AT_TIMEZONE".to_string(),
22234 vec![atz.this, atz.zone],
22235 )))),
22236 }
22237 } else {
22238 Ok(e)
22239 }
22240 }
22241
            Action::DayOfWeekConvert => {
                // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
                if let Expression::DayOfWeek(f) = e {
                    match target {
                        DialectType::DuckDB => Ok(Expression::Function(Box::new(
                            Function::new("ISODOW".to_string(), vec![f.this]),
                        ))),
                        DialectType::Spark | DialectType::Databricks => {
                            // ((DAYOFWEEK(x) % 7) + 1) — renumbers the result; built
                            // as explicit Paren nodes so the generated SQL keeps the
                            // grouping exactly.
                            let dayofweek = Expression::Function(Box::new(Function::new(
                                "DAYOFWEEK".to_string(),
                                vec![f.this],
                            )));
                            // DAYOFWEEK(x) % 7
                            let modulo = Expression::Mod(Box::new(BinaryOp {
                                left: dayofweek,
                                right: Expression::number(7),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            // (DAYOFWEEK(x) % 7)
                            let paren_mod = Expression::Paren(Box::new(Paren {
                                this: modulo,
                                trailing_comments: Vec::new(),
                            }));
                            // (DAYOFWEEK(x) % 7) + 1
                            let add_one = Expression::Add(Box::new(BinaryOp {
                                left: paren_mod,
                                right: Expression::number(1),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            // Outer parens: ((DAYOFWEEK(x) % 7) + 1)
                            Ok(Expression::Paren(Box::new(Paren {
                                this: add_one,
                                trailing_comments: Vec::new(),
                            })))
                        }
                        _ => Ok(Expression::DayOfWeek(f)),
                    }
                } else {
                    Ok(e)
                }
            }
22286
            Action::MaxByMinByConvert => {
                // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
                // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
                // Handle both Expression::Function and Expression::AggregateFunction
                // NOTE: args are cloned up front because the `_` target arm below must
                // still return the original expression `e` intact.
                // NOTE(review): a name that is neither MAX_BY nor MIN_BY falls into
                // the MIN_BY path (is_max=false) — presumably this action is only
                // dispatched for MAX_BY/MIN_BY calls; confirm at the dispatch site.
                let (is_max, args) = match &e {
                    Expression::Function(f) => {
                        (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
                    }
                    Expression::AggregateFunction(af) => {
                        (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
                    }
                    _ => return Ok(e),
                };
                match target {
                    DialectType::ClickHouse => {
                        // Keep only (value, key); any extra argument is dropped.
                        let name = if is_max { "argMax" } else { "argMin" };
                        let mut args = args;
                        args.truncate(2);
                        Ok(Expression::Function(Box::new(Function::new(
                            name.to_string(),
                            args,
                        ))))
                    }
                    DialectType::DuckDB => {
                        // DuckDB spelling; full argument list preserved.
                        let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
                        Ok(Expression::Function(Box::new(Function::new(
                            name.to_string(),
                            args,
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Same name, but drop a 3rd argument if present.
                        let mut args = args;
                        args.truncate(2);
                        let name = if is_max { "MAX_BY" } else { "MIN_BY" };
                        Ok(Expression::Function(Box::new(Function::new(
                            name.to_string(),
                            args,
                        ))))
                    }
                    _ => Ok(e),
                }
            }
22329
22330 Action::ElementAtConvert => {
22331 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
22332 let (arr, idx) = if let Expression::ElementAt(bf) = e {
22333 (bf.this, bf.expression)
22334 } else if let Expression::Function(ref f) = e {
22335 if f.args.len() >= 2 {
22336 if let Expression::Function(f) = e {
22337 let mut args = f.args;
22338 let arr = args.remove(0);
22339 let idx = args.remove(0);
22340 (arr, idx)
22341 } else {
22342 unreachable!("outer condition already matched Expression::Function")
22343 }
22344 } else {
22345 return Ok(e);
22346 }
22347 } else {
22348 return Ok(e);
22349 };
22350 match target {
22351 DialectType::PostgreSQL => {
22352 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
22353 let arr_expr = Expression::Paren(Box::new(Paren {
22354 this: arr,
22355 trailing_comments: vec![],
22356 }));
22357 Ok(Expression::Subscript(Box::new(
22358 crate::expressions::Subscript {
22359 this: arr_expr,
22360 index: idx,
22361 },
22362 )))
22363 }
22364 DialectType::BigQuery => {
22365 // BigQuery: convert ARRAY[...] to bare [...] for subscript
22366 let arr_expr = match arr {
22367 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
22368 crate::expressions::ArrayConstructor {
22369 expressions: af.expressions,
22370 bracket_notation: true,
22371 use_list_keyword: false,
22372 },
22373 )),
22374 other => other,
22375 };
22376 let safe_ordinal = Expression::Function(Box::new(Function::new(
22377 "SAFE_ORDINAL".to_string(),
22378 vec![idx],
22379 )));
22380 Ok(Expression::Subscript(Box::new(
22381 crate::expressions::Subscript {
22382 this: arr_expr,
22383 index: safe_ordinal,
22384 },
22385 )))
22386 }
22387 _ => Ok(Expression::Function(Box::new(Function::new(
22388 "ELEMENT_AT".to_string(),
22389 vec![arr, idx],
22390 )))),
22391 }
22392 }
22393
22394 Action::CurrentUserParens => {
22395 // CURRENT_USER -> CURRENT_USER() for Snowflake
22396 Ok(Expression::Function(Box::new(Function::new(
22397 "CURRENT_USER".to_string(),
22398 vec![],
22399 ))))
22400 }
22401
            Action::ArrayAggToCollectList => {
                // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
                // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
                // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
                match e {
                    Expression::AggregateFunction(mut af) => {
                        // "Simple" = no DISTINCT, no IGNORE NULLS, no LIMIT.
                        let is_simple =
                            !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
                        // COLLECT_LIST takes a single argument; keep only the first.
                        let args = if af.args.is_empty() {
                            vec![]
                        } else {
                            vec![af.args[0].clone()]
                        };
                        af.name = "COLLECT_LIST".to_string();
                        af.args = args;
                        if is_simple {
                            af.order_by = Vec::new();
                        }
                        Ok(Expression::AggregateFunction(af))
                    }
                    Expression::ArrayAgg(agg) => {
                        let is_simple =
                            !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
                        // Rebuild as a generic aggregate call, preserving modifiers.
                        Ok(Expression::AggregateFunction(Box::new(
                            crate::expressions::AggregateFunction {
                                name: "COLLECT_LIST".to_string(),
                                args: vec![agg.this.clone()],
                                distinct: agg.distinct,
                                filter: agg.filter.clone(),
                                order_by: if is_simple {
                                    Vec::new()
                                } else {
                                    agg.order_by.clone()
                                },
                                limit: agg.limit.clone(),
                                ignore_nulls: agg.ignore_nulls,
                                inferred_type: None,
                            },
                        )))
                    }
                    _ => Ok(e),
                }
            }
22445
            Action::ArraySyntaxConvert => {
                match e {
                    // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
                    // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
                    Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
                        Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                            expressions: arr.expressions,
                            bracket_notation: true,
                            use_list_keyword: false,
                        })),
                    ),
                    // ARRAY(y) function style -> ArrayFunc for target dialect
                    // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
                    Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
                        let bracket = matches!(
                            target,
                            DialectType::BigQuery
                                | DialectType::DuckDB
                                | DialectType::Snowflake
                                | DialectType::ClickHouse
                                | DialectType::StarRocks
                        );
                        Ok(Expression::ArrayFunc(Box::new(
                            crate::expressions::ArrayConstructor {
                                expressions: f.args,
                                bracket_notation: bracket,
                                use_list_keyword: false,
                            },
                        )))
                    }
                    // Anything that isn't an array constructor passes through.
                    _ => Ok(e),
                }
            }
22479
22480 Action::CastToJsonForSpark => {
22481 // CAST(x AS JSON) -> TO_JSON(x) for Spark
22482 if let Expression::Cast(c) = e {
22483 Ok(Expression::Function(Box::new(Function::new(
22484 "TO_JSON".to_string(),
22485 vec![c.this],
22486 ))))
22487 } else {
22488 Ok(e)
22489 }
22490 }
22491
22492 Action::CastJsonToFromJson => {
22493 // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
22494 if let Expression::Cast(c) = e {
22495 // Extract the string literal from ParseJson
22496 let literal_expr = if let Expression::ParseJson(pj) = c.this {
22497 pj.this
22498 } else {
22499 c.this
22500 };
22501 // Convert the target DataType to Spark's type string format
22502 let type_str = Self::data_type_to_spark_string(&c.to);
22503 Ok(Expression::Function(Box::new(Function::new(
22504 "FROM_JSON".to_string(),
22505 vec![
22506 literal_expr,
22507 Expression::Literal(Box::new(Literal::String(type_str))),
22508 ],
22509 ))))
22510 } else {
22511 Ok(e)
22512 }
22513 }
22514
            Action::ToJsonConvert => {
                // TO_JSON(x) -> target-specific conversion
                if let Expression::ToJson(f) = e {
                    let arg = f.this;
                    match target {
                        DialectType::Presto | DialectType::Trino => {
                            // JSON_FORMAT(CAST(x AS JSON))
                            // JSON is not a first-class DataType variant here, so it
                            // is spelled via DataType::Custom.
                            let cast_json = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Custom {
                                    name: "JSON".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_FORMAT".to_string(),
                                vec![cast_json],
                            ))))
                        }
                        DialectType::BigQuery => Ok(Expression::Function(Box::new(
                            Function::new("TO_JSON_STRING".to_string(), vec![arg]),
                        ))),
                        DialectType::DuckDB => {
                            // CAST(TO_JSON(x) AS TEXT)
                            let to_json =
                                Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
                                    this: arg,
                                    original_name: None,
                                    inferred_type: None,
                                }));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: to_json,
                                to: DataType::Text,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        // Other targets: rebuild the ToJson node unchanged.
                        _ => Ok(Expression::ToJson(Box::new(
                            crate::expressions::UnaryFunc {
                                this: arg,
                                original_name: None,
                                inferred_type: None,
                            },
                        ))),
                    }
                } else {
                    Ok(e)
                }
            }
22571
22572 Action::VarianceToClickHouse => {
22573 if let Expression::Variance(f) = e {
22574 Ok(Expression::Function(Box::new(Function::new(
22575 "varSamp".to_string(),
22576 vec![f.this],
22577 ))))
22578 } else {
22579 Ok(e)
22580 }
22581 }
22582
22583 Action::StddevToClickHouse => {
22584 if let Expression::Stddev(f) = e {
22585 Ok(Expression::Function(Box::new(Function::new(
22586 "stddevSamp".to_string(),
22587 vec![f.this],
22588 ))))
22589 } else {
22590 Ok(e)
22591 }
22592 }
22593
22594 Action::ApproxQuantileConvert => {
22595 if let Expression::ApproxQuantile(aq) = e {
22596 let mut args = vec![*aq.this];
22597 if let Some(q) = aq.quantile {
22598 args.push(*q);
22599 }
22600 Ok(Expression::Function(Box::new(Function::new(
22601 "APPROX_PERCENTILE".to_string(),
22602 args,
22603 ))))
22604 } else {
22605 Ok(e)
22606 }
22607 }
22608
22609 Action::DollarParamConvert => {
22610 if let Expression::Parameter(p) = e {
22611 Ok(Expression::Parameter(Box::new(
22612 crate::expressions::Parameter {
22613 name: p.name,
22614 index: p.index,
22615 style: crate::expressions::ParameterStyle::At,
22616 quoted: p.quoted,
22617 string_quoted: p.string_quoted,
22618 expression: p.expression,
22619 },
22620 )))
22621 } else {
22622 Ok(e)
22623 }
22624 }
22625
22626 Action::EscapeStringNormalize => {
22627 if let Expression::Literal(ref lit) = e {
22628 if let Literal::EscapeString(s) = lit.as_ref() {
22629 // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
22630 let stripped = if s.starts_with("e:") || s.starts_with("E:") {
22631 s[2..].to_string()
22632 } else {
22633 s.clone()
22634 };
22635 let normalized = stripped
22636 .replace('\n', "\\n")
22637 .replace('\r', "\\r")
22638 .replace('\t', "\\t");
22639 match target {
22640 DialectType::BigQuery => {
22641 // BigQuery: e'...' -> CAST(b'...' AS STRING)
22642 // Use Raw for the b'...' part to avoid double-escaping
22643 let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
22644 Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
22645 }
22646 _ => Ok(Expression::Literal(Box::new(Literal::EscapeString(
22647 normalized,
22648 )))),
22649 }
22650 } else {
22651 Ok(e)
22652 }
22653 } else {
22654 Ok(e)
22655 }
22656 }
22657
22658 Action::StraightJoinCase => {
22659 // straight_join: keep lowercase for DuckDB, quote for MySQL
22660 if let Expression::Column(col) = e {
22661 if col.name.name == "STRAIGHT_JOIN" {
22662 let mut new_col = col;
22663 new_col.name.name = "straight_join".to_string();
22664 if matches!(target, DialectType::MySQL) {
22665 // MySQL: needs quoting since it's a reserved keyword
22666 new_col.name.quoted = true;
22667 }
22668 Ok(Expression::Column(new_col))
22669 } else {
22670 Ok(Expression::Column(col))
22671 }
22672 } else {
22673 Ok(e)
22674 }
22675 }
22676
22677 Action::TablesampleReservoir => {
22678 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
22679 if let Expression::TableSample(mut ts) = e {
22680 if let Some(ref mut sample) = ts.sample {
22681 sample.method = crate::expressions::SampleMethod::Reservoir;
22682 sample.explicit_method = true;
22683 }
22684 Ok(Expression::TableSample(ts))
22685 } else {
22686 Ok(e)
22687 }
22688 }
22689
22690 Action::TablesampleSnowflakeStrip => {
22691 // Strip method and PERCENT for Snowflake target from non-Snowflake source
22692 match e {
22693 Expression::TableSample(mut ts) => {
22694 if let Some(ref mut sample) = ts.sample {
22695 sample.suppress_method_output = true;
22696 sample.unit_after_size = false;
22697 sample.is_percent = false;
22698 }
22699 Ok(Expression::TableSample(ts))
22700 }
22701 Expression::Table(mut t) => {
22702 if let Some(ref mut sample) = t.table_sample {
22703 sample.suppress_method_output = true;
22704 sample.unit_after_size = false;
22705 sample.is_percent = false;
22706 }
22707 Ok(Expression::Table(t))
22708 }
22709 _ => Ok(e),
22710 }
22711 }
22712
22713 Action::FirstToAnyValue => {
22714 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
22715 if let Expression::First(mut agg) = e {
22716 agg.ignore_nulls = None;
22717 agg.name = Some("ANY_VALUE".to_string());
22718 Ok(Expression::AnyValue(agg))
22719 } else {
22720 Ok(e)
22721 }
22722 }
22723
            Action::ArrayIndexConvert => {
                // Subscript index: 1-based to 0-based for BigQuery.
                // Only literal integer indices are rewritten; dynamic indices and
                // non-integer number literals are left untouched (parse failure is
                // silently ignored).
                if let Expression::Subscript(mut sub) = e {
                    if let Expression::Literal(ref lit) = sub.index {
                        if let Literal::Number(ref n) = lit.as_ref() {
                            if let Ok(val) = n.parse::<i64>() {
                                sub.index = Expression::Literal(Box::new(Literal::Number(
                                    (val - 1).to_string(),
                                )));
                            }
                        }
                    }
                    Ok(Expression::Subscript(sub))
                } else {
                    Ok(e)
                }
            }
22741
22742 Action::AnyValueIgnoreNulls => {
22743 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
22744 if let Expression::AnyValue(mut av) = e {
22745 if av.ignore_nulls.is_none() {
22746 av.ignore_nulls = Some(true);
22747 }
22748 Ok(Expression::AnyValue(av))
22749 } else {
22750 Ok(e)
22751 }
22752 }
22753
22754 Action::BigQueryNullsOrdering => {
22755 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
22756 if let Expression::WindowFunction(mut wf) = e {
22757 for o in &mut wf.over.order_by {
22758 o.nulls_first = None;
22759 }
22760 Ok(Expression::WindowFunction(wf))
22761 } else if let Expression::Ordered(mut o) = e {
22762 o.nulls_first = None;
22763 Ok(Expression::Ordered(o))
22764 } else {
22765 Ok(e)
22766 }
22767 }
22768
22769 Action::SnowflakeFloatProtect => {
22770 // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
22771 // Snowflake's target transform from converting it to DOUBLE.
22772 // Non-Snowflake sources should keep their FLOAT spelling.
22773 if let Expression::DataType(DataType::Float { .. }) = e {
22774 Ok(Expression::DataType(DataType::Custom {
22775 name: "FLOAT".to_string(),
22776 }))
22777 } else {
22778 Ok(e)
22779 }
22780 }
22781
22782 Action::MysqlNullsOrdering => {
22783 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
22784 if let Expression::Ordered(mut o) = e {
22785 let nulls_last = o.nulls_first == Some(false);
22786 let desc = o.desc;
22787 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
22788 // If requested ordering matches default, just strip NULLS clause
22789 let matches_default = if desc {
22790 // DESC default is NULLS FIRST, so nulls_first=true matches
22791 o.nulls_first == Some(true)
22792 } else {
22793 // ASC default is NULLS LAST, so nulls_first=false matches
22794 nulls_last
22795 };
22796 if matches_default {
22797 o.nulls_first = None;
22798 Ok(Expression::Ordered(o))
22799 } else {
22800 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
22801 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
22802 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
22803 let null_val = if desc { 1 } else { 0 };
22804 let non_null_val = if desc { 0 } else { 1 };
22805 let _case_expr = Expression::Case(Box::new(Case {
22806 operand: None,
22807 whens: vec![(
22808 Expression::IsNull(Box::new(crate::expressions::IsNull {
22809 this: o.this.clone(),
22810 not: false,
22811 postfix_form: false,
22812 })),
22813 Expression::number(null_val),
22814 )],
22815 else_: Some(Expression::number(non_null_val)),
22816 comments: Vec::new(),
22817 inferred_type: None,
22818 }));
22819 o.nulls_first = None;
22820 // Return a tuple of [case_expr, ordered_expr]
22821 // We need to return both as part of the ORDER BY
22822 // But since transform_recursive processes individual expressions,
22823 // we can't easily add extra ORDER BY items here.
22824 // Instead, strip the nulls_first
22825 o.nulls_first = None;
22826 Ok(Expression::Ordered(o))
22827 }
22828 } else {
22829 Ok(e)
22830 }
22831 }
22832
            Action::MysqlNullsLastRewrite => {
                // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
                // to simulate NULLS LAST for ASC ordering
                if let Expression::WindowFunction(mut wf) = e {
                    // Rebuild the window ORDER BY, possibly inserting a synthetic
                    // null-flag key in front of each ASC key.
                    let mut new_order_by = Vec::new();
                    for o in wf.over.order_by {
                        if !o.desc {
                            // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
                            // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
                            // so NULL rows (flag 1) sort after non-NULL rows (flag 0).
                            let case_expr = Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(
                                    Expression::IsNull(Box::new(crate::expressions::IsNull {
                                        this: o.this.clone(),
                                        not: false,
                                        postfix_form: false,
                                    })),
                                    Expression::Literal(Box::new(Literal::Number(
                                        "1".to_string(),
                                    ))),
                                )],
                                else_: Some(Expression::Literal(Box::new(Literal::Number(
                                    "0".to_string(),
                                )))),
                                comments: Vec::new(),
                                inferred_type: None,
                            }));
                            // Synthetic key first, then the original (NULLS stripped).
                            new_order_by.push(crate::expressions::Ordered {
                                this: case_expr,
                                desc: false,
                                nulls_first: None,
                                explicit_asc: false,
                                with_fill: None,
                            });
                            let mut ordered = o;
                            ordered.nulls_first = None;
                            new_order_by.push(ordered);
                        } else {
                            // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
                            // No change needed
                            let mut ordered = o;
                            ordered.nulls_first = None;
                            new_order_by.push(ordered);
                        }
                    }
                    wf.over.order_by = new_order_by;
                    Ok(Expression::WindowFunction(wf))
                } else {
                    Ok(e)
                }
            }
22884
22885 Action::RespectNullsConvert => {
22886 // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
22887 if let Expression::WindowFunction(mut wf) = e {
22888 match &mut wf.this {
22889 Expression::FirstValue(ref mut vf) => {
22890 if vf.ignore_nulls == Some(false) {
22891 vf.ignore_nulls = None;
22892 // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
22893 // but that's handled by the generator's NULLS ordering
22894 }
22895 }
22896 Expression::LastValue(ref mut vf) => {
22897 if vf.ignore_nulls == Some(false) {
22898 vf.ignore_nulls = None;
22899 }
22900 }
22901 _ => {}
22902 }
22903 Ok(Expression::WindowFunction(wf))
22904 } else {
22905 Ok(e)
22906 }
22907 }
22908
22909 Action::SnowflakeWindowFrameStrip => {
22910 // Strip the default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
22911 // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when targeting Snowflake
22912 if let Expression::WindowFunction(mut wf) = e {
22913 wf.over.frame = None;
22914 Ok(Expression::WindowFunction(wf))
22915 } else {
22916 Ok(e)
22917 }
22918 }
22919
22920 Action::SnowflakeWindowFrameAdd => {
22921 // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
22922 // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when transpiling from Snowflake to non-Snowflake
22923 if let Expression::WindowFunction(mut wf) = e {
22924 wf.over.frame = Some(crate::expressions::WindowFrame {
22925 kind: crate::expressions::WindowFrameKind::Rows,
22926 start: crate::expressions::WindowFrameBound::UnboundedPreceding,
22927 end: Some(crate::expressions::WindowFrameBound::UnboundedFollowing),
22928 exclude: None,
22929 kind_text: None,
22930 start_side_text: None,
22931 end_side_text: None,
22932 });
22933 Ok(Expression::WindowFunction(wf))
22934 } else {
22935 Ok(e)
22936 }
22937 }
22938
22939 Action::CreateTableStripComment => {
22940 // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
22941 if let Expression::CreateTable(mut ct) = e {
22942 for col in &mut ct.columns {
22943 col.comment = None;
22944 col.constraints.retain(|c| {
22945 !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
22946 });
22947 // Also remove Comment from constraint_order
22948 col.constraint_order.retain(|c| {
22949 !matches!(c, crate::expressions::ConstraintType::Comment)
22950 });
22951 }
22952 // Strip properties (USING, PARTITIONED BY, etc.)
22953 ct.properties.clear();
22954 Ok(Expression::CreateTable(ct))
22955 } else {
22956 Ok(e)
22957 }
22958 }
22959
22960 Action::AlterTableToSpRename => {
22961 // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
22962 if let Expression::AlterTable(ref at) = e {
22963 if let Some(crate::expressions::AlterTableAction::RenameTable(
22964 ref new_tbl,
22965 )) = at.actions.first()
22966 {
22967 // Build the old table name using TSQL bracket quoting
22968 let old_name = if let Some(ref schema) = at.name.schema {
22969 if at.name.name.quoted || schema.quoted {
22970 format!("[{}].[{}]", schema.name, at.name.name.name)
22971 } else {
22972 format!("{}.{}", schema.name, at.name.name.name)
22973 }
22974 } else {
22975 if at.name.name.quoted {
22976 format!("[{}]", at.name.name.name)
22977 } else {
22978 at.name.name.name.clone()
22979 }
22980 };
22981 let new_name = new_tbl.name.name.clone();
22982 // EXEC sp_rename 'old_name', 'new_name'
22983 let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
22984 Ok(Expression::Raw(crate::expressions::Raw { sql }))
22985 } else {
22986 Ok(e)
22987 }
22988 } else {
22989 Ok(e)
22990 }
22991 }
22992
            Action::SnowflakeIntervalFormat => {
                // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
                // (value and unit folded into one string literal; the separate unit
                // specifier is then cleared).
                if let Expression::Interval(mut iv) = e {
                    if let (Some(Expression::Literal(lit)), Some(ref unit_spec)) =
                        (&iv.this, &iv.unit)
                    {
                        if let Literal::String(ref val) = lit.as_ref() {
                            // Map the structured unit back to its SQL keyword; only
                            // the Simple form is handled — others produce "" and
                            // leave the interval untouched below.
                            let unit_str = match unit_spec {
                                crate::expressions::IntervalUnitSpec::Simple {
                                    unit, ..
                                } => match unit {
                                    crate::expressions::IntervalUnit::Year => "YEAR",
                                    crate::expressions::IntervalUnit::Quarter => "QUARTER",
                                    crate::expressions::IntervalUnit::Month => "MONTH",
                                    crate::expressions::IntervalUnit::Week => "WEEK",
                                    crate::expressions::IntervalUnit::Day => "DAY",
                                    crate::expressions::IntervalUnit::Hour => "HOUR",
                                    crate::expressions::IntervalUnit::Minute => "MINUTE",
                                    crate::expressions::IntervalUnit::Second => "SECOND",
                                    crate::expressions::IntervalUnit::Millisecond => {
                                        "MILLISECOND"
                                    }
                                    crate::expressions::IntervalUnit::Microsecond => {
                                        "MICROSECOND"
                                    }
                                    crate::expressions::IntervalUnit::Nanosecond => {
                                        "NANOSECOND"
                                    }
                                },
                                _ => "",
                            };
                            if !unit_str.is_empty() {
                                let combined = format!("{} {}", val, unit_str);
                                iv.this = Some(Expression::Literal(Box::new(Literal::String(
                                    combined,
                                ))));
                                iv.unit = None;
                            }
                        }
                    }
                    Ok(Expression::Interval(iv))
                } else {
                    Ok(e)
                }
            }
23038
23039 Action::ArrayConcatBracketConvert => {
23040 // Expression::Array/ArrayFunc -> target-specific
23041 // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
23042 // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
23043 match e {
23044 Expression::Array(arr) => {
23045 if matches!(target, DialectType::Redshift) {
23046 Ok(Expression::Function(Box::new(Function::new(
23047 "ARRAY".to_string(),
23048 arr.expressions,
23049 ))))
23050 } else {
23051 Ok(Expression::ArrayFunc(Box::new(
23052 crate::expressions::ArrayConstructor {
23053 expressions: arr.expressions,
23054 bracket_notation: false,
23055 use_list_keyword: false,
23056 },
23057 )))
23058 }
23059 }
23060 Expression::ArrayFunc(arr) => {
23061 // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
23062 if matches!(target, DialectType::Redshift) {
23063 Ok(Expression::Function(Box::new(Function::new(
23064 "ARRAY".to_string(),
23065 arr.expressions,
23066 ))))
23067 } else {
23068 Ok(Expression::ArrayFunc(arr))
23069 }
23070 }
23071 _ => Ok(e),
23072 }
23073 }
23074
23075 Action::BitAggFloatCast => {
23076 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
23077 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
23078 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
23079 let int_type = DataType::Int {
23080 length: None,
23081 integer_spelling: false,
23082 };
23083 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
23084 if let Expression::Cast(c) = agg_this {
23085 match &c.to {
23086 DataType::Float { .. }
23087 | DataType::Double { .. }
23088 | DataType::Custom { .. } => {
23089 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
23090 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
23091 let inner_type = match &c.to {
23092 DataType::Float {
23093 precision, scale, ..
23094 } => DataType::Float {
23095 precision: *precision,
23096 scale: *scale,
23097 real_spelling: true,
23098 },
23099 other => other.clone(),
23100 };
23101 let inner_cast =
23102 Expression::Cast(Box::new(crate::expressions::Cast {
23103 this: c.this.clone(),
23104 to: inner_type,
23105 trailing_comments: Vec::new(),
23106 double_colon_syntax: false,
23107 format: None,
23108 default: None,
23109 inferred_type: None,
23110 }));
23111 let rounded = Expression::Function(Box::new(Function::new(
23112 "ROUND".to_string(),
23113 vec![inner_cast],
23114 )));
23115 Expression::Cast(Box::new(crate::expressions::Cast {
23116 this: rounded,
23117 to: int_dt,
23118 trailing_comments: Vec::new(),
23119 double_colon_syntax: false,
23120 format: None,
23121 default: None,
23122 inferred_type: None,
23123 }))
23124 }
23125 DataType::Decimal { .. } => {
23126 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
23127 Expression::Cast(Box::new(crate::expressions::Cast {
23128 this: Expression::Cast(c),
23129 to: int_dt,
23130 trailing_comments: Vec::new(),
23131 double_colon_syntax: false,
23132 format: None,
23133 default: None,
23134 inferred_type: None,
23135 }))
23136 }
23137 _ => Expression::Cast(c),
23138 }
23139 } else {
23140 agg_this
23141 }
23142 };
23143 match e {
23144 Expression::BitwiseOrAgg(mut f) => {
23145 f.this = wrap_agg(f.this, int_type);
23146 Ok(Expression::BitwiseOrAgg(f))
23147 }
23148 Expression::BitwiseAndAgg(mut f) => {
23149 let int_type = DataType::Int {
23150 length: None,
23151 integer_spelling: false,
23152 };
23153 f.this = wrap_agg(f.this, int_type);
23154 Ok(Expression::BitwiseAndAgg(f))
23155 }
23156 Expression::BitwiseXorAgg(mut f) => {
23157 let int_type = DataType::Int {
23158 length: None,
23159 integer_spelling: false,
23160 };
23161 f.this = wrap_agg(f.this, int_type);
23162 Ok(Expression::BitwiseXorAgg(f))
23163 }
23164 _ => Ok(e),
23165 }
23166 }
23167
23168 Action::BitAggSnowflakeRename => {
23169 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
23170 match e {
23171 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
23172 Function::new("BITORAGG".to_string(), vec![f.this]),
23173 ))),
23174 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
23175 Function::new("BITANDAGG".to_string(), vec![f.this]),
23176 ))),
23177 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
23178 Function::new("BITXORAGG".to_string(), vec![f.this]),
23179 ))),
23180 _ => Ok(e),
23181 }
23182 }
23183
23184 Action::StrftimeCastTimestamp => {
23185 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
23186 if let Expression::Cast(mut c) = e {
23187 if matches!(
23188 c.to,
23189 DataType::Timestamp {
23190 timezone: false,
23191 ..
23192 }
23193 ) {
23194 c.to = DataType::Custom {
23195 name: "TIMESTAMP_NTZ".to_string(),
23196 };
23197 }
23198 Ok(Expression::Cast(c))
23199 } else {
23200 Ok(e)
23201 }
23202 }
23203
23204 Action::DecimalDefaultPrecision => {
23205 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
23206 if let Expression::Cast(mut c) = e {
23207 if matches!(
23208 c.to,
23209 DataType::Decimal {
23210 precision: None,
23211 ..
23212 }
23213 ) {
23214 c.to = DataType::Decimal {
23215 precision: Some(18),
23216 scale: Some(3),
23217 };
23218 }
23219 Ok(Expression::Cast(c))
23220 } else {
23221 Ok(e)
23222 }
23223 }
23224
23225 Action::FilterToIff => {
23226 // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL))
23227 if let Expression::Filter(f) = e {
23228 let condition = *f.expression;
23229 let agg = *f.this;
23230 // Strip WHERE from condition
23231 let cond = match condition {
23232 Expression::Where(w) => w.this,
23233 other => other,
23234 };
23235 // Extract the aggregate function and its argument
23236 // We want AVG(IFF(condition, x, NULL))
23237 match agg {
23238 Expression::Function(mut func) => {
23239 if !func.args.is_empty() {
23240 let orig_arg = func.args[0].clone();
23241 let iff_call = Expression::Function(Box::new(Function::new(
23242 "IFF".to_string(),
23243 vec![cond, orig_arg, Expression::Null(Null)],
23244 )));
23245 func.args[0] = iff_call;
23246 Ok(Expression::Function(func))
23247 } else {
23248 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
23249 this: Box::new(Expression::Function(func)),
23250 expression: Box::new(cond),
23251 })))
23252 }
23253 }
23254 Expression::Avg(mut avg) => {
23255 let iff_call = Expression::Function(Box::new(Function::new(
23256 "IFF".to_string(),
23257 vec![cond, avg.this.clone(), Expression::Null(Null)],
23258 )));
23259 avg.this = iff_call;
23260 Ok(Expression::Avg(avg))
23261 }
23262 Expression::Sum(mut s) => {
23263 let iff_call = Expression::Function(Box::new(Function::new(
23264 "IFF".to_string(),
23265 vec![cond, s.this.clone(), Expression::Null(Null)],
23266 )));
23267 s.this = iff_call;
23268 Ok(Expression::Sum(s))
23269 }
23270 Expression::Count(mut c) => {
23271 if let Some(ref this_expr) = c.this {
23272 let iff_call = Expression::Function(Box::new(Function::new(
23273 "IFF".to_string(),
23274 vec![cond, this_expr.clone(), Expression::Null(Null)],
23275 )));
23276 c.this = Some(iff_call);
23277 }
23278 Ok(Expression::Count(c))
23279 }
23280 other => {
23281 // Fallback: keep as Filter
23282 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
23283 this: Box::new(other),
23284 expression: Box::new(cond),
23285 })))
23286 }
23287 }
23288 } else {
23289 Ok(e)
23290 }
23291 }
23292
23293 Action::AggFilterToIff => {
23294 // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
23295 // Helper macro to handle the common AggFunc case
23296 macro_rules! handle_agg_filter_to_iff {
23297 ($variant:ident, $agg:expr) => {{
23298 let mut agg = $agg;
23299 if let Some(filter_cond) = agg.filter.take() {
23300 let iff_call = Expression::Function(Box::new(Function::new(
23301 "IFF".to_string(),
23302 vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
23303 )));
23304 agg.this = iff_call;
23305 }
23306 Ok(Expression::$variant(agg))
23307 }};
23308 }
23309
23310 match e {
23311 Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
23312 Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
23313 Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
23314 Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
23315 Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
23316 Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
23317 Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
23318 Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
23319 Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
23320 Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
23321 Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
23322 Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
23323 Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
23324 Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
23325 Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
23326 Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
23327 Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
23328 Expression::ApproxDistinct(agg) => {
23329 handle_agg_filter_to_iff!(ApproxDistinct, agg)
23330 }
23331 Expression::Count(mut c) => {
23332 if let Some(filter_cond) = c.filter.take() {
23333 if let Some(ref this_expr) = c.this {
23334 let iff_call = Expression::Function(Box::new(Function::new(
23335 "IFF".to_string(),
23336 vec![
23337 filter_cond,
23338 this_expr.clone(),
23339 Expression::Null(Null),
23340 ],
23341 )));
23342 c.this = Some(iff_call);
23343 }
23344 }
23345 Ok(Expression::Count(c))
23346 }
23347 other => Ok(other),
23348 }
23349 }
23350
Action::JsonToGetPath => {
    // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key')
    if let Expression::JsonExtract(je) = e {
        // Normalize the subject so GET_PATH receives a JSON value:
        // - JSON(x)       -> PARSE_JSON(x)
        // - PARSE_JSON(x) -> keep as-is
        // - GET_PATH(...) -> keep as-is (its result is already JSON)
        // - anything else -> wrap in PARSE_JSON(...)
        let this = match &je.this {
            Expression::Function(f)
                if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
            {
                Expression::Function(Box::new(Function::new(
                    "PARSE_JSON".to_string(),
                    f.args.clone(),
                )))
            }
            Expression::Function(f)
                if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
            {
                je.this.clone()
            }
            // GET_PATH result is already JSON, don't wrap
            Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
                je.this.clone()
            }
            other => {
                // Wrap non-JSON expressions in PARSE_JSON()
                Expression::Function(Box::new(Function::new(
                    "PARSE_JSON".to_string(),
                    vec![other.clone()],
                )))
            }
        };
        // Normalize the path argument:
        // - JSONPath AST: join its string-key segments with '.' (root marker
        //   skipped; non-key segments such as array indices are ignored —
        //   NOTE(review): indices/wildcards in a JSONPath AST are dropped
        //   here, confirm that is intended)
        // - string literal '$.a.b': strip the leading "$." and wildcards
        // - string literal starting with '$': strip only the '$'
        // - anything else: keep unchanged
        let path = match &je.path {
            Expression::JSONPath(jp) => {
                // Extract the key from JSONPath: $root.key -> 'key'
                let mut key_parts = Vec::new();
                for expr in &jp.expressions {
                    match expr {
                        Expression::JSONPathRoot(_) => {} // skip root
                        Expression::JSONPathKey(k) => {
                            if let Expression::Literal(lit) = &*k.this {
                                if let Literal::String(s) = lit.as_ref() {
                                    key_parts.push(s.clone());
                                }
                            }
                        }
                        _ => {}
                    }
                }
                if !key_parts.is_empty() {
                    Expression::Literal(Box::new(Literal::String(
                        key_parts.join("."),
                    )))
                } else {
                    // No usable key segments; pass the original path through.
                    je.path.clone()
                }
            }
            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with("$.")) =>
            {
                let Literal::String(s) = lit.as_ref() else {
                    unreachable!()
                };
                // Drop the "$." prefix, then remove wildcard segments.
                let stripped = Self::strip_json_wildcards(&s[2..].to_string());
                Expression::Literal(Box::new(Literal::String(stripped)))
            }
            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$')) =>
            {
                let Literal::String(s) = lit.as_ref() else {
                    unreachable!()
                };
                // Bare '$' prefix (no dot): drop just the '$'.
                let stripped = Self::strip_json_wildcards(&s[1..].to_string());
                Expression::Literal(Box::new(Literal::String(stripped)))
            }
            _ => je.path.clone(),
        };
        Ok(Expression::Function(Box::new(Function::new(
            "GET_PATH".to_string(),
            vec![this, path],
        ))))
    } else {
        Ok(e)
    }
}
23436
Action::StructToRow => {
    // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
    // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)

    // Extract (key, value) pairs from either a Struct or a curly-brace
    // MapFunc. Unnamed fields produce an empty-string key; MapFunc keys may
    // be string literals or identifiers (anything else yields an empty key).
    let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
        Expression::Struct(s) => Some(
            s.fields
                .iter()
                .map(|(opt_name, field_expr)| {
                    if let Some(name) = opt_name {
                        (name.clone(), field_expr.clone())
                    } else if let Expression::NamedArgument(na) = field_expr {
                        (na.name.name.clone(), na.value.clone())
                    } else {
                        (String::new(), field_expr.clone())
                    }
                })
                .collect(),
        ),
        Expression::MapFunc(m) if m.curly_brace_syntax => Some(
            m.keys
                .iter()
                .zip(m.values.iter())
                .map(|(key, value)| {
                    let key_name = match key {
                        Expression::Literal(lit)
                            if matches!(lit.as_ref(), Literal::String(_)) =>
                        {
                            let Literal::String(s) = lit.as_ref() else {
                                unreachable!()
                            };
                            s.clone()
                        }
                        Expression::Identifier(id) => id.name.clone(),
                        _ => String::new(),
                    };
                    (key_name, value.clone())
                })
                .collect(),
        ),
        _ => None,
    };

    if let Some(pairs) = kv_pairs {
        // For BigQuery, named fields become `value AS key` aliases; every
        // other target just takes the bare values.
        // NOTE(review): the Presto/Trino branch and the final else branch
        // are identical (`named_args.push(value)`) — the split looks
        // intentional as a placeholder but collapses to the same behavior.
        let mut named_args = Vec::new();
        for (key_name, value) in pairs {
            if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
                named_args.push(Expression::Alias(Box::new(
                    crate::expressions::Alias::new(
                        value,
                        Identifier::new(key_name),
                    ),
                )));
            } else if matches!(target, DialectType::Presto | DialectType::Trino) {
                named_args.push(value);
            } else {
                named_args.push(value);
            }
        }

        if matches!(target, DialectType::BigQuery) {
            Ok(Expression::Function(Box::new(Function::new(
                "STRUCT".to_string(),
                named_args,
            ))))
        } else if matches!(target, DialectType::Presto | DialectType::Trino) {
            // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
            let row_func = Expression::Function(Box::new(Function::new(
                "ROW".to_string(),
                named_args,
            )));

            // Re-extract the pairs for type inference — the first `pairs`
            // vector was consumed building `named_args`.
            // NOTE(review): this duplicates the extraction block above
            // verbatim; a shared helper (or cloning `pairs` up front) would
            // remove ~40 lines, left as-is to keep behavior byte-identical.
            let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
                Expression::Struct(s) => Some(
                    s.fields
                        .iter()
                        .map(|(opt_name, field_expr)| {
                            if let Some(name) = opt_name {
                                (name.clone(), field_expr.clone())
                            } else if let Expression::NamedArgument(na) = field_expr
                            {
                                (na.name.name.clone(), na.value.clone())
                            } else {
                                (String::new(), field_expr.clone())
                            }
                        })
                        .collect(),
                ),
                Expression::MapFunc(m) if m.curly_brace_syntax => Some(
                    m.keys
                        .iter()
                        .zip(m.values.iter())
                        .map(|(key, value)| {
                            let key_name = match key {
                                Expression::Literal(lit)
                                    if matches!(
                                        lit.as_ref(),
                                        Literal::String(_)
                                    ) =>
                                {
                                    let Literal::String(s) = lit.as_ref() else {
                                        unreachable!()
                                    };
                                    s.clone()
                                }
                                Expression::Identifier(id) => id.name.clone(),
                                _ => String::new(),
                            };
                            (key_name, value.clone())
                        })
                        .collect(),
                ),
                _ => None,
            };

            if let Some(pairs) = kv_pairs_again {
                // Infer a SQL type for every value from its literal form:
                // numbers with '.' -> DOUBLE, other numbers -> INTEGER,
                // strings -> VARCHAR, booleans -> BOOLEAN. Any field that
                // cannot be inferred aborts the CAST and keeps a bare ROW.
                let mut all_inferred = true;
                let mut fields = Vec::new();
                for (name, value) in &pairs {
                    let inferred_type = match value {
                        Expression::Literal(lit)
                            if matches!(lit.as_ref(), Literal::Number(_)) =>
                        {
                            let Literal::Number(n) = lit.as_ref() else {
                                unreachable!()
                            };
                            if n.contains('.') {
                                Some(DataType::Double {
                                    precision: None,
                                    scale: None,
                                })
                            } else {
                                Some(DataType::Int {
                                    length: None,
                                    integer_spelling: true,
                                })
                            }
                        }
                        Expression::Literal(lit)
                            if matches!(lit.as_ref(), Literal::String(_)) =>
                        {
                            Some(DataType::VarChar {
                                length: None,
                                parenthesized_length: false,
                            })
                        }
                        Expression::Boolean(_) => Some(DataType::Boolean),
                        _ => None,
                    };
                    if let Some(dt) = inferred_type {
                        fields.push(crate::expressions::StructField::new(
                            name.clone(),
                            dt,
                        ));
                    } else {
                        all_inferred = false;
                        break;
                    }
                }

                if all_inferred && !fields.is_empty() {
                    // All field types known: CAST(ROW(...) AS ROW(name TYPE, ...)).
                    let row_type = DataType::Struct {
                        fields,
                        nested: true,
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: row_func,
                        to: row_type,
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    Ok(row_func)
                }
            } else {
                Ok(row_func)
            }
        } else {
            // Any other target: plain ROW(value, ...) call.
            Ok(Expression::Function(Box::new(Function::new(
                "ROW".to_string(),
                named_args,
            ))))
        }
    } else {
        Ok(e)
    }
}
23630
Action::SparkStructConvert => {
    // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
    // or DuckDB {'name': val, ...}
    if let Expression::Function(f) = e {
        // Extract (name, value) pairs from the call's arguments. Aliased
        // args carry the field name; unaliased args get an empty name.
        let mut pairs: Vec<(String, Expression)> = Vec::new();
        for arg in &f.args {
            match arg {
                Expression::Alias(a) => {
                    pairs.push((a.alias.name.clone(), a.this.clone()));
                }
                _ => {
                    pairs.push((String::new(), arg.clone()));
                }
            }
        }

        match target {
            DialectType::DuckDB => {
                // Convert to DuckDB struct literal {'name': value, ...};
                // unaliased fields yield an empty-string key.
                let mut keys = Vec::new();
                let mut values = Vec::new();
                for (name, value) in &pairs {
                    keys.push(Expression::Literal(Box::new(Literal::String(
                        name.clone(),
                    ))));
                    values.push(value.clone());
                }
                Ok(Expression::MapFunc(Box::new(
                    crate::expressions::MapConstructor {
                        keys,
                        values,
                        curly_brace_syntax: true,
                        with_map_keyword: false,
                    },
                )))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
                let row_args: Vec<Expression> =
                    pairs.iter().map(|(_, v)| v.clone()).collect();
                let row_func = Expression::Function(Box::new(Function::new(
                    "ROW".to_string(),
                    row_args,
                )));

                // Infer a SQL type per field from its literal form:
                // numbers with '.' -> DOUBLE, other numbers -> INTEGER,
                // strings -> VARCHAR, booleans -> BOOLEAN. Any
                // non-inferable field aborts and a bare ROW is emitted.
                let mut all_inferred = true;
                let mut fields = Vec::new();
                for (name, value) in &pairs {
                    let inferred_type = match value {
                        Expression::Literal(lit)
                            if matches!(lit.as_ref(), Literal::Number(_)) =>
                        {
                            let Literal::Number(n) = lit.as_ref() else {
                                unreachable!()
                            };
                            if n.contains('.') {
                                Some(DataType::Double {
                                    precision: None,
                                    scale: None,
                                })
                            } else {
                                Some(DataType::Int {
                                    length: None,
                                    integer_spelling: true,
                                })
                            }
                        }
                        Expression::Literal(lit)
                            if matches!(lit.as_ref(), Literal::String(_)) =>
                        {
                            Some(DataType::VarChar {
                                length: None,
                                parenthesized_length: false,
                            })
                        }
                        Expression::Boolean(_) => Some(DataType::Boolean),
                        _ => None,
                    };
                    if let Some(dt) = inferred_type {
                        fields.push(crate::expressions::StructField::new(
                            name.clone(),
                            dt,
                        ));
                    } else {
                        all_inferred = false;
                        break;
                    }
                }

                if all_inferred && !fields.is_empty() {
                    // All types known: wrap ROW(...) in a CAST to the ROW type.
                    let row_type = DataType::Struct {
                        fields,
                        nested: true,
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: row_func,
                        to: row_type,
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    Ok(row_func)
                }
            }
            // Other targets keep the original STRUCT(...) call untouched.
            _ => Ok(Expression::Function(f)),
        }
    } else {
        Ok(e)
    }
}
23746
23747 Action::ApproxCountDistinctToApproxDistinct => {
23748 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
23749 if let Expression::ApproxCountDistinct(f) = e {
23750 Ok(Expression::ApproxDistinct(f))
23751 } else {
23752 Ok(e)
23753 }
23754 }
23755
23756 Action::CollectListToArrayAgg => {
23757 // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
23758 if let Expression::AggregateFunction(f) = e {
23759 let filter_expr = if !f.args.is_empty() {
23760 let arg = f.args[0].clone();
23761 Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
23762 this: arg,
23763 not: true,
23764 postfix_form: false,
23765 })))
23766 } else {
23767 None
23768 };
23769 let agg = crate::expressions::AggFunc {
23770 this: if f.args.is_empty() {
23771 Expression::Null(crate::expressions::Null)
23772 } else {
23773 f.args[0].clone()
23774 },
23775 distinct: f.distinct,
23776 order_by: f.order_by.clone(),
23777 filter: filter_expr,
23778 ignore_nulls: None,
23779 name: None,
23780 having_max: None,
23781 limit: None,
23782 inferred_type: None,
23783 };
23784 Ok(Expression::ArrayAgg(Box::new(agg)))
23785 } else {
23786 Ok(e)
23787 }
23788 }
23789
23790 Action::CollectSetConvert => {
23791 // COLLECT_SET(x) -> target-specific
23792 if let Expression::AggregateFunction(f) = e {
23793 match target {
23794 DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
23795 crate::expressions::AggregateFunction {
23796 name: "SET_AGG".to_string(),
23797 args: f.args,
23798 distinct: false,
23799 order_by: f.order_by,
23800 filter: f.filter,
23801 limit: f.limit,
23802 ignore_nulls: f.ignore_nulls,
23803 inferred_type: None,
23804 },
23805 ))),
23806 DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
23807 crate::expressions::AggregateFunction {
23808 name: "ARRAY_UNIQUE_AGG".to_string(),
23809 args: f.args,
23810 distinct: false,
23811 order_by: f.order_by,
23812 filter: f.filter,
23813 limit: f.limit,
23814 ignore_nulls: f.ignore_nulls,
23815 inferred_type: None,
23816 },
23817 ))),
23818 DialectType::Trino | DialectType::DuckDB => {
23819 let agg = crate::expressions::AggFunc {
23820 this: if f.args.is_empty() {
23821 Expression::Null(crate::expressions::Null)
23822 } else {
23823 f.args[0].clone()
23824 },
23825 distinct: true,
23826 order_by: Vec::new(),
23827 filter: None,
23828 ignore_nulls: None,
23829 name: None,
23830 having_max: None,
23831 limit: None,
23832 inferred_type: None,
23833 };
23834 Ok(Expression::ArrayAgg(Box::new(agg)))
23835 }
23836 _ => Ok(Expression::AggregateFunction(f)),
23837 }
23838 } else {
23839 Ok(e)
23840 }
23841 }
23842
23843 Action::PercentileConvert => {
23844 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
23845 if let Expression::AggregateFunction(f) = e {
23846 let name = match target {
23847 DialectType::DuckDB => "QUANTILE",
23848 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
23849 _ => "PERCENTILE",
23850 };
23851 Ok(Expression::AggregateFunction(Box::new(
23852 crate::expressions::AggregateFunction {
23853 name: name.to_string(),
23854 args: f.args,
23855 distinct: f.distinct,
23856 order_by: f.order_by,
23857 filter: f.filter,
23858 limit: f.limit,
23859 ignore_nulls: f.ignore_nulls,
23860 inferred_type: None,
23861 },
23862 )))
23863 } else {
23864 Ok(e)
23865 }
23866 }
23867
23868 Action::CorrIsnanWrap => {
23869 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
23870 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
23871 let corr_clone = e.clone();
23872 let isnan = Expression::Function(Box::new(Function::new(
23873 "ISNAN".to_string(),
23874 vec![corr_clone.clone()],
23875 )));
23876 let case_expr = Expression::Case(Box::new(Case {
23877 operand: None,
23878 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
23879 else_: Some(corr_clone),
23880 comments: Vec::new(),
23881 inferred_type: None,
23882 }));
23883 Ok(case_expr)
23884 }
23885
23886 Action::TruncToDateTrunc => {
23887 // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
23888 if let Expression::Function(f) = e {
23889 if f.args.len() == 2 {
23890 let timestamp = f.args[0].clone();
23891 let unit_expr = f.args[1].clone();
23892
23893 if matches!(target, DialectType::ClickHouse) {
23894 // For ClickHouse, produce Expression::DateTrunc which the generator
23895 // outputs as DATE_TRUNC(...) without going through the ClickHouse
23896 // target transform that would convert it to dateTrunc
23897 let unit_str = Self::get_unit_str_static(&unit_expr);
23898 let dt_field = match unit_str.as_str() {
23899 "YEAR" => DateTimeField::Year,
23900 "MONTH" => DateTimeField::Month,
23901 "DAY" => DateTimeField::Day,
23902 "HOUR" => DateTimeField::Hour,
23903 "MINUTE" => DateTimeField::Minute,
23904 "SECOND" => DateTimeField::Second,
23905 "WEEK" => DateTimeField::Week,
23906 "QUARTER" => DateTimeField::Quarter,
23907 _ => DateTimeField::Custom(unit_str),
23908 };
23909 Ok(Expression::DateTrunc(Box::new(
23910 crate::expressions::DateTruncFunc {
23911 this: timestamp,
23912 unit: dt_field,
23913 },
23914 )))
23915 } else {
23916 let new_args = vec![unit_expr, timestamp];
23917 Ok(Expression::Function(Box::new(Function::new(
23918 "DATE_TRUNC".to_string(),
23919 new_args,
23920 ))))
23921 }
23922 } else {
23923 Ok(Expression::Function(f))
23924 }
23925 } else {
23926 Ok(e)
23927 }
23928 }
23929
23930 Action::ArrayContainsConvert => {
23931 if let Expression::ArrayContains(f) = e {
23932 match target {
23933 DialectType::Presto | DialectType::Trino => {
23934 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
23935 Ok(Expression::Function(Box::new(Function::new(
23936 "CONTAINS".to_string(),
23937 vec![f.this, f.expression],
23938 ))))
23939 }
23940 DialectType::Snowflake => {
23941 // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
23942 let cast_val =
23943 Expression::Cast(Box::new(crate::expressions::Cast {
23944 this: f.expression,
23945 to: crate::expressions::DataType::Custom {
23946 name: "VARIANT".to_string(),
23947 },
23948 trailing_comments: Vec::new(),
23949 double_colon_syntax: false,
23950 format: None,
23951 default: None,
23952 inferred_type: None,
23953 }));
23954 Ok(Expression::Function(Box::new(Function::new(
23955 "ARRAY_CONTAINS".to_string(),
23956 vec![cast_val, f.this],
23957 ))))
23958 }
23959 _ => Ok(Expression::ArrayContains(f)),
23960 }
23961 } else {
23962 Ok(e)
23963 }
23964 }
23965
23966 Action::ArrayExceptConvert => {
23967 if let Expression::ArrayExcept(f) = e {
23968 let source_arr = f.this;
23969 let exclude_arr = f.expression;
23970 match target {
23971 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
23972 // Snowflake ARRAY_EXCEPT -> DuckDB bag semantics:
23973 // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
23974 // ELSE LIST_TRANSFORM(LIST_FILTER(
23975 // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source))),
23976 // pair -> (LENGTH(LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1]))
23977 // > LENGTH(LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])))),
23978 // pair -> pair[1])
23979 // END
23980
23981 // Build null check
23982 let source_is_null =
23983 Expression::IsNull(Box::new(crate::expressions::IsNull {
23984 this: source_arr.clone(),
23985 not: false,
23986 postfix_form: false,
23987 }));
23988 let exclude_is_null =
23989 Expression::IsNull(Box::new(crate::expressions::IsNull {
23990 this: exclude_arr.clone(),
23991 not: false,
23992 postfix_form: false,
23993 }));
23994 let null_check =
23995 Expression::Or(Box::new(crate::expressions::BinaryOp {
23996 left: source_is_null,
23997 right: exclude_is_null,
23998 left_comments: vec![],
23999 operator_comments: vec![],
24000 trailing_comments: vec![],
24001 inferred_type: None,
24002 }));
24003
24004 // GENERATE_SERIES(1, LENGTH(source))
24005 let gen_series = Expression::Function(Box::new(Function::new(
24006 "GENERATE_SERIES".to_string(),
24007 vec![
24008 Expression::number(1),
24009 Expression::Function(Box::new(Function::new(
24010 "LENGTH".to_string(),
24011 vec![source_arr.clone()],
24012 ))),
24013 ],
24014 )));
24015
24016 // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source)))
24017 let list_zip = Expression::Function(Box::new(Function::new(
24018 "LIST_ZIP".to_string(),
24019 vec![source_arr.clone(), gen_series],
24020 )));
24021
24022 // pair[1] and pair[2]
24023 let pair_col = Expression::column("pair");
24024 let pair_1 = Expression::Subscript(Box::new(
24025 crate::expressions::Subscript {
24026 this: pair_col.clone(),
24027 index: Expression::number(1),
24028 },
24029 ));
24030 let pair_2 = Expression::Subscript(Box::new(
24031 crate::expressions::Subscript {
24032 this: pair_col.clone(),
24033 index: Expression::number(2),
24034 },
24035 ));
24036
24037 // source[1:pair[2]]
24038 let source_slice = Expression::ArraySlice(Box::new(
24039 crate::expressions::ArraySlice {
24040 this: source_arr.clone(),
24041 start: Some(Expression::number(1)),
24042 end: Some(pair_2),
24043 },
24044 ));
24045
24046 let e_col = Expression::column("e");
24047
24048 // e -> e IS NOT DISTINCT FROM pair[1]
24049 let inner_lambda1 =
24050 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24051 parameters: vec![crate::expressions::Identifier::new("e")],
24052 body: Expression::NullSafeEq(Box::new(
24053 crate::expressions::BinaryOp {
24054 left: e_col.clone(),
24055 right: pair_1.clone(),
24056 left_comments: vec![],
24057 operator_comments: vec![],
24058 trailing_comments: vec![],
24059 inferred_type: None,
24060 },
24061 )),
24062 colon: false,
24063 parameter_types: vec![],
24064 }));
24065
24066 // LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
24067 let inner_filter1 = Expression::Function(Box::new(Function::new(
24068 "LIST_FILTER".to_string(),
24069 vec![source_slice, inner_lambda1],
24070 )));
24071
24072 // LENGTH(LIST_FILTER(source[1:pair[2]], ...))
24073 let len1 = Expression::Function(Box::new(Function::new(
24074 "LENGTH".to_string(),
24075 vec![inner_filter1],
24076 )));
24077
24078 // e -> e IS NOT DISTINCT FROM pair[1]
24079 let inner_lambda2 =
24080 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24081 parameters: vec![crate::expressions::Identifier::new("e")],
24082 body: Expression::NullSafeEq(Box::new(
24083 crate::expressions::BinaryOp {
24084 left: e_col,
24085 right: pair_1.clone(),
24086 left_comments: vec![],
24087 operator_comments: vec![],
24088 trailing_comments: vec![],
24089 inferred_type: None,
24090 },
24091 )),
24092 colon: false,
24093 parameter_types: vec![],
24094 }));
24095
24096 // LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])
24097 let inner_filter2 = Expression::Function(Box::new(Function::new(
24098 "LIST_FILTER".to_string(),
24099 vec![exclude_arr.clone(), inner_lambda2],
24100 )));
24101
24102 // LENGTH(LIST_FILTER(exclude, ...))
24103 let len2 = Expression::Function(Box::new(Function::new(
24104 "LENGTH".to_string(),
24105 vec![inner_filter2],
24106 )));
24107
24108 // (LENGTH(...) > LENGTH(...))
24109 let cond = Expression::Paren(Box::new(Paren {
24110 this: Expression::Gt(Box::new(crate::expressions::BinaryOp {
24111 left: len1,
24112 right: len2,
24113 left_comments: vec![],
24114 operator_comments: vec![],
24115 trailing_comments: vec![],
24116 inferred_type: None,
24117 })),
24118 trailing_comments: vec![],
24119 }));
24120
24121 // pair -> (condition)
24122 let filter_lambda =
24123 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24124 parameters: vec![crate::expressions::Identifier::new(
24125 "pair",
24126 )],
24127 body: cond,
24128 colon: false,
24129 parameter_types: vec![],
24130 }));
24131
24132 // LIST_FILTER(LIST_ZIP(...), pair -> ...)
24133 let outer_filter = Expression::Function(Box::new(Function::new(
24134 "LIST_FILTER".to_string(),
24135 vec![list_zip, filter_lambda],
24136 )));
24137
24138 // pair -> pair[1]
24139 let transform_lambda =
24140 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24141 parameters: vec![crate::expressions::Identifier::new(
24142 "pair",
24143 )],
24144 body: pair_1,
24145 colon: false,
24146 parameter_types: vec![],
24147 }));
24148
24149 // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
24150 let list_transform = Expression::Function(Box::new(Function::new(
24151 "LIST_TRANSFORM".to_string(),
24152 vec![outer_filter, transform_lambda],
24153 )));
24154
24155 Ok(Expression::Case(Box::new(Case {
24156 operand: None,
24157 whens: vec![(null_check, Expression::Null(Null))],
24158 else_: Some(list_transform),
24159 comments: Vec::new(),
24160 inferred_type: None,
24161 })))
24162 }
            DialectType::DuckDB => {
                // ARRAY_EXCEPT(source, exclude) -> set semantics for DuckDB:
                //   CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
                //   ELSE LIST_FILTER(LIST_DISTINCT(source),
                //        e -> LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)) = 0)
                //   END
                // IS NOT DISTINCT FROM is used instead of = so that a NULL element in
                // `source` is still matched against a NULL element in `exclude`.

                // Build: source IS NULL
                let source_is_null =
                    Expression::IsNull(Box::new(crate::expressions::IsNull {
                        this: source_arr.clone(),
                        not: false,
                        postfix_form: false,
                    }));
                // Build: exclude IS NULL
                let exclude_is_null =
                    Expression::IsNull(Box::new(crate::expressions::IsNull {
                        this: exclude_arr.clone(),
                        not: false,
                        postfix_form: false,
                    }));
                // source IS NULL OR exclude IS NULL -> whole result is NULL
                let null_check =
                    Expression::Or(Box::new(crate::expressions::BinaryOp {
                        left: source_is_null,
                        right: exclude_is_null,
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    }));

                // LIST_DISTINCT(source) — dedupe before filtering (set semantics)
                let list_distinct = Expression::Function(Box::new(Function::new(
                    "LIST_DISTINCT".to_string(),
                    vec![source_arr.clone()],
                )));

                // x IS NOT DISTINCT FROM e (NULL-safe equality)
                let x_col = Expression::column("x");
                let e_col = Expression::column("e");
                let is_not_distinct = Expression::NullSafeEq(Box::new(
                    crate::expressions::BinaryOp {
                        left: x_col,
                        right: e_col.clone(),
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    },
                ));

                // x -> x IS NOT DISTINCT FROM e
                let inner_lambda =
                    Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                        parameters: vec![crate::expressions::Identifier::new("x")],
                        body: is_not_distinct,
                        colon: false,
                        parameter_types: vec![],
                    }));

                // LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)
                let inner_list_filter =
                    Expression::Function(Box::new(Function::new(
                        "LIST_FILTER".to_string(),
                        vec![exclude_arr.clone(), inner_lambda],
                    )));

                // LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e))
                let len_inner = Expression::Function(Box::new(Function::new(
                    "LENGTH".to_string(),
                    vec![inner_list_filter],
                )));

                // LENGTH(...) = 0  — keep `e` only if nothing in `exclude` matched it
                let eq_zero =
                    Expression::Eq(Box::new(crate::expressions::BinaryOp {
                        left: len_inner,
                        right: Expression::number(0),
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    }));

                // e -> LENGTH(LIST_FILTER(...)) = 0
                let outer_lambda =
                    Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                        parameters: vec![crate::expressions::Identifier::new("e")],
                        body: eq_zero,
                        colon: false,
                        parameter_types: vec![],
                    }));

                // LIST_FILTER(LIST_DISTINCT(source), e -> ...)
                let outer_list_filter =
                    Expression::Function(Box::new(Function::new(
                        "LIST_FILTER".to_string(),
                        vec![list_distinct, outer_lambda],
                    )));

                // CASE WHEN ... IS NULL ... THEN NULL ELSE LIST_FILTER(...) END
                Ok(Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(null_check, Expression::Null(Null))],
                    else_: Some(outer_list_filter),
                    comments: Vec::new(),
                    inferred_type: None,
                })))
            }
            DialectType::Snowflake => {
                // Snowflake: ARRAY_EXCEPT(source, exclude) - keep as-is
                Ok(Expression::ArrayExcept(Box::new(
                    crate::expressions::BinaryFunc {
                        this: source_arr,
                        expression: exclude_arr,
                        original_name: None,
                        inferred_type: None,
                    },
                )))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // Presto/Trino: ARRAY_EXCEPT(source, exclude) - keep function name, array syntax already converted
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_EXCEPT".to_string(),
                    vec![source_arr, exclude_arr],
                ))))
            }
            // Any other target: preserve the ArrayExcept node unchanged.
            _ => Ok(Expression::ArrayExcept(Box::new(
                crate::expressions::BinaryFunc {
                    this: source_arr,
                    expression: exclude_arr,
                    original_name: None,
                    inferred_type: None,
                },
            ))),
24299 }
24300 } else {
24301 Ok(e)
24302 }
24303 }
24304
            Action::RegexpLikeExasolAnchor => {
                // RegexpLike -> Exasol: wrap pattern with .*...*
                // Exasol REGEXP_LIKE does full-string match, but RLIKE/REGEXP from other
                // dialects does partial match, so we need to anchor with .* on both sides.
                // NOTE(review): a pattern that is already anchored with ^/$ would be broken
                // by this wrapping (".*^abc$.*" can never match) — confirm callers never
                // route anchored patterns through this action.
                if let Expression::RegexpLike(mut f) = e {
                    match &f.pattern {
                        Expression::Literal(lit)
                            if matches!(lit.as_ref(), Literal::String(_)) =>
                        {
                            let Literal::String(s) = lit.as_ref() else {
                                unreachable!()
                            };
                            // String literal: wrap with .*...*
                            f.pattern = Expression::Literal(Box::new(Literal::String(
                                format!(".*{}.*", s),
                            )));
                        }
                        _ => {
                            // Non-literal: wrap with CONCAT('.*', pattern, '.*')
                            // built as ('.*' || pattern) || '.*' and parenthesized.
                            f.pattern =
                                Expression::Paren(Box::new(crate::expressions::Paren {
                                    this: Expression::Concat(Box::new(
                                        crate::expressions::BinaryOp {
                                            left: Expression::Concat(Box::new(
                                                crate::expressions::BinaryOp {
                                                    left: Expression::Literal(Box::new(
                                                        Literal::String(".*".to_string()),
                                                    )),
                                                    right: f.pattern,
                                                    left_comments: vec![],
                                                    operator_comments: vec![],
                                                    trailing_comments: vec![],
                                                    inferred_type: None,
                                                },
                                            )),
                                            right: Expression::Literal(Box::new(
                                                Literal::String(".*".to_string()),
                                            )),
                                            left_comments: vec![],
                                            operator_comments: vec![],
                                            trailing_comments: vec![],
                                            inferred_type: None,
                                        },
                                    )),
                                    trailing_comments: vec![],
                                }));
                        }
                    }
                    Ok(Expression::RegexpLike(f))
                } else {
                    Ok(e)
                }
            }
24358
            Action::ArrayPositionSnowflakeSwap => {
                // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake.
                // Only the argument order is swapped; the node kind, original name, and
                // inferred type are carried over unchanged.
                if let Expression::ArrayPosition(f) = e {
                    Ok(Expression::ArrayPosition(Box::new(
                        crate::expressions::BinaryFunc {
                            this: f.expression,
                            expression: f.this,
                            original_name: f.original_name,
                            inferred_type: f.inferred_type,
                        },
                    )))
                } else {
                    Ok(e)
                }
            }
24374
            Action::SnowflakeArrayPositionToDuckDB => {
                // Snowflake ARRAY_POSITION(value, array) -> DuckDB ARRAY_POSITION(array, value) - 1
                // Snowflake uses 0-based indexing, DuckDB uses 1-based, so subtract 1 to
                // preserve Snowflake result semantics when targeting DuckDB.
                // The parser has this=value, expression=array (Snowflake order).
                if let Expression::ArrayPosition(f) = e {
                    // Create ARRAY_POSITION(array, value) in standard (DuckDB) order
                    let standard_pos =
                        Expression::ArrayPosition(Box::new(crate::expressions::BinaryFunc {
                            this: f.expression, // array
                            expression: f.this, // value
                            original_name: f.original_name,
                            inferred_type: f.inferred_type,
                        }));
                    // Subtract 1 for zero-based indexing
                    Ok(Expression::Sub(Box::new(BinaryOp {
                        left: standard_pos,
                        right: Expression::number(1),
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    })))
                } else {
                    Ok(e)
                }
            }
24401
            Action::ArrayDistinctConvert => {
                // ARRAY_DISTINCT(arr) -> DuckDB NULL-aware CASE:
                //   CASE WHEN ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
                //        THEN LIST_APPEND(LIST_DISTINCT(LIST_FILTER(arr, _u -> NOT _u IS NULL)), NULL)
                //        ELSE LIST_DISTINCT(arr)
                //   END
                // NOTE(review): this relies on ARRAY_LENGTH counting NULL elements while
                // LIST_COUNT does not, so a length mismatch means "arr contains a NULL";
                // in that case NULL is re-appended once after deduping the non-NULLs.
                // Confirm the ARRAY_LENGTH/LIST_COUNT behavior against DuckDB docs.
                if let Expression::ArrayDistinct(f) = e {
                    let arr = f.this;

                    // ARRAY_LENGTH(arr)
                    let array_length = Expression::Function(Box::new(Function::new(
                        "ARRAY_LENGTH".to_string(),
                        vec![arr.clone()],
                    )));
                    // LIST_COUNT(arr)
                    let list_count = Expression::Function(Box::new(Function::new(
                        "LIST_COUNT".to_string(),
                        vec![arr.clone()],
                    )));
                    // ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
                    let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
                        left: array_length,
                        right: list_count,
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    }));

                    // _u column (the lambda parameter)
                    let u_col = Expression::column("_u");
                    // NOT _u IS NULL
                    let u_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
                        this: u_col.clone(),
                        not: false,
                        postfix_form: false,
                    }));
                    let not_u_is_null =
                        Expression::Not(Box::new(crate::expressions::UnaryOp {
                            this: u_is_null,
                            inferred_type: None,
                        }));
                    // _u -> NOT _u IS NULL
                    let filter_lambda =
                        Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                            parameters: vec![crate::expressions::Identifier::new("_u")],
                            body: not_u_is_null,
                            colon: false,
                            parameter_types: vec![],
                        }));
                    // LIST_FILTER(arr, _u -> NOT _u IS NULL)
                    let list_filter = Expression::Function(Box::new(Function::new(
                        "LIST_FILTER".to_string(),
                        vec![arr.clone(), filter_lambda],
                    )));
                    // LIST_DISTINCT(LIST_FILTER(arr, ...))
                    let list_distinct_filtered = Expression::Function(Box::new(Function::new(
                        "LIST_DISTINCT".to_string(),
                        vec![list_filter],
                    )));
                    // LIST_APPEND(LIST_DISTINCT(LIST_FILTER(...)), NULL)
                    let list_append = Expression::Function(Box::new(Function::new(
                        "LIST_APPEND".to_string(),
                        vec![list_distinct_filtered, Expression::Null(Null)],
                    )));

                    // LIST_DISTINCT(arr) — the no-NULLs fast path
                    let list_distinct = Expression::Function(Box::new(Function::new(
                        "LIST_DISTINCT".to_string(),
                        vec![arr],
                    )));

                    // CASE WHEN neq THEN list_append ELSE list_distinct END
                    Ok(Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![(neq, list_append)],
                        else_: Some(list_distinct),
                        comments: Vec::new(),
                        inferred_type: None,
                    })))
                } else {
                    Ok(e)
                }
            }
24486
            Action::ArrayDistinctClickHouse => {
                // ARRAY_DISTINCT(arr) -> arrayDistinct(arr) for ClickHouse
                // (only the function spelling changes; the argument passes through).
                if let Expression::ArrayDistinct(f) = e {
                    Ok(Expression::Function(Box::new(Function::new(
                        "arrayDistinct".to_string(),
                        vec![f.this],
                    ))))
                } else {
                    Ok(e)
                }
            }
24498
            Action::ArrayContainsDuckDBConvert => {
                // Snowflake ARRAY_CONTAINS(value, array) -> DuckDB NULL-aware:
                //   CASE WHEN value IS NULL
                //        THEN NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
                //        ELSE ARRAY_CONTAINS(array, value)
                //   END
                // Note: In Rust AST from Snowflake parse, this=value (first arg), expression=array (second arg).
                // For a NULL needle, NULLIF(..., FALSE) maps "array has no NULL" to NULL
                // rather than FALSE, matching SQL three-valued logic.
                if let Expression::ArrayContains(f) = e {
                    let value = f.this;
                    let array = f.expression;

                    // value IS NULL
                    let value_is_null =
                        Expression::IsNull(Box::new(crate::expressions::IsNull {
                            this: value.clone(),
                            not: false,
                            postfix_form: false,
                        }));

                    // ARRAY_LENGTH(array)
                    let array_length = Expression::Function(Box::new(Function::new(
                        "ARRAY_LENGTH".to_string(),
                        vec![array.clone()],
                    )));
                    // LIST_COUNT(array)
                    let list_count = Expression::Function(Box::new(Function::new(
                        "LIST_COUNT".to_string(),
                        vec![array.clone()],
                    )));
                    // ARRAY_LENGTH(array) <> LIST_COUNT(array) — TRUE iff array contains NULL
                    // (assumes LIST_COUNT skips NULLs; see ArrayDistinctConvert note)
                    let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
                        left: array_length,
                        right: list_count,
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    }));
                    // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
                    let nullif = Expression::Nullif(Box::new(crate::expressions::Nullif {
                        this: Box::new(neq),
                        expression: Box::new(Expression::Boolean(
                            crate::expressions::BooleanLiteral { value: false },
                        )),
                    }));

                    // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
                    let array_contains = Expression::Function(Box::new(Function::new(
                        "ARRAY_CONTAINS".to_string(),
                        vec![array, value],
                    )));

                    // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
                    Ok(Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![(value_is_null, nullif)],
                        else_: Some(array_contains),
                        comments: Vec::new(),
                        inferred_type: None,
                    })))
                } else {
                    Ok(e)
                }
            }
24563
            Action::StrPositionExpand => {
                // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
                // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                if let Expression::StrPosition(sp) = e {
                    let crate::expressions::StrPosition {
                        this,
                        substr,
                        position,
                        occurrence,
                    } = *sp;
                    let string = *this;
                    // Missing substring is represented as SQL NULL; missing start position
                    // defaults to 1 (search from the beginning).
                    let substr_expr = match substr {
                        Some(s) => *s,
                        None => Expression::Null(Null),
                    };
                    let pos = match position {
                        Some(p) => *p,
                        None => Expression::number(1),
                    };

                    // SUBSTRING(string, pos)
                    let substring_call = Expression::Function(Box::new(Function::new(
                        "SUBSTRING".to_string(),
                        vec![string.clone(), pos.clone()],
                    )));
                    // STRPOS(SUBSTRING(string, pos), substr)
                    let strpos_call = Expression::Function(Box::new(Function::new(
                        "STRPOS".to_string(),
                        vec![substring_call, substr_expr.clone()],
                    )));
                    // STRPOS(...) + pos - 1  — re-base the offset of the sliced search
                    let pos_adjusted =
                        Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                            Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                                strpos_call.clone(),
                                pos.clone(),
                            ))),
                            Expression::number(1),
                        )));
                    // STRPOS(...) = 0  — not found in the sliced string
                    let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
                        strpos_call.clone(),
                        Expression::number(0),
                    )));

                    match target {
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                            Ok(Expression::Function(Box::new(Function::new(
                                "IF".to_string(),
                                vec![is_zero, Expression::number(0), pos_adjusted],
                            ))))
                        }
                        DialectType::DuckDB => {
                            // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                            Ok(Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(is_zero, Expression::number(0))],
                                else_: Some(pos_adjusted),
                                comments: Vec::new(),
                                inferred_type: None,
                            })))
                        }
                        _ => {
                            // Reconstruct StrPosition.
                            // NOTE(review): a substr that was originally None is re-wrapped
                            // here as Some(NULL) — confirm generators treat the two alike.
                            Ok(Expression::StrPosition(Box::new(
                                crate::expressions::StrPosition {
                                    this: Box::new(string),
                                    substr: Some(Box::new(substr_expr)),
                                    position: Some(Box::new(pos)),
                                    occurrence,
                                },
                            )))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
24644
            Action::MonthsBetweenConvert => {
                // MONTHS_BETWEEN(end, start) -> target-specific rewrite.
                // The parsed BinaryFunc stores this=end_date, expression=start_date.
                if let Expression::MonthsBetween(mb) = e {
                    let crate::expressions::BinaryFunc {
                        this: end_date,
                        expression: start_date,
                        ..
                    } = *mb;
                    match target {
                        DialectType::DuckDB => {
                            // Emulate fractional MONTHS_BETWEEN:
                            //   DATE_DIFF('MONTH', start, end)
                            //   + CASE WHEN both dates are month-ends THEN 0
                            //          ELSE (DAY(end) - DAY(start)) / 31.0 END
                            let cast_end = Self::ensure_cast_date(end_date);
                            let cast_start = Self::ensure_cast_date(start_date);
                            let dd = Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(),
                                vec![
                                    Expression::string("MONTH"),
                                    cast_start.clone(),
                                    cast_end.clone(),
                                ],
                            )));
                            let day_end = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![cast_end.clone()],
                            )));
                            let day_start = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![cast_start.clone()],
                            )));
                            let last_day_end = Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(),
                                vec![cast_end.clone()],
                            )));
                            let last_day_start = Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(),
                                vec![cast_start.clone()],
                            )));
                            let day_last_end = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![last_day_end],
                            )));
                            let day_last_start = Expression::Function(Box::new(Function::new(
                                "DAY".to_string(),
                                vec![last_day_start],
                            )));
                            // end date is the last day of its month
                            let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                                day_end.clone(),
                                day_last_end,
                            )));
                            // start date is the last day of its month
                            let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                                day_start.clone(),
                                day_last_start,
                            )));
                            let both_cond =
                                Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                            let day_diff =
                                Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
                            let day_diff_paren =
                                Expression::Paren(Box::new(crate::expressions::Paren {
                                    this: day_diff,
                                    trailing_comments: Vec::new(),
                                }));
                            // (DAY(end) - DAY(start)) / 31.0 — 31.0 forces float division
                            let frac = Expression::Div(Box::new(BinaryOp::new(
                                day_diff_paren,
                                Expression::Literal(Box::new(Literal::Number(
                                    "31.0".to_string(),
                                ))),
                            )));
                            let case_expr = Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(both_cond, Expression::number(0))],
                                else_: Some(frac),
                                comments: Vec::new(),
                                inferred_type: None,
                            }));
                            Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
                        }
                        DialectType::Snowflake | DialectType::Redshift => {
                            // DATEDIFF(MONTH, start, end) — unit is a bare identifier
                            let unit = Expression::Identifier(Identifier::new("MONTH"));
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATEDIFF".to_string(),
                                vec![unit, start_date, end_date],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // DATE_DIFF('MONTH', start, end) — unit is a string literal
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_DIFF".to_string(),
                                vec![Expression::string("MONTH"), start_date, end_date],
                            ))))
                        }
                        // Other targets: keep the MonthsBetween node unchanged.
                        _ => Ok(Expression::MonthsBetween(Box::new(
                            crate::expressions::BinaryFunc {
                                this: end_date,
                                expression: start_date,
                                original_name: None,
                                inferred_type: None,
                            },
                        ))),
                    }
                } else {
                    Ok(e)
                }
            }
24746
            Action::AddMonthsConvert => {
                // ADD_MONTHS(date, n) -> target-specific rewrite.
                // The parsed node stores this=date, expression=n.
                if let Expression::AddMonths(am) = e {
                    let date = am.this;
                    let val = am.expression;
                    match target {
                        DialectType::TSQL | DialectType::Fabric => {
                            // DATEADD(MONTH, n, CAST(date AS DATETIME2))
                            let cast_date = Self::ensure_cast_datetime2(date);
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATEADD".to_string(),
                                vec![
                                    Expression::Identifier(Identifier::new("MONTH")),
                                    val,
                                    cast_date,
                                ],
                            ))))
                        }
                        DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
                            // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
                            // (preserves Snowflake's month-end snapping behavior)
                            // Optionally wrapped in CAST(... AS type) if the input had a specific type

                            // Determine the cast type from the date expression
                            let (cast_date, return_type) = match &date {
                                Expression::Literal(lit)
                                    if matches!(lit.as_ref(), Literal::String(_)) =>
                                {
                                    // String literal: CAST(str AS TIMESTAMP), no outer CAST
                                    (
                                        Expression::Cast(Box::new(Cast {
                                            this: date.clone(),
                                            to: DataType::Timestamp {
                                                precision: None,
                                                timezone: false,
                                            },
                                            trailing_comments: Vec::new(),
                                            double_colon_syntax: false,
                                            format: None,
                                            default: None,
                                            inferred_type: None,
                                        })),
                                        None,
                                    )
                                }
                                Expression::Cast(c) => {
                                    // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
                                    (date.clone(), Some(c.to.clone()))
                                }
                                _ => {
                                    // Expression or NULL::TYPE - keep as-is, check for cast type
                                    // NOTE(review): this inner Expression::Cast check is dead
                                    // code — any Cast already matched the arm above, so this
                                    // always takes the else branch.
                                    if let Expression::Cast(c) = &date {
                                        (date.clone(), Some(c.to.clone()))
                                    } else {
                                        (date.clone(), None)
                                    }
                                }
                            };

                            // Build the interval expression
                            // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
                            // For integer values, use INTERVAL val MONTH
                            let is_non_integer_val = match &val {
                                Expression::Literal(lit)
                                    if matches!(lit.as_ref(), Literal::Number(_)) =>
                                {
                                    let Literal::Number(n) = lit.as_ref() else {
                                        unreachable!()
                                    };
                                    // A decimal point marks a fractional literal
                                    n.contains('.')
                                }
                                Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
                                Expression::Neg(n) => {
                                    // Negated literal: inspect the inner number for a '.'
                                    if let Expression::Literal(lit) = &n.this {
                                        if let Literal::Number(s) = lit.as_ref() {
                                            s.contains('.')
                                        } else {
                                            false
                                        }
                                    } else {
                                        false
                                    }
                                }
                                _ => false,
                            };

                            let add_interval = if is_non_integer_val {
                                // TO_MONTHS(CAST(ROUND(val) AS INT))
                                let round_val = Expression::Function(Box::new(Function::new(
                                    "ROUND".to_string(),
                                    vec![val.clone()],
                                )));
                                let cast_int = Expression::Cast(Box::new(Cast {
                                    this: round_val,
                                    to: DataType::Int {
                                        length: None,
                                        integer_spelling: false,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                Expression::Function(Box::new(Function::new(
                                    "TO_MONTHS".to_string(),
                                    vec![cast_int],
                                )))
                            } else {
                                // INTERVAL val MONTH
                                // For negative numbers (and NULL), wrap in parens so the
                                // generated INTERVAL syntax stays parseable
                                let interval_val = match &val {
                                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n.starts_with('-')) =>
                                    {
                                        let Literal::Number(_) = lit.as_ref() else {
                                            unreachable!()
                                        };
                                        Expression::Paren(Box::new(Paren {
                                            this: val.clone(),
                                            trailing_comments: Vec::new(),
                                        }))
                                    }
                                    Expression::Neg(_) => Expression::Paren(Box::new(Paren {
                                        this: val.clone(),
                                        trailing_comments: Vec::new(),
                                    })),
                                    Expression::Null(_) => Expression::Paren(Box::new(Paren {
                                        this: val.clone(),
                                        trailing_comments: Vec::new(),
                                    })),
                                    _ => val.clone(),
                                };
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(interval_val),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Month,
                                        use_plural: false,
                                    }),
                                }))
                            };

                            // Build: date + interval
                            let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
                                cast_date.clone(),
                                add_interval.clone(),
                            )));

                            // Build LAST_DAY(date)
                            let last_day_date = Expression::Function(Box::new(Function::new(
                                "LAST_DAY".to_string(),
                                vec![cast_date.clone()],
                            )));

                            // Build LAST_DAY(date + interval)
                            let last_day_date_plus =
                                Expression::Function(Box::new(Function::new(
                                    "LAST_DAY".to_string(),
                                    vec![date_plus_interval.clone()],
                                )));

                            // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
                            let case_expr = Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(
                                    Expression::Eq(Box::new(BinaryOp::new(
                                        last_day_date,
                                        cast_date.clone(),
                                    ))),
                                    last_day_date_plus,
                                )],
                                else_: Some(date_plus_interval),
                                comments: Vec::new(),
                                inferred_type: None,
                            }));

                            // Wrap in CAST(... AS type) if needed
                            if let Some(dt) = return_type {
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: case_expr,
                                    to: dt,
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            } else {
                                Ok(case_expr)
                            }
                        }
                        DialectType::DuckDB => {
                            // Non-Snowflake source: simple date + INTERVAL
                            let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
                            {
                                Expression::Cast(Box::new(Cast {
                                    this: date,
                                    to: DataType::Timestamp {
                                        precision: None,
                                        timezone: false,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }))
                            } else {
                                date
                            };
                            let interval =
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(val),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Month,
                                        use_plural: false,
                                    }),
                                }));
                            Ok(Expression::Add(Box::new(BinaryOp::new(
                                cast_date, interval,
                            ))))
                        }
                        DialectType::Snowflake => {
                            // Keep ADD_MONTHS when source is also Snowflake
                            if matches!(source, DialectType::Snowflake) {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "ADD_MONTHS".to_string(),
                                    vec![date, val],
                                ))))
                            } else {
                                // Cross-dialect input: use DATEADD(MONTH, n, date)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATEADD".to_string(),
                                    vec![
                                        Expression::Identifier(Identifier::new("MONTH")),
                                        val,
                                        date,
                                    ],
                                ))))
                            }
                        }
                        DialectType::Redshift => {
                            // DATEADD(MONTH, n, date)
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATEADD".to_string(),
                                vec![
                                    Expression::Identifier(Identifier::new("MONTH")),
                                    val,
                                    date,
                                ],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // DATE_ADD('MONTH', n, date); string literals get cast to TIMESTAMP
                            let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
                            {
                                Expression::Cast(Box::new(Cast {
                                    this: date,
                                    to: DataType::Timestamp {
                                        precision: None,
                                        timezone: false,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }))
                            } else {
                                date
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![Expression::string("MONTH"), val, cast_date],
                            ))))
                        }
                        DialectType::BigQuery => {
                            // DATE_ADD(date, INTERVAL n MONTH); string literals get cast
                            // to the BigQuery DATETIME type (modeled as a Custom type here)
                            let interval =
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(val),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Month,
                                        use_plural: false,
                                    }),
                                }));
                            let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
                            {
                                Expression::Cast(Box::new(Cast {
                                    this: date,
                                    to: DataType::Custom {
                                        name: "DATETIME".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }))
                            } else {
                                date
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![cast_date, interval],
                            ))))
                        }
                        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                            // Spark-family keeps the ADD_MONTHS spelling
                            Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![date, val],
                            ))))
                        }
                        _ => {
                            // Default: keep as AddMonths expression
                            Ok(Expression::AddMonths(Box::new(
                                crate::expressions::BinaryFunc {
                                    this: date,
                                    expression: val,
                                    original_name: None,
                                    inferred_type: None,
                                },
                            )))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
25068
            Action::PercentileContConvert => {
                // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
                //   Presto/Trino: APPROX_PERCENTILE(col, p)
                //   Spark/Databricks: PERCENTILE_APPROX(col, p)
                // NOTE(review): _is_disc is computed but never used — PERCENTILE_DISC and
                // PERCENTILE_CONT currently map to the same approximate function.
                if let Expression::WithinGroup(wg) = e {
                    // Extract percentile value and order by column; the percentile
                    // defaults to 0.5 when the wrapped call has no arguments.
                    let (percentile, _is_disc) = match &wg.this {
                        Expression::Function(f) => {
                            let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                            let pct = f.args.first().cloned().unwrap_or(Expression::Literal(
                                Box::new(Literal::Number("0.5".to_string())),
                            ));
                            (pct, is_disc)
                        }
                        Expression::AggregateFunction(af) => {
                            let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                            let pct = af.args.first().cloned().unwrap_or(Expression::Literal(
                                Box::new(Literal::Number("0.5".to_string())),
                            ));
                            (pct, is_disc)
                        }
                        Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
                        // Anything else wrapped by WITHIN GROUP passes through untouched
                        _ => return Ok(Expression::WithinGroup(wg)),
                    };
                    // First ORDER BY key is the aggregated column; fall back to literal 1
                    let col = wg.order_by.first().map(|o| o.this.clone()).unwrap_or(
                        Expression::Literal(Box::new(Literal::Number("1".to_string()))),
                    );

                    let func_name = match target {
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            "APPROX_PERCENTILE"
                        }
                        _ => "PERCENTILE_APPROX", // Spark, Databricks
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        func_name.to_string(),
                        vec![col, percentile],
                    ))))
                } else {
                    Ok(e)
                }
            }
25111
            Action::CurrentUserSparkParens => {
                // CURRENT_USER -> CURRENT_USER() for Spark
                // (converted from a bare keyword node to an explicit zero-arg call).
                if let Expression::CurrentUser(_) = e {
                    Ok(Expression::Function(Box::new(Function::new(
                        "CURRENT_USER".to_string(),
                        vec![],
                    ))))
                } else {
                    Ok(e)
                }
            }
25123
            Action::SparkDateFuncCast => {
                // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE.
                // Presto-family needs a double cast (via TIMESTAMP then DATE); everyone
                // else gets a plain CAST(arg AS DATE).
                let cast_arg = |arg: Expression| -> Expression {
                    match target {
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            Self::double_cast_timestamp_date(arg)
                        }
                        _ => {
                            // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
                            Self::ensure_cast_date(arg)
                        }
                    }
                };
                // Only the three date-part extractors are rewritten; anything else passes through.
                match e {
                    Expression::Month(f) => Ok(Expression::Month(Box::new(
                        crate::expressions::UnaryFunc::new(cast_arg(f.this)),
                    ))),
                    Expression::Year(f) => Ok(Expression::Year(Box::new(
                        crate::expressions::UnaryFunc::new(cast_arg(f.this)),
                    ))),
                    Expression::Day(f) => Ok(Expression::Day(Box::new(
                        crate::expressions::UnaryFunc::new(cast_arg(f.this)),
                    ))),
                    other => Ok(other),
                }
            }
25150
25151 Action::MapFromArraysConvert => {
25152 // Expression::MapFromArrays -> target-specific
25153 if let Expression::MapFromArrays(mfa) = e {
25154 let keys = mfa.this;
25155 let values = mfa.expression;
25156 match target {
25157 DialectType::Snowflake => Ok(Expression::Function(Box::new(
25158 Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
25159 ))),
25160 _ => {
25161 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
25162 Ok(Expression::Function(Box::new(Function::new(
25163 "MAP".to_string(),
25164 vec![keys, values],
25165 ))))
25166 }
25167 }
25168 } else {
25169 Ok(e)
25170 }
25171 }
25172
            Action::AnyToExists => {
                // `lhs op ANY(arr)` -> EXISTS(arr, x -> lhs op x) for targets
                // with higher-order array functions (Presto/Trino-style).
                if let Expression::Any(q) = e {
                    if let Some(op) = q.op.clone() {
                        // The lambda parameter `x` stands in for each element
                        // of the array on the right-hand side.
                        let lambda_param = crate::expressions::Identifier::new("x");
                        let rhs = Expression::Identifier(lambda_param.clone());
                        // Rebuild the comparison with the original LHS and the
                        // lambda variable on the right, one variant per operator.
                        let body = match op {
                            crate::expressions::QuantifiedOp::Eq => {
                                Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Neq => {
                                Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Lt => {
                                Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Lte => {
                                Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Gt => {
                                Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Gte => {
                                Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                        };
                        let lambda =
                            Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                parameters: vec![lambda_param],
                                body,
                                colon: false,
                                parameter_types: Vec::new(),
                            }));
                        // EXISTS(array, lambda): `q.subquery` holds the array side.
                        Ok(Expression::Function(Box::new(Function::new(
                            "EXISTS".to_string(),
                            vec![q.subquery, lambda],
                        ))))
                    } else {
                        // No comparison operator recorded: leave the ANY node as-is.
                        Ok(Expression::Any(q))
                    }
                } else {
                    Ok(e)
                }
            }
25216
            Action::GenerateSeriesConvert => {
                // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
                // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
                // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
                if let Expression::Function(f) = e {
                    // Only rewrite well-formed calls with at least start + end.
                    if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
                        let start = f.args[0].clone();
                        let end = f.args[1].clone();
                        let step = f.args.get(2).cloned();

                        // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
                        let step = step.map(|s| Self::normalize_interval_string(s, target));

                        // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
                        // (other argument kinds are returned unchanged).
                        let maybe_cast_timestamp = |arg: Expression| -> Expression {
                            if matches!(
                                target,
                                DialectType::Presto
                                    | DialectType::Trino
                                    | DialectType::Athena
                                    | DialectType::Spark
                                    | DialectType::Databricks
                                    | DialectType::Hive
                            ) {
                                match &arg {
                                    Expression::CurrentTimestamp(_) => {
                                        Expression::Cast(Box::new(Cast {
                                            this: arg,
                                            to: DataType::Timestamp {
                                                precision: None,
                                                timezone: false,
                                            },
                                            trailing_comments: Vec::new(),
                                            double_colon_syntax: false,
                                            format: None,
                                            default: None,
                                            inferred_type: None,
                                        }))
                                    }
                                    _ => arg,
                                }
                            } else {
                                arg
                            }
                        };

                        let start = maybe_cast_timestamp(start);
                        let end = maybe_cast_timestamp(end);

                        // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
                        // (only the step normalization above applies).
                        if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
                            let mut gs_args = vec![start, end];
                            if let Some(step) = step {
                                gs_args.push(step);
                            }
                            return Ok(Expression::Function(Box::new(Function::new(
                                "GENERATE_SERIES".to_string(),
                                gs_args,
                            ))));
                        }

                        // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
                        // so the series is expanded into rows.
                        if matches!(target, DialectType::DuckDB) {
                            let mut gs_args = vec![start, end];
                            if let Some(step) = step {
                                gs_args.push(step);
                            }
                            let gs = Expression::Function(Box::new(Function::new(
                                "GENERATE_SERIES".to_string(),
                                gs_args,
                            )));
                            return Ok(Expression::Function(Box::new(Function::new(
                                "UNNEST".to_string(),
                                vec![gs],
                            ))));
                        }

                        // Remaining targets use SEQUENCE(start, end[, step]).
                        let mut seq_args = vec![start, end];
                        if let Some(step) = step {
                            seq_args.push(step);
                        }

                        let seq = Expression::Function(Box::new(Function::new(
                            "SEQUENCE".to_string(),
                            seq_args,
                        )));

                        match target {
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // Wrap in UNNEST
                                Ok(Expression::Function(Box::new(Function::new(
                                    "UNNEST".to_string(),
                                    vec![seq],
                                ))))
                            }
                            DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Hive => {
                                // Wrap in EXPLODE
                                Ok(Expression::Function(Box::new(Function::new(
                                    "EXPLODE".to_string(),
                                    vec![seq],
                                ))))
                            }
                            _ => {
                                // Just SEQUENCE for others
                                Ok(seq)
                            }
                        }
                    } else {
                        // Not a rewritable GENERATE_SERIES call: pass through.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
25333
            Action::ConcatCoalesceWrap => {
                // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
                // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
                //
                // COALESCE(x, '') keeps the overall result non-NULL when an
                // individual operand is NULL; the Presto family additionally
                // gets a VARCHAR cast (its CONCAT is strict about arg types —
                // NOTE(review): assumed from this branch, confirm against docs).
                if let Expression::Function(f) = e {
                    if f.name.eq_ignore_ascii_case("CONCAT") {
                        let new_args: Vec<Expression> = f
                            .args
                            .into_iter()
                            .map(|arg| {
                                // Presto/Trino/Athena: cast each operand to VARCHAR first.
                                let cast_arg = if matches!(
                                    target,
                                    DialectType::Presto
                                        | DialectType::Trino
                                        | DialectType::Athena
                                ) {
                                    Expression::Cast(Box::new(Cast {
                                        this: arg,
                                        to: DataType::VarChar {
                                            length: None,
                                            parenthesized_length: false,
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                } else {
                                    arg
                                };
                                // Wrap (possibly cast) operand: COALESCE(x, '').
                                Expression::Function(Box::new(Function::new(
                                    "COALESCE".to_string(),
                                    vec![cast_arg, Expression::string("")],
                                )))
                            })
                            .collect();
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONCAT".to_string(),
                            new_args,
                        ))))
                    } else {
                        // Not a CONCAT call: pass through unchanged.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
25381
25382 Action::PipeConcatToConcat => {
25383 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
25384 if let Expression::Concat(op) = e {
25385 let cast_left = Expression::Cast(Box::new(Cast {
25386 this: op.left,
25387 to: DataType::VarChar {
25388 length: None,
25389 parenthesized_length: false,
25390 },
25391 trailing_comments: Vec::new(),
25392 double_colon_syntax: false,
25393 format: None,
25394 default: None,
25395 inferred_type: None,
25396 }));
25397 let cast_right = Expression::Cast(Box::new(Cast {
25398 this: op.right,
25399 to: DataType::VarChar {
25400 length: None,
25401 parenthesized_length: false,
25402 },
25403 trailing_comments: Vec::new(),
25404 double_colon_syntax: false,
25405 format: None,
25406 default: None,
25407 inferred_type: None,
25408 }));
25409 Ok(Expression::Function(Box::new(Function::new(
25410 "CONCAT".to_string(),
25411 vec![cast_left, cast_right],
25412 ))))
25413 } else {
25414 Ok(e)
25415 }
25416 }
25417
            Action::DivFuncConvert => {
                // DIV(a, b) -> target-specific integer division
                //
                // Each target gets a shape that yields an integral quotient
                // while preserving the source's numeric result type.
                if let Expression::Function(f) = e {
                    if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
                        let a = f.args[0].clone();
                        let b = f.args[1].clone();
                        match target {
                            DialectType::DuckDB => {
                                // DIV(a, b) -> CAST(a // b AS DECIMAL)
                                let int_div = Expression::IntDiv(Box::new(
                                    crate::expressions::BinaryFunc {
                                        this: a,
                                        expression: b,
                                        original_name: None,
                                        inferred_type: None,
                                    },
                                ));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: int_div,
                                    to: DataType::Decimal {
                                        precision: None,
                                        scale: None,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                            DialectType::BigQuery => {
                                // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
                                // BigQuery keeps DIV but the result is cast so
                                // downstream arithmetic matches the source type.
                                let div_func = Expression::Function(Box::new(Function::new(
                                    "DIV".to_string(),
                                    vec![a, b],
                                )));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: div_func,
                                    to: DataType::Custom {
                                        name: "NUMERIC".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                            DialectType::SQLite => {
                                // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
                                // SQLite has no integer-division operator: force
                                // real division, truncate via INTEGER, then go
                                // back to REAL for a numeric (non-int) result.
                                let cast_a = Expression::Cast(Box::new(Cast {
                                    this: a,
                                    to: DataType::Custom {
                                        name: "REAL".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
                                let cast_int = Expression::Cast(Box::new(Cast {
                                    this: div,
                                    to: DataType::Int {
                                        length: None,
                                        integer_spelling: true,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: cast_int,
                                    to: DataType::Custom {
                                        name: "REAL".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                            // Other targets keep DIV(a, b) as-is.
                            _ => Ok(Expression::Function(f)),
                        }
                    } else {
                        // Not a two-argument DIV call: pass through.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
25513
25514 Action::JsonObjectAggConvert => {
25515 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
25516 match e {
25517 Expression::Function(f) => Ok(Expression::Function(Box::new(
25518 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
25519 ))),
25520 Expression::AggregateFunction(af) => {
25521 // AggregateFunction stores all args in the `args` vec
25522 Ok(Expression::Function(Box::new(Function::new(
25523 "JSON_GROUP_OBJECT".to_string(),
25524 af.args,
25525 ))))
25526 }
25527 other => Ok(other),
25528 }
25529 }
25530
25531 Action::JsonbExistsConvert => {
25532 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
25533 if let Expression::Function(f) = e {
25534 if f.args.len() == 2 {
25535 let json_expr = f.args[0].clone();
25536 let key = match &f.args[1] {
25537 Expression::Literal(lit)
25538 if matches!(
25539 lit.as_ref(),
25540 crate::expressions::Literal::String(_)
25541 ) =>
25542 {
25543 let crate::expressions::Literal::String(s) = lit.as_ref()
25544 else {
25545 unreachable!()
25546 };
25547 format!("$.{}", s)
25548 }
25549 _ => return Ok(Expression::Function(f)),
25550 };
25551 Ok(Expression::Function(Box::new(Function::new(
25552 "JSON_EXISTS".to_string(),
25553 vec![json_expr, Expression::string(&key)],
25554 ))))
25555 } else {
25556 Ok(Expression::Function(f))
25557 }
25558 } else {
25559 Ok(e)
25560 }
25561 }
25562
25563 Action::DateBinConvert => {
25564 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
25565 if let Expression::Function(f) = e {
25566 Ok(Expression::Function(Box::new(Function::new(
25567 "TIME_BUCKET".to_string(),
25568 f.args,
25569 ))))
25570 } else {
25571 Ok(e)
25572 }
25573 }
25574
25575 Action::MysqlCastCharToText => {
25576 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
25577 if let Expression::Cast(mut c) = e {
25578 c.to = DataType::Text;
25579 Ok(Expression::Cast(c))
25580 } else {
25581 Ok(e)
25582 }
25583 }
25584
            Action::SparkCastVarcharToString => {
                // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
                //
                // Applies to both CAST and TRY_CAST nodes; the shared helper
                // decides which destination types are rewritten.
                match e {
                    Expression::Cast(mut c) => {
                        c.to = Self::normalize_varchar_to_string(c.to);
                        Ok(Expression::Cast(c))
                    }
                    Expression::TryCast(mut c) => {
                        c.to = Self::normalize_varchar_to_string(c.to);
                        Ok(Expression::TryCast(c))
                    }
                    _ => Ok(e),
                }
            }
25599
25600 Action::MinMaxToLeastGreatest => {
25601 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
25602 if let Expression::Function(f) = e {
25603 let new_name = if f.name.eq_ignore_ascii_case("MIN") {
25604 "LEAST"
25605 } else if f.name.eq_ignore_ascii_case("MAX") {
25606 "GREATEST"
25607 } else {
25608 return Ok(Expression::Function(f));
25609 };
25610 Ok(Expression::Function(Box::new(Function::new(
25611 new_name.to_string(),
25612 f.args,
25613 ))))
25614 } else {
25615 Ok(e)
25616 }
25617 }
25618
25619 Action::ClickHouseUniqToApproxCountDistinct => {
25620 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
25621 if let Expression::Function(f) = e {
25622 Ok(Expression::Function(Box::new(Function::new(
25623 "APPROX_COUNT_DISTINCT".to_string(),
25624 f.args,
25625 ))))
25626 } else {
25627 Ok(e)
25628 }
25629 }
25630
25631 Action::ClickHouseAnyToAnyValue => {
25632 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
25633 if let Expression::Function(f) = e {
25634 Ok(Expression::Function(Box::new(Function::new(
25635 "ANY_VALUE".to_string(),
25636 f.args,
25637 ))))
25638 } else {
25639 Ok(e)
25640 }
25641 }
25642
25643 Action::OracleVarchar2ToVarchar => {
25644 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
25645 if let Expression::DataType(DataType::Custom { ref name }) = e {
25646 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
25647 let starts_varchar2 =
25648 name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2(");
25649 let starts_nvarchar2 =
25650 name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2(");
25651 let inner = if starts_varchar2 || starts_nvarchar2 {
25652 let start = if starts_nvarchar2 { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
25653 let end = name.len() - 1; // skip trailing ")"
25654 Some(&name[start..end])
25655 } else {
25656 Option::None
25657 };
25658 if let Some(inner_str) = inner {
25659 // Parse the number part, ignoring BYTE/CHAR qualifier
25660 let num_str = inner_str.split_whitespace().next().unwrap_or("");
25661 if let Ok(n) = num_str.parse::<u32>() {
25662 Ok(Expression::DataType(DataType::VarChar {
25663 length: Some(n),
25664 parenthesized_length: false,
25665 }))
25666 } else {
25667 Ok(e)
25668 }
25669 } else {
25670 // Plain VARCHAR2 / NVARCHAR2 without parens
25671 Ok(Expression::DataType(DataType::VarChar {
25672 length: Option::None,
25673 parenthesized_length: false,
25674 }))
25675 }
25676 } else {
25677 Ok(e)
25678 }
25679 }
25680
            Action::Nvl2Expand => {
                // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
                // But keep as NVL2 for dialects that support it natively
                let nvl2_native = matches!(
                    target,
                    DialectType::Oracle
                        | DialectType::Snowflake
                        | DialectType::Redshift
                        | DialectType::Teradata
                        | DialectType::Spark
                        | DialectType::Databricks
                );
                // Pull (a, b, c) out of either the dedicated Nvl2 node or a
                // generic NVL2(...) function call; early-return with the
                // original shape for native targets or malformed arg lists.
                let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
                    if nvl2_native {
                        return Ok(Expression::Nvl2(nvl2));
                    }
                    // The typed node always carries a false_value (ELSE branch).
                    (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
                } else if let Expression::Function(f) = e {
                    if nvl2_native {
                        return Ok(Expression::Function(Box::new(Function::new(
                            "NVL2".to_string(),
                            f.args,
                        ))));
                    }
                    if f.args.len() < 2 {
                        return Ok(Expression::Function(f));
                    }
                    let mut args = f.args;
                    let a = args.remove(0);
                    let b = args.remove(0);
                    // The third argument is optional; absent means no ELSE.
                    let c = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };
                    (a, b, c)
                } else {
                    return Ok(e);
                };
                // Build: NOT (a IS NULL)
                let is_null = Expression::IsNull(Box::new(IsNull {
                    this: a,
                    not: false,
                    postfix_form: false,
                }));
                let not_null = Expression::Not(Box::new(crate::expressions::UnaryOp {
                    this: is_null,
                    inferred_type: None,
                }));
                // CASE WHEN NOT a IS NULL THEN b [ELSE c] END
                Ok(Expression::Case(Box::new(Case {
                    operand: Option::None,
                    whens: vec![(not_null, b)],
                    else_: c,
                    comments: Vec::new(),
                    inferred_type: None,
                })))
            }
25738
25739 Action::IfnullToCoalesce => {
25740 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
25741 if let Expression::Coalesce(mut cf) = e {
25742 cf.original_name = Option::None;
25743 Ok(Expression::Coalesce(cf))
25744 } else if let Expression::Function(f) = e {
25745 Ok(Expression::Function(Box::new(Function::new(
25746 "COALESCE".to_string(),
25747 f.args,
25748 ))))
25749 } else {
25750 Ok(e)
25751 }
25752 }
25753
25754 Action::IsAsciiConvert => {
25755 // IS_ASCII(x) -> dialect-specific ASCII check
25756 if let Expression::Function(f) = e {
25757 let arg = f.args.into_iter().next().unwrap();
25758 match target {
25759 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
25760 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
25761 Ok(Expression::Function(Box::new(Function::new(
25762 "REGEXP_LIKE".to_string(),
25763 vec![
25764 arg,
25765 Expression::Literal(Box::new(Literal::String(
25766 "^[[:ascii:]]*$".to_string(),
25767 ))),
25768 ],
25769 ))))
25770 }
25771 DialectType::PostgreSQL
25772 | DialectType::Redshift
25773 | DialectType::Materialize
25774 | DialectType::RisingWave => {
25775 // (x ~ '^[[:ascii:]]*$')
25776 Ok(Expression::Paren(Box::new(Paren {
25777 this: Expression::RegexpLike(Box::new(
25778 crate::expressions::RegexpFunc {
25779 this: arg,
25780 pattern: Expression::Literal(Box::new(
25781 Literal::String("^[[:ascii:]]*$".to_string()),
25782 )),
25783 flags: Option::None,
25784 },
25785 )),
25786 trailing_comments: Vec::new(),
25787 })))
25788 }
25789 DialectType::SQLite => {
25790 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
25791 let hex_lit = Expression::Literal(Box::new(Literal::HexString(
25792 "2a5b5e012d7f5d2a".to_string(),
25793 )));
25794 let cast_expr = Expression::Cast(Box::new(Cast {
25795 this: hex_lit,
25796 to: DataType::Text,
25797 trailing_comments: Vec::new(),
25798 double_colon_syntax: false,
25799 format: Option::None,
25800 default: Option::None,
25801 inferred_type: None,
25802 }));
25803 let glob = Expression::Glob(Box::new(BinaryOp {
25804 left: arg,
25805 right: cast_expr,
25806 left_comments: Vec::new(),
25807 operator_comments: Vec::new(),
25808 trailing_comments: Vec::new(),
25809 inferred_type: None,
25810 }));
25811 Ok(Expression::Paren(Box::new(Paren {
25812 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
25813 this: glob,
25814 inferred_type: None,
25815 })),
25816 trailing_comments: Vec::new(),
25817 })))
25818 }
25819 DialectType::TSQL | DialectType::Fabric => {
25820 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
25821 let hex_lit = Expression::Literal(Box::new(Literal::HexNumber(
25822 "255b5e002d7f5d25".to_string(),
25823 )));
25824 let convert_expr = Expression::Convert(Box::new(
25825 crate::expressions::ConvertFunc {
25826 this: hex_lit,
25827 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
25828 style: None,
25829 },
25830 ));
25831 let collated = Expression::Collation(Box::new(
25832 crate::expressions::CollationExpr {
25833 this: convert_expr,
25834 collation: "Latin1_General_BIN".to_string(),
25835 quoted: false,
25836 double_quoted: false,
25837 },
25838 ));
25839 let patindex = Expression::Function(Box::new(Function::new(
25840 "PATINDEX".to_string(),
25841 vec![collated, arg],
25842 )));
25843 let zero =
25844 Expression::Literal(Box::new(Literal::Number("0".to_string())));
25845 let eq_zero = Expression::Eq(Box::new(BinaryOp {
25846 left: patindex,
25847 right: zero,
25848 left_comments: Vec::new(),
25849 operator_comments: Vec::new(),
25850 trailing_comments: Vec::new(),
25851 inferred_type: None,
25852 }));
25853 Ok(Expression::Paren(Box::new(Paren {
25854 this: eq_zero,
25855 trailing_comments: Vec::new(),
25856 })))
25857 }
25858 DialectType::Oracle => {
25859 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
25860 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
25861 let s1 = Expression::Literal(Box::new(Literal::String(
25862 "^[".to_string(),
25863 )));
25864 let chr1 = Expression::Function(Box::new(Function::new(
25865 "CHR".to_string(),
25866 vec![Expression::Literal(Box::new(Literal::Number(
25867 "1".to_string(),
25868 )))],
25869 )));
25870 let dash =
25871 Expression::Literal(Box::new(Literal::String("-".to_string())));
25872 let chr127 = Expression::Function(Box::new(Function::new(
25873 "CHR".to_string(),
25874 vec![Expression::Literal(Box::new(Literal::Number(
25875 "127".to_string(),
25876 )))],
25877 )));
25878 let s2 = Expression::Literal(Box::new(Literal::String(
25879 "]*$".to_string(),
25880 )));
25881 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
25882 let concat1 =
25883 Expression::DPipe(Box::new(crate::expressions::DPipe {
25884 this: Box::new(s1),
25885 expression: Box::new(chr1),
25886 safe: None,
25887 }));
25888 let concat2 =
25889 Expression::DPipe(Box::new(crate::expressions::DPipe {
25890 this: Box::new(concat1),
25891 expression: Box::new(dash),
25892 safe: None,
25893 }));
25894 let concat3 =
25895 Expression::DPipe(Box::new(crate::expressions::DPipe {
25896 this: Box::new(concat2),
25897 expression: Box::new(chr127),
25898 safe: None,
25899 }));
25900 let concat4 =
25901 Expression::DPipe(Box::new(crate::expressions::DPipe {
25902 this: Box::new(concat3),
25903 expression: Box::new(s2),
25904 safe: None,
25905 }));
25906 let regexp_like = Expression::Function(Box::new(Function::new(
25907 "REGEXP_LIKE".to_string(),
25908 vec![arg, concat4],
25909 )));
25910 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
25911 let true_expr =
25912 Expression::Column(Box::new(crate::expressions::Column {
25913 name: Identifier {
25914 name: "TRUE".to_string(),
25915 quoted: false,
25916 trailing_comments: Vec::new(),
25917 span: None,
25918 },
25919 table: None,
25920 join_mark: false,
25921 trailing_comments: Vec::new(),
25922 span: None,
25923 inferred_type: None,
25924 }));
25925 let nvl = Expression::Function(Box::new(Function::new(
25926 "NVL".to_string(),
25927 vec![regexp_like, true_expr],
25928 )));
25929 Ok(nvl)
25930 }
25931 _ => Ok(Expression::Function(Box::new(Function::new(
25932 "IS_ASCII".to_string(),
25933 vec![arg],
25934 )))),
25935 }
25936 } else {
25937 Ok(e)
25938 }
25939 }
25940
            Action::StrPositionConvert => {
                // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
                //
                // Targets without a native start-position parameter get a
                // CASE/IF expansion that searches a SUBSTRING suffix and then
                // shifts the result back by (pos - 1), mapping "not found"
                // (inner result 0) to 0.
                if let Expression::Function(f) = e {
                    if f.args.len() < 2 {
                        // Malformed call: need at least haystack + needle.
                        return Ok(Expression::Function(f));
                    }
                    let mut args = f.args;

                    // Positional args: haystack, needle, then optional
                    // start position and occurrence count.
                    let haystack = args.remove(0);
                    let needle = args.remove(0);
                    let position = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };
                    let occurrence = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };

                    // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
                    // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
                    fn build_position_expansion(
                        haystack: Expression,
                        needle: Expression,
                        pos: Expression,
                        occurrence: Option<Expression>,
                        inner_func: &str,
                        wrapper: &str, // "CASE", "IF", "IIF"
                    ) -> Expression {
                        // Search only the suffix starting at `pos`.
                        let substr = Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![haystack, pos.clone()],
                        )));
                        let mut inner_args = vec![substr, needle];
                        if let Some(occ) = occurrence {
                            inner_args.push(occ);
                        }
                        let inner_call = Expression::Function(Box::new(Function::new(
                            inner_func.to_string(),
                            inner_args,
                        )));
                        let zero =
                            Expression::Literal(Box::new(Literal::Number("0".to_string())));
                        let one =
                            Expression::Literal(Box::new(Literal::Number("1".to_string())));
                        // Not-found sentinel test: inner_call = 0.
                        let eq_zero = Expression::Eq(Box::new(BinaryOp {
                            left: inner_call.clone(),
                            right: zero.clone(),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        // Shift the suffix-relative index back to the full
                        // string: inner_call + pos - 1.
                        let add_pos = Expression::Add(Box::new(BinaryOp {
                            left: inner_call,
                            right: pos,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        let sub_one = Expression::Sub(Box::new(BinaryOp {
                            left: add_pos,
                            right: one,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));

                        // Same logic, three surface syntaxes.
                        match wrapper {
                            "CASE" => Expression::Case(Box::new(Case {
                                operand: Option::None,
                                whens: vec![(eq_zero, zero)],
                                else_: Some(sub_one),
                                comments: Vec::new(),
                                inferred_type: None,
                            })),
                            "IIF" => Expression::Function(Box::new(Function::new(
                                "IIF".to_string(),
                                vec![eq_zero, zero, sub_one],
                            ))),
                            _ => Expression::Function(Box::new(Function::new(
                                "IF".to_string(),
                                vec![eq_zero, zero, sub_one],
                            ))),
                        }
                    }

                    match target {
                        // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
                        DialectType::Athena
                        | DialectType::DuckDB
                        | DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Drill => {
                            if let Some(pos) = position {
                                // STRPOS has no start parameter: expand.
                                let wrapper = if matches!(target, DialectType::DuckDB) {
                                    "CASE"
                                } else {
                                    "IF"
                                };
                                let result = build_position_expansion(
                                    haystack, needle, pos, occurrence, "STRPOS", wrapper,
                                );
                                if matches!(target, DialectType::Drill) {
                                    // Drill uses backtick-quoted `IF`
                                    if let Expression::Function(mut f) = result {
                                        f.name = "`IF`".to_string();
                                        Ok(Expression::Function(f))
                                    } else {
                                        Ok(result)
                                    }
                                } else {
                                    Ok(result)
                                }
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "STRPOS".to_string(),
                                    vec![haystack, needle],
                                ))))
                            }
                        }
                        // SQLite: IIF wrapper
                        DialectType::SQLite => {
                            if let Some(pos) = position {
                                Ok(build_position_expansion(
                                    haystack, needle, pos, occurrence, "INSTR", "IIF",
                                ))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "INSTR".to_string(),
                                    vec![haystack, needle],
                                ))))
                            }
                        }
                        // INSTR group: Teradata, BigQuery, Oracle
                        // (native position and occurrence parameters)
                        DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            if let Some(occ) = occurrence {
                                a.push(occ);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "INSTR".to_string(),
                                a,
                            ))))
                        }
                        // CHARINDEX group: Snowflake, TSQL — note the
                        // needle-first argument order; occurrence is dropped.
                        DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
                            let mut a = vec![needle, haystack];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "CHARINDEX".to_string(),
                                a,
                            ))))
                        }
                        // POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
                        DialectType::PostgreSQL
                        | DialectType::Materialize
                        | DialectType::RisingWave
                        | DialectType::Redshift => {
                            if let Some(pos) = position {
                                // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
                                //        ELSE POSITION(...) + pos - 1 END
                                let substr = Expression::Substring(Box::new(
                                    crate::expressions::SubstringFunc {
                                        this: haystack,
                                        start: pos.clone(),
                                        length: Option::None,
                                        from_for_syntax: true,
                                    },
                                ));
                                let pos_in = Expression::StrPosition(Box::new(
                                    crate::expressions::StrPosition {
                                        this: Box::new(substr),
                                        substr: Some(Box::new(needle)),
                                        position: Option::None,
                                        occurrence: Option::None,
                                    },
                                ));
                                let zero = Expression::Literal(Box::new(Literal::Number(
                                    "0".to_string(),
                                )));
                                let one = Expression::Literal(Box::new(Literal::Number(
                                    "1".to_string(),
                                )));
                                let eq_zero = Expression::Eq(Box::new(BinaryOp {
                                    left: pos_in.clone(),
                                    right: zero.clone(),
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                                let add_pos = Expression::Add(Box::new(BinaryOp {
                                    left: pos_in,
                                    right: pos,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                                let sub_one = Expression::Sub(Box::new(BinaryOp {
                                    left: add_pos,
                                    right: one,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                                Ok(Expression::Case(Box::new(Case {
                                    operand: Option::None,
                                    whens: vec![(eq_zero, zero)],
                                    else_: Some(sub_one),
                                    comments: Vec::new(),
                                    inferred_type: None,
                                })))
                            } else {
                                // Simple two-arg case: native POSITION(needle IN haystack).
                                Ok(Expression::StrPosition(Box::new(
                                    crate::expressions::StrPosition {
                                        this: Box::new(haystack),
                                        substr: Some(Box::new(needle)),
                                        position: Option::None,
                                        occurrence: Option::None,
                                    },
                                )))
                            }
                        }
                        // LOCATE group: MySQL, Hive, Spark, Databricks, Doris
                        // (needle-first order; occurrence dropped)
                        DialectType::MySQL
                        | DialectType::SingleStore
                        | DialectType::TiDB
                        | DialectType::Hive
                        | DialectType::Spark
                        | DialectType::Databricks
                        | DialectType::Doris
                        | DialectType::StarRocks => {
                            let mut a = vec![needle, haystack];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "LOCATE".to_string(),
                                a,
                            ))))
                        }
                        // ClickHouse: POSITION(haystack, needle[, position])
                        DialectType::ClickHouse => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "POSITION".to_string(),
                                a,
                            ))))
                        }
                        // Fallback: keep STR_POSITION with all supplied args.
                        _ => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            if let Some(occ) = occurrence {
                                a.push(occ);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "STR_POSITION".to_string(),
                                a,
                            ))))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
26223
26224 Action::ArraySumConvert => {
26225 // ARRAY_SUM(arr) -> dialect-specific
26226 if let Expression::Function(f) = e {
26227 let args = f.args;
26228 match target {
26229 DialectType::DuckDB => Ok(Expression::Function(Box::new(
26230 Function::new("LIST_SUM".to_string(), args),
26231 ))),
26232 DialectType::Spark | DialectType::Databricks => {
26233 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
26234 let arr = args.into_iter().next().unwrap();
26235 let zero =
26236 Expression::Literal(Box::new(Literal::Number("0".to_string())));
26237 let acc_id = Identifier::new("acc");
26238 let x_id = Identifier::new("x");
26239 let acc = Expression::Identifier(acc_id.clone());
26240 let x = Expression::Identifier(x_id.clone());
26241 let add = Expression::Add(Box::new(BinaryOp {
26242 left: acc.clone(),
26243 right: x,
26244 left_comments: Vec::new(),
26245 operator_comments: Vec::new(),
26246 trailing_comments: Vec::new(),
26247 inferred_type: None,
26248 }));
26249 let lambda1 =
26250 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26251 parameters: vec![acc_id.clone(), x_id],
26252 body: add,
26253 colon: false,
26254 parameter_types: Vec::new(),
26255 }));
26256 let lambda2 =
26257 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26258 parameters: vec![acc_id],
26259 body: acc,
26260 colon: false,
26261 parameter_types: Vec::new(),
26262 }));
26263 Ok(Expression::Function(Box::new(Function::new(
26264 "AGGREGATE".to_string(),
26265 vec![arr, zero, lambda1, lambda2],
26266 ))))
26267 }
26268 DialectType::Presto | DialectType::Athena => {
26269 // Presto/Athena keep ARRAY_SUM natively
26270 Ok(Expression::Function(Box::new(Function::new(
26271 "ARRAY_SUM".to_string(),
26272 args,
26273 ))))
26274 }
26275 DialectType::Trino => {
26276 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
26277 if args.len() == 1 {
26278 let arr = args.into_iter().next().unwrap();
26279 let zero = Expression::Literal(Box::new(Literal::Number(
26280 "0".to_string(),
26281 )));
26282 let acc_id = Identifier::new("acc");
26283 let x_id = Identifier::new("x");
26284 let acc = Expression::Identifier(acc_id.clone());
26285 let x = Expression::Identifier(x_id.clone());
26286 let add = Expression::Add(Box::new(BinaryOp {
26287 left: acc.clone(),
26288 right: x,
26289 left_comments: Vec::new(),
26290 operator_comments: Vec::new(),
26291 trailing_comments: Vec::new(),
26292 inferred_type: None,
26293 }));
26294 let lambda1 = Expression::Lambda(Box::new(
26295 crate::expressions::LambdaExpr {
26296 parameters: vec![acc_id.clone(), x_id],
26297 body: add,
26298 colon: false,
26299 parameter_types: Vec::new(),
26300 },
26301 ));
26302 let lambda2 = Expression::Lambda(Box::new(
26303 crate::expressions::LambdaExpr {
26304 parameters: vec![acc_id],
26305 body: acc,
26306 colon: false,
26307 parameter_types: Vec::new(),
26308 },
26309 ));
26310 Ok(Expression::Function(Box::new(Function::new(
26311 "REDUCE".to_string(),
26312 vec![arr, zero, lambda1, lambda2],
26313 ))))
26314 } else {
26315 Ok(Expression::Function(Box::new(Function::new(
26316 "ARRAY_SUM".to_string(),
26317 args,
26318 ))))
26319 }
26320 }
26321 DialectType::ClickHouse => {
26322 // arraySum(lambda, arr) or arraySum(arr)
26323 Ok(Expression::Function(Box::new(Function::new(
26324 "arraySum".to_string(),
26325 args,
26326 ))))
26327 }
26328 _ => Ok(Expression::Function(Box::new(Function::new(
26329 "ARRAY_SUM".to_string(),
26330 args,
26331 )))),
26332 }
26333 } else {
26334 Ok(e)
26335 }
26336 }
26337
26338 Action::ArraySizeConvert => {
26339 if let Expression::Function(f) = e {
26340 Ok(Expression::Function(Box::new(Function::new(
26341 "REPEATED_COUNT".to_string(),
26342 f.args,
26343 ))))
26344 } else {
26345 Ok(e)
26346 }
26347 }
26348
            Action::ArrayAnyConvert => {
                // ARRAY_ANY(arr, pred) -> dialect-specific "does any element match".
                // Dialects with no native ANY_MATCH get the pattern
                //   (len(arr) = 0 OR len(filter(arr, pred)) <> 0)
                // i.e. true when the array is empty or at least one element passes
                // the predicate. Calls with an arity other than 2 are passed
                // through as a plain ARRAY_ANY function.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    if args.len() == 2 {
                        let arr = args.remove(0);
                        let lambda = args.remove(0);

                        // Extract lambda parameter name and body; fall back to the
                        // name "x" (and treat the whole argument as the predicate
                        // body) when the second argument is not a lambda.
                        let (param_name, pred_body) =
                            if let Expression::Lambda(ref lam) = lambda {
                                let name = if let Some(p) = lam.parameters.first() {
                                    p.name.clone()
                                } else {
                                    "x".to_string()
                                };
                                (name, lam.body.clone())
                            } else {
                                ("x".to_string(), lambda.clone())
                            };

                        // Helper: build a function call Expression
                        let make_func = |name: &str, args: Vec<Expression>| -> Expression {
                            Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                args,
                            )))
                        };

                        // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren
                        // `len_args_extra` carries trailing length-function arguments
                        // (PostgreSQL's ARRAY_LENGTH needs the dimension, 1).
                        let build_filter_pattern = |len_func: &str,
                                                    len_args_extra: Vec<Expression>,
                                                    filter_expr: Expression|
                         -> Expression {
                            // len_func(arr, ...extra) = 0
                            let mut len_arr_args = vec![arr.clone()];
                            len_arr_args.extend(len_args_extra.clone());
                            let len_arr = make_func(len_func, len_arr_args);
                            let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
                                len_arr,
                                Expression::number(0),
                            )));

                            // len_func(filter_expr, ...extra) <> 0
                            let mut len_filter_args = vec![filter_expr];
                            len_filter_args.extend(len_args_extra);
                            let len_filter = make_func(len_func, len_filter_args);
                            let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
                                len_filter,
                                Expression::number(0),
                            )));

                            // (eq_zero OR neq_zero)
                            let or_expr =
                                Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
                            Expression::Paren(Box::new(Paren {
                                this: or_expr,
                                trailing_comments: Vec::new(),
                            }))
                        };

                        match target {
                            // Presto-family has a native higher-order ANY_MATCH.
                            DialectType::Trino | DialectType::Presto | DialectType::Athena => {
                                Ok(make_func("ANY_MATCH", vec![arr, lambda]))
                            }
                            DialectType::ClickHouse => {
                                // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
                                // ClickHouse arrayFilter takes lambda first, then array
                                let filter_expr =
                                    make_func("arrayFilter", vec![lambda, arr.clone()]);
                                Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
                            }
                            DialectType::Databricks | DialectType::Spark => {
                                // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("SIZE", vec![], filter_expr))
                            }
                            DialectType::DuckDB => {
                                // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("LIST_FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
                            }
                            DialectType::Teradata => {
                                // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
                            }
                            DialectType::BigQuery => {
                                // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
                                // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
                                // The lambda parameter name doubles as the UNNEST alias
                                // so the predicate body's references resolve.
                                let param_col = Expression::column(&param_name);
                                let unnest_expr = Expression::Unnest(Box::new(
                                    crate::expressions::UnnestFunc {
                                        this: arr.clone(),
                                        expressions: vec![],
                                        with_ordinality: false,
                                        alias: Some(Identifier::new(&param_name)),
                                        offset_alias: None,
                                    },
                                ));
                                let mut sel = crate::expressions::Select::default();
                                sel.expressions = vec![param_col];
                                sel.from = Some(crate::expressions::From {
                                    expressions: vec![unnest_expr],
                                });
                                sel.where_clause =
                                    Some(crate::expressions::Where { this: pred_body });
                                let array_subquery =
                                    make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                                Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
                            }
                            DialectType::PostgreSQL => {
                                // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
                                // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
                                let param_col = Expression::column(&param_name);
                                // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
                                let unnest_with_alias =
                                    Expression::Alias(Box::new(crate::expressions::Alias {
                                        this: Expression::Unnest(Box::new(
                                            crate::expressions::UnnestFunc {
                                                this: arr.clone(),
                                                expressions: vec![],
                                                with_ordinality: false,
                                                alias: None,
                                                offset_alias: None,
                                            },
                                        )),
                                        alias: Identifier::new("_t0"),
                                        column_aliases: vec![Identifier::new(&param_name)],
                                        pre_alias_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                let mut sel = crate::expressions::Select::default();
                                sel.expressions = vec![param_col];
                                sel.from = Some(crate::expressions::From {
                                    expressions: vec![unnest_with_alias],
                                });
                                sel.where_clause =
                                    Some(crate::expressions::Where { this: pred_body });
                                let array_subquery =
                                    make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                                // PostgreSQL's ARRAY_LENGTH requires the dimension argument.
                                Ok(build_filter_pattern(
                                    "ARRAY_LENGTH",
                                    vec![Expression::number(1)],
                                    array_subquery,
                                ))
                            }
                            // No rewrite known for this dialect: keep a canonical ARRAY_ANY.
                            _ => Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_ANY".to_string(),
                                vec![arr, lambda],
                            )))),
                        }
                    } else {
                        // Wrong arity: emit the canonical spelling untouched.
                        Ok(Expression::Function(Box::new(Function::new(
                            "ARRAY_ANY".to_string(),
                            args,
                        ))))
                    }
                } else {
                    Ok(e)
                }
            }
26514
            Action::DecodeSimplify => {
                // DECODE(x, search1, result1, ..., default) -> CASE WHEN ... THEN result1 ... [ELSE default] END
                // For literal search values: CASE WHEN x = search THEN result
                // For NULL search: CASE WHEN x IS NULL THEN result
                // For non-literal (column, expr): CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
                //
                // The null-safe form reproduces DECODE's NULL-equals-NULL matching,
                // which plain `=` does not provide.

                // True for the expression kinds treated as "plain literals" here
                // (numbers/strings, booleans, and negated literals).
                fn is_decode_literal(e: &Expression) -> bool {
                    matches!(
                        e,
                        Expression::Literal(_) | Expression::Boolean(_) | Expression::Neg(_)
                    )
                }

                // Assemble the CASE from the decoded operand, its
                // (search, result) pairs, and the optional trailing default.
                let build_decode_case =
                    |this_expr: Expression,
                     pairs: Vec<(Expression, Expression)>,
                     default: Option<Expression>| {
                        let whens: Vec<(Expression, Expression)> = pairs
                            .into_iter()
                            .map(|(search, result)| {
                                if matches!(&search, Expression::Null(_)) {
                                    // NULL search -> IS NULL
                                    let condition = Expression::Is(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: Expression::Null(crate::expressions::Null),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    (condition, result)
                                } else if is_decode_literal(&search)
                                    || is_decode_literal(&this_expr)
                                {
                                    // At least one side is a literal -> simple equality (no NULL check needed)
                                    let eq = Expression::Eq(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: search,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    (eq, result)
                                } else {
                                    // Non-literal -> null-safe comparison
                                    // Comparison operators must be parenthesized when
                                    // embedded as the right side of `=` / `IS NULL`.
                                    let needs_paren = matches!(
                                        &search,
                                        Expression::Eq(_)
                                            | Expression::Neq(_)
                                            | Expression::Gt(_)
                                            | Expression::Gte(_)
                                            | Expression::Lt(_)
                                            | Expression::Lte(_)
                                    );
                                    let search_ref = if needs_paren {
                                        Expression::Paren(Box::new(crate::expressions::Paren {
                                            this: search.clone(),
                                            trailing_comments: Vec::new(),
                                        }))
                                    } else {
                                        search.clone()
                                    };
                                    // Build: x = search OR (x IS NULL AND search IS NULL)
                                    let eq = Expression::Eq(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: search_ref,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    // Second copy of `search` for the IS NULL leg.
                                    let search_in_null = if needs_paren {
                                        Expression::Paren(Box::new(crate::expressions::Paren {
                                            this: search.clone(),
                                            trailing_comments: Vec::new(),
                                        }))
                                    } else {
                                        search.clone()
                                    };
                                    let x_is_null = Expression::Is(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: Expression::Null(crate::expressions::Null),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    let search_is_null = Expression::Is(Box::new(BinaryOp {
                                        left: search_in_null,
                                        right: Expression::Null(crate::expressions::Null),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    let both_null = Expression::And(Box::new(BinaryOp {
                                        left: x_is_null,
                                        right: search_is_null,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    let condition = Expression::Or(Box::new(BinaryOp {
                                        left: eq,
                                        right: Expression::Paren(Box::new(
                                            crate::expressions::Paren {
                                                this: both_null,
                                                trailing_comments: Vec::new(),
                                            },
                                        )),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    (condition, result)
                                }
                            })
                            .collect();
                        // Searched CASE (no operand): each WHEN carries the full condition.
                        Expression::Case(Box::new(Case {
                            operand: None,
                            whens,
                            else_: default,
                            comments: Vec::new(),
                            inferred_type: None,
                        }))
                    };

                if let Expression::Decode(decode) = e {
                    // Structured Decode node: pairs and default are already split out.
                    Ok(build_decode_case(
                        decode.this,
                        decode.search_results,
                        decode.default,
                    ))
                } else if let Expression::DecodeCase(dc) = e {
                    // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
                    let mut exprs = dc.expressions;
                    if exprs.len() < 3 {
                        // Too few elements to form even one (search, result) pair:
                        // leave the node untouched.
                        return Ok(Expression::DecodeCase(Box::new(
                            crate::expressions::DecodeCase { expressions: exprs },
                        )));
                    }
                    let this_expr = exprs.remove(0);
                    let mut pairs = Vec::new();
                    let mut default = None;
                    let mut i = 0;
                    // Consume (search, result) pairs two at a time.
                    while i + 1 < exprs.len() {
                        pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
                        i += 2;
                    }
                    if i < exprs.len() {
                        // Odd remaining element is the default
                        default = Some(exprs[i].clone());
                    }
                    Ok(build_decode_case(this_expr, pairs, default))
                } else {
                    Ok(e)
                }
            }
26675
            Action::CreateTableLikeToCtas => {
                // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
                // LIMIT 0 copies b's column layout without copying any rows.
                if let Expression::CreateTable(ct) = e {
                    // First LIKE constraint (if any) supplies the source table.
                    let like_source = ct.constraints.iter().find_map(|c| {
                        if let crate::expressions::TableConstraint::Like { source, .. } = c {
                            Some(source.clone())
                        } else {
                            None
                        }
                    });
                    if let Some(source_table) = like_source {
                        let mut new_ct = *ct;
                        // NOTE(review): clears ALL constraints, not just the LIKE one —
                        // confirm non-LIKE constraints can never coexist here.
                        new_ct.constraints.clear();
                        // Build: SELECT * FROM b LIMIT 0
                        let select = Expression::Select(Box::new(crate::expressions::Select {
                            expressions: vec![Expression::Star(crate::expressions::Star {
                                table: None,
                                except: None,
                                replace: None,
                                rename: None,
                                trailing_comments: Vec::new(),
                                span: None,
                            })],
                            from: Some(crate::expressions::From {
                                expressions: vec![Expression::Table(Box::new(source_table))],
                            }),
                            limit: Some(crate::expressions::Limit {
                                this: Expression::Literal(Box::new(Literal::Number(
                                    "0".to_string(),
                                ))),
                                percent: false,
                                comments: Vec::new(),
                            }),
                            ..Default::default()
                        }));
                        new_ct.as_select = Some(select);
                        Ok(Expression::CreateTable(Box::new(new_ct)))
                    } else {
                        // No LIKE constraint: statement passes through unchanged.
                        Ok(Expression::CreateTable(ct))
                    }
                } else {
                    Ok(e)
                }
            }
26720
            Action::CreateTableLikeToSelectInto => {
                // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
                // (T-SQL-style structure copy: TOP 0 keeps the shape, drops the rows.)
                if let Expression::CreateTable(ct) = e {
                    // First LIKE constraint (if any) supplies the source table.
                    let like_source = ct.constraints.iter().find_map(|c| {
                        if let crate::expressions::TableConstraint::Like { source, .. } = c {
                            Some(source.clone())
                        } else {
                            None
                        }
                    });
                    if let Some(source_table) = like_source {
                        // Alias the source as "temp" (matches the documented shape above).
                        let mut aliased_source = source_table;
                        aliased_source.alias = Some(Identifier::new("temp"));
                        // Build: SELECT TOP 0 * INTO a FROM b AS temp
                        let select = Expression::Select(Box::new(crate::expressions::Select {
                            expressions: vec![Expression::Star(crate::expressions::Star {
                                table: None,
                                except: None,
                                replace: None,
                                rename: None,
                                trailing_comments: Vec::new(),
                                span: None,
                            })],
                            from: Some(crate::expressions::From {
                                expressions: vec![Expression::Table(Box::new(aliased_source))],
                            }),
                            // INTO a — the table being created.
                            into: Some(crate::expressions::SelectInto {
                                this: Expression::Table(Box::new(ct.name.clone())),
                                temporary: false,
                                unlogged: false,
                                bulk_collect: false,
                                expressions: Vec::new(),
                            }),
                            // TOP 0 — no rows copied.
                            top: Some(crate::expressions::Top {
                                this: Expression::Literal(Box::new(Literal::Number(
                                    "0".to_string(),
                                ))),
                                percent: false,
                                with_ties: false,
                                parenthesized: false,
                            }),
                            ..Default::default()
                        }));
                        Ok(select)
                    } else {
                        // No LIKE constraint: statement passes through unchanged.
                        Ok(Expression::CreateTable(ct))
                    }
                } else {
                    Ok(e)
                }
            }
26772
26773 Action::CreateTableLikeToAs => {
26774 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
26775 if let Expression::CreateTable(ct) = e {
26776 let like_source = ct.constraints.iter().find_map(|c| {
26777 if let crate::expressions::TableConstraint::Like { source, .. } = c {
26778 Some(source.clone())
26779 } else {
26780 None
26781 }
26782 });
26783 if let Some(source_table) = like_source {
26784 let mut new_ct = *ct;
26785 new_ct.constraints.clear();
26786 // AS b (just a table reference, not a SELECT)
26787 new_ct.as_select = Some(Expression::Table(Box::new(source_table)));
26788 Ok(Expression::CreateTable(Box::new(new_ct)))
26789 } else {
26790 Ok(Expression::CreateTable(ct))
26791 }
26792 } else {
26793 Ok(e)
26794 }
26795 }
26796
26797 Action::TsOrDsToDateConvert => {
26798 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
26799 if let Expression::Function(f) = e {
26800 let mut args = f.args;
26801 let this = args.remove(0);
26802 let fmt = if !args.is_empty() {
26803 match &args[0] {
26804 Expression::Literal(lit)
26805 if matches!(lit.as_ref(), Literal::String(_)) =>
26806 {
26807 let Literal::String(s) = lit.as_ref() else {
26808 unreachable!()
26809 };
26810 Some(s.clone())
26811 }
26812 _ => None,
26813 }
26814 } else {
26815 None
26816 };
26817 Ok(Expression::TsOrDsToDate(Box::new(
26818 crate::expressions::TsOrDsToDate {
26819 this: Box::new(this),
26820 format: fmt,
26821 safe: None,
26822 },
26823 )))
26824 } else {
26825 Ok(e)
26826 }
26827 }
26828
26829 Action::TsOrDsToDateStrConvert => {
26830 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
26831 if let Expression::Function(f) = e {
26832 let arg = f.args.into_iter().next().unwrap();
26833 let str_type = match target {
26834 DialectType::DuckDB
26835 | DialectType::PostgreSQL
26836 | DialectType::Materialize => DataType::Text,
26837 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
26838 DataType::Custom {
26839 name: "STRING".to_string(),
26840 }
26841 }
26842 DialectType::Presto
26843 | DialectType::Trino
26844 | DialectType::Athena
26845 | DialectType::Drill => DataType::VarChar {
26846 length: None,
26847 parenthesized_length: false,
26848 },
26849 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
26850 DataType::Custom {
26851 name: "STRING".to_string(),
26852 }
26853 }
26854 _ => DataType::VarChar {
26855 length: None,
26856 parenthesized_length: false,
26857 },
26858 };
26859 let cast_expr = Expression::Cast(Box::new(Cast {
26860 this: arg,
26861 to: str_type,
26862 double_colon_syntax: false,
26863 trailing_comments: Vec::new(),
26864 format: None,
26865 default: None,
26866 inferred_type: None,
26867 }));
26868 Ok(Expression::Substring(Box::new(
26869 crate::expressions::SubstringFunc {
26870 this: cast_expr,
26871 start: Expression::number(1),
26872 length: Some(Expression::number(10)),
26873 from_for_syntax: false,
26874 },
26875 )))
26876 } else {
26877 Ok(e)
26878 }
26879 }
26880
26881 Action::DateStrToDateConvert => {
26882 // DATE_STR_TO_DATE(x) -> dialect-specific
26883 if let Expression::Function(f) = e {
26884 let arg = f.args.into_iter().next().unwrap();
26885 match target {
26886 DialectType::SQLite => {
26887 // SQLite: just the bare expression (dates are strings)
26888 Ok(arg)
26889 }
26890 _ => Ok(Expression::Cast(Box::new(Cast {
26891 this: arg,
26892 to: DataType::Date,
26893 double_colon_syntax: false,
26894 trailing_comments: Vec::new(),
26895 format: None,
26896 default: None,
26897 inferred_type: None,
26898 }))),
26899 }
26900 } else {
26901 Ok(e)
26902 }
26903 }
26904
26905 Action::TimeStrToDateConvert => {
26906 // TIME_STR_TO_DATE(x) -> dialect-specific
26907 if let Expression::Function(f) = e {
26908 let arg = f.args.into_iter().next().unwrap();
26909 match target {
26910 DialectType::Hive
26911 | DialectType::Doris
26912 | DialectType::StarRocks
26913 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
26914 Function::new("TO_DATE".to_string(), vec![arg]),
26915 ))),
26916 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26917 // Presto: CAST(x AS TIMESTAMP)
26918 Ok(Expression::Cast(Box::new(Cast {
26919 this: arg,
26920 to: DataType::Timestamp {
26921 timezone: false,
26922 precision: None,
26923 },
26924 double_colon_syntax: false,
26925 trailing_comments: Vec::new(),
26926 format: None,
26927 default: None,
26928 inferred_type: None,
26929 })))
26930 }
26931 _ => {
26932 // Default: CAST(x AS DATE)
26933 Ok(Expression::Cast(Box::new(Cast {
26934 this: arg,
26935 to: DataType::Date,
26936 double_colon_syntax: false,
26937 trailing_comments: Vec::new(),
26938 format: None,
26939 default: None,
26940 inferred_type: None,
26941 })))
26942 }
26943 }
26944 } else {
26945 Ok(e)
26946 }
26947 }
26948
26949 Action::TimeStrToTimeConvert => {
26950 // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type
26951 if let Expression::Function(f) = e {
26952 let mut args = f.args;
26953 let this = args.remove(0);
26954 let zone = if !args.is_empty() {
26955 match &args[0] {
26956 Expression::Literal(lit)
26957 if matches!(lit.as_ref(), Literal::String(_)) =>
26958 {
26959 let Literal::String(s) = lit.as_ref() else {
26960 unreachable!()
26961 };
26962 Some(s.clone())
26963 }
26964 _ => None,
26965 }
26966 } else {
26967 None
26968 };
26969 let has_zone = zone.is_some();
26970
26971 match target {
26972 DialectType::SQLite => {
26973 // SQLite: just the bare expression
26974 Ok(this)
26975 }
26976 DialectType::MySQL => {
26977 if has_zone {
26978 // MySQL with zone: TIMESTAMP(x)
26979 Ok(Expression::Function(Box::new(Function::new(
26980 "TIMESTAMP".to_string(),
26981 vec![this],
26982 ))))
26983 } else {
26984 // MySQL: CAST(x AS DATETIME) or with precision
26985 // Use DataType::Custom to avoid MySQL's transform_cast converting
26986 // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
26987 let precision = if let Expression::Literal(ref lit) = this {
26988 if let Literal::String(ref s) = lit.as_ref() {
26989 if let Some(dot_pos) = s.rfind('.') {
26990 let frac = &s[dot_pos + 1..];
26991 let digit_count = frac
26992 .chars()
26993 .take_while(|c| c.is_ascii_digit())
26994 .count();
26995 if digit_count > 0 {
26996 Some(digit_count)
26997 } else {
26998 None
26999 }
27000 } else {
27001 None
27002 }
27003 } else {
27004 None
27005 }
27006 } else {
27007 None
27008 };
27009 let type_name = match precision {
27010 Some(p) => format!("DATETIME({})", p),
27011 None => "DATETIME".to_string(),
27012 };
27013 Ok(Expression::Cast(Box::new(Cast {
27014 this,
27015 to: DataType::Custom { name: type_name },
27016 double_colon_syntax: false,
27017 trailing_comments: Vec::new(),
27018 format: None,
27019 default: None,
27020 inferred_type: None,
27021 })))
27022 }
27023 }
27024 DialectType::ClickHouse => {
27025 if has_zone {
27026 // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
27027 // We need to strip the timezone offset from the literal if present
27028 let clean_this = if let Expression::Literal(ref lit) = this {
27029 if let Literal::String(ref s) = lit.as_ref() {
27030 // Strip timezone offset like "-08:00" or "+00:00"
27031 let re_offset = s.rfind(|c: char| c == '+' || c == '-');
27032 if let Some(offset_pos) = re_offset {
27033 if offset_pos > 10 {
27034 // After the date part
27035 let trimmed = s[..offset_pos].to_string();
27036 Expression::Literal(Box::new(Literal::String(
27037 trimmed,
27038 )))
27039 } else {
27040 this.clone()
27041 }
27042 } else {
27043 this.clone()
27044 }
27045 } else {
27046 this.clone()
27047 }
27048 } else {
27049 this.clone()
27050 };
27051 let zone_str = zone.unwrap();
27052 // Build: CAST(x AS DateTime64(6, 'zone'))
27053 let type_name = format!("DateTime64(6, '{}')", zone_str);
27054 Ok(Expression::Cast(Box::new(Cast {
27055 this: clean_this,
27056 to: DataType::Custom { name: type_name },
27057 double_colon_syntax: false,
27058 trailing_comments: Vec::new(),
27059 format: None,
27060 default: None,
27061 inferred_type: None,
27062 })))
27063 } else {
27064 Ok(Expression::Cast(Box::new(Cast {
27065 this,
27066 to: DataType::Custom {
27067 name: "DateTime64(6)".to_string(),
27068 },
27069 double_colon_syntax: false,
27070 trailing_comments: Vec::new(),
27071 format: None,
27072 default: None,
27073 inferred_type: None,
27074 })))
27075 }
27076 }
27077 DialectType::BigQuery => {
27078 if has_zone {
27079 // BigQuery with zone: CAST(x AS TIMESTAMP)
27080 Ok(Expression::Cast(Box::new(Cast {
27081 this,
27082 to: DataType::Timestamp {
27083 timezone: false,
27084 precision: None,
27085 },
27086 double_colon_syntax: false,
27087 trailing_comments: Vec::new(),
27088 format: None,
27089 default: None,
27090 inferred_type: None,
27091 })))
27092 } else {
27093 // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
27094 Ok(Expression::Cast(Box::new(Cast {
27095 this,
27096 to: DataType::Custom {
27097 name: "DATETIME".to_string(),
27098 },
27099 double_colon_syntax: false,
27100 trailing_comments: Vec::new(),
27101 format: None,
27102 default: None,
27103 inferred_type: None,
27104 })))
27105 }
27106 }
27107 DialectType::Doris => {
27108 // Doris: CAST(x AS DATETIME)
27109 Ok(Expression::Cast(Box::new(Cast {
27110 this,
27111 to: DataType::Custom {
27112 name: "DATETIME".to_string(),
27113 },
27114 double_colon_syntax: false,
27115 trailing_comments: Vec::new(),
27116 format: None,
27117 default: None,
27118 inferred_type: None,
27119 })))
27120 }
27121 DialectType::TSQL | DialectType::Fabric => {
27122 if has_zone {
27123 // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
27124 let cast_expr = Expression::Cast(Box::new(Cast {
27125 this,
27126 to: DataType::Custom {
27127 name: "DATETIMEOFFSET".to_string(),
27128 },
27129 double_colon_syntax: false,
27130 trailing_comments: Vec::new(),
27131 format: None,
27132 default: None,
27133 inferred_type: None,
27134 }));
27135 Ok(Expression::AtTimeZone(Box::new(
27136 crate::expressions::AtTimeZone {
27137 this: cast_expr,
27138 zone: Expression::Literal(Box::new(Literal::String(
27139 "UTC".to_string(),
27140 ))),
27141 },
27142 )))
27143 } else {
27144 // TSQL: CAST(x AS DATETIME2)
27145 Ok(Expression::Cast(Box::new(Cast {
27146 this,
27147 to: DataType::Custom {
27148 name: "DATETIME2".to_string(),
27149 },
27150 double_colon_syntax: false,
27151 trailing_comments: Vec::new(),
27152 format: None,
27153 default: None,
27154 inferred_type: None,
27155 })))
27156 }
27157 }
27158 DialectType::DuckDB => {
27159 if has_zone {
27160 // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
27161 Ok(Expression::Cast(Box::new(Cast {
27162 this,
27163 to: DataType::Timestamp {
27164 timezone: true,
27165 precision: None,
27166 },
27167 double_colon_syntax: false,
27168 trailing_comments: Vec::new(),
27169 format: None,
27170 default: None,
27171 inferred_type: None,
27172 })))
27173 } else {
27174 // DuckDB: CAST(x AS TIMESTAMP)
27175 Ok(Expression::Cast(Box::new(Cast {
27176 this,
27177 to: DataType::Timestamp {
27178 timezone: false,
27179 precision: None,
27180 },
27181 double_colon_syntax: false,
27182 trailing_comments: Vec::new(),
27183 format: None,
27184 default: None,
27185 inferred_type: None,
27186 })))
27187 }
27188 }
27189 DialectType::PostgreSQL
27190 | DialectType::Materialize
27191 | DialectType::RisingWave => {
27192 if has_zone {
27193 // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
27194 Ok(Expression::Cast(Box::new(Cast {
27195 this,
27196 to: DataType::Timestamp {
27197 timezone: true,
27198 precision: None,
27199 },
27200 double_colon_syntax: false,
27201 trailing_comments: Vec::new(),
27202 format: None,
27203 default: None,
27204 inferred_type: None,
27205 })))
27206 } else {
27207 // PostgreSQL: CAST(x AS TIMESTAMP)
27208 Ok(Expression::Cast(Box::new(Cast {
27209 this,
27210 to: DataType::Timestamp {
27211 timezone: false,
27212 precision: None,
27213 },
27214 double_colon_syntax: false,
27215 trailing_comments: Vec::new(),
27216 format: None,
27217 default: None,
27218 inferred_type: None,
27219 })))
27220 }
27221 }
27222 DialectType::Snowflake => {
27223 if has_zone {
27224 // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
27225 Ok(Expression::Cast(Box::new(Cast {
27226 this,
27227 to: DataType::Timestamp {
27228 timezone: true,
27229 precision: None,
27230 },
27231 double_colon_syntax: false,
27232 trailing_comments: Vec::new(),
27233 format: None,
27234 default: None,
27235 inferred_type: None,
27236 })))
27237 } else {
27238 // Snowflake: CAST(x AS TIMESTAMP)
27239 Ok(Expression::Cast(Box::new(Cast {
27240 this,
27241 to: DataType::Timestamp {
27242 timezone: false,
27243 precision: None,
27244 },
27245 double_colon_syntax: false,
27246 trailing_comments: Vec::new(),
27247 format: None,
27248 default: None,
27249 inferred_type: None,
27250 })))
27251 }
27252 }
27253 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
27254 if has_zone {
27255 // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
27256 // Check for precision from sub-second digits
27257 let precision = if let Expression::Literal(ref lit) = this {
27258 if let Literal::String(ref s) = lit.as_ref() {
27259 if let Some(dot_pos) = s.rfind('.') {
27260 let frac = &s[dot_pos + 1..];
27261 let digit_count = frac
27262 .chars()
27263 .take_while(|c| c.is_ascii_digit())
27264 .count();
27265 if digit_count > 0
27266 && matches!(target, DialectType::Trino)
27267 {
27268 Some(digit_count as u32)
27269 } else {
27270 None
27271 }
27272 } else {
27273 None
27274 }
27275 } else {
27276 None
27277 }
27278 } else {
27279 None
27280 };
27281 let dt = if let Some(prec) = precision {
27282 DataType::Timestamp {
27283 timezone: true,
27284 precision: Some(prec),
27285 }
27286 } else {
27287 DataType::Timestamp {
27288 timezone: true,
27289 precision: None,
27290 }
27291 };
27292 Ok(Expression::Cast(Box::new(Cast {
27293 this,
27294 to: dt,
27295 double_colon_syntax: false,
27296 trailing_comments: Vec::new(),
27297 format: None,
27298 default: None,
27299 inferred_type: None,
27300 })))
27301 } else {
27302 // Check for sub-second precision for Trino
27303 let precision = if let Expression::Literal(ref lit) = this {
27304 if let Literal::String(ref s) = lit.as_ref() {
27305 if let Some(dot_pos) = s.rfind('.') {
27306 let frac = &s[dot_pos + 1..];
27307 let digit_count = frac
27308 .chars()
27309 .take_while(|c| c.is_ascii_digit())
27310 .count();
27311 if digit_count > 0
27312 && matches!(target, DialectType::Trino)
27313 {
27314 Some(digit_count as u32)
27315 } else {
27316 None
27317 }
27318 } else {
27319 None
27320 }
27321 } else {
27322 None
27323 }
27324 } else {
27325 None
27326 };
27327 let dt = DataType::Timestamp {
27328 timezone: false,
27329 precision,
27330 };
27331 Ok(Expression::Cast(Box::new(Cast {
27332 this,
27333 to: dt,
27334 double_colon_syntax: false,
27335 trailing_comments: Vec::new(),
27336 format: None,
27337 default: None,
27338 inferred_type: None,
27339 })))
27340 }
27341 }
27342 DialectType::Redshift => {
27343 if has_zone {
27344 // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
27345 Ok(Expression::Cast(Box::new(Cast {
27346 this,
27347 to: DataType::Timestamp {
27348 timezone: true,
27349 precision: None,
27350 },
27351 double_colon_syntax: false,
27352 trailing_comments: Vec::new(),
27353 format: None,
27354 default: None,
27355 inferred_type: None,
27356 })))
27357 } else {
27358 // Redshift: CAST(x AS TIMESTAMP)
27359 Ok(Expression::Cast(Box::new(Cast {
27360 this,
27361 to: DataType::Timestamp {
27362 timezone: false,
27363 precision: None,
27364 },
27365 double_colon_syntax: false,
27366 trailing_comments: Vec::new(),
27367 format: None,
27368 default: None,
27369 inferred_type: None,
27370 })))
27371 }
27372 }
27373 _ => {
27374 // Default: CAST(x AS TIMESTAMP)
27375 Ok(Expression::Cast(Box::new(Cast {
27376 this,
27377 to: DataType::Timestamp {
27378 timezone: false,
27379 precision: None,
27380 },
27381 double_colon_syntax: false,
27382 trailing_comments: Vec::new(),
27383 format: None,
27384 default: None,
27385 inferred_type: None,
27386 })))
27387 }
27388 }
27389 } else {
27390 Ok(e)
27391 }
27392 }
27393
27394 Action::DateToDateStrConvert => {
27395 // DATE_TO_DATE_STR(x) -> CAST(x AS text_type) per dialect
27396 if let Expression::Function(f) = e {
27397 let arg = f.args.into_iter().next().unwrap();
27398 let str_type = match target {
27399 DialectType::DuckDB => DataType::Text,
27400 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
27401 DataType::Custom {
27402 name: "STRING".to_string(),
27403 }
27404 }
27405 DialectType::Presto
27406 | DialectType::Trino
27407 | DialectType::Athena
27408 | DialectType::Drill => DataType::VarChar {
27409 length: None,
27410 parenthesized_length: false,
27411 },
27412 _ => DataType::VarChar {
27413 length: None,
27414 parenthesized_length: false,
27415 },
27416 };
27417 Ok(Expression::Cast(Box::new(Cast {
27418 this: arg,
27419 to: str_type,
27420 double_colon_syntax: false,
27421 trailing_comments: Vec::new(),
27422 format: None,
27423 default: None,
27424 inferred_type: None,
27425 })))
27426 } else {
27427 Ok(e)
27428 }
27429 }
27430
27431 Action::DateToDiConvert => {
27432 // DATE_TO_DI(x) -> CAST(format_func(x, fmt) AS INT)
27433 if let Expression::Function(f) = e {
27434 let arg = f.args.into_iter().next().unwrap();
27435 let inner = match target {
27436 DialectType::DuckDB => {
27437 // STRFTIME(x, '%Y%m%d')
27438 Expression::Function(Box::new(Function::new(
27439 "STRFTIME".to_string(),
27440 vec![arg, Expression::string("%Y%m%d")],
27441 )))
27442 }
27443 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
27444 // DATE_FORMAT(x, 'yyyyMMdd')
27445 Expression::Function(Box::new(Function::new(
27446 "DATE_FORMAT".to_string(),
27447 vec![arg, Expression::string("yyyyMMdd")],
27448 )))
27449 }
27450 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
27451 // DATE_FORMAT(x, '%Y%m%d')
27452 Expression::Function(Box::new(Function::new(
27453 "DATE_FORMAT".to_string(),
27454 vec![arg, Expression::string("%Y%m%d")],
27455 )))
27456 }
27457 DialectType::Drill => {
27458 // TO_DATE(x, 'yyyyMMdd')
27459 Expression::Function(Box::new(Function::new(
27460 "TO_DATE".to_string(),
27461 vec![arg, Expression::string("yyyyMMdd")],
27462 )))
27463 }
27464 _ => {
27465 // Default: STRFTIME(x, '%Y%m%d')
27466 Expression::Function(Box::new(Function::new(
27467 "STRFTIME".to_string(),
27468 vec![arg, Expression::string("%Y%m%d")],
27469 )))
27470 }
27471 };
27472 // Use INT (not INTEGER) for Presto/Trino
27473 let int_type = match target {
27474 DialectType::Presto
27475 | DialectType::Trino
27476 | DialectType::Athena
27477 | DialectType::TSQL
27478 | DialectType::Fabric
27479 | DialectType::SQLite
27480 | DialectType::Redshift => DataType::Custom {
27481 name: "INT".to_string(),
27482 },
27483 _ => DataType::Int {
27484 length: None,
27485 integer_spelling: false,
27486 },
27487 };
27488 Ok(Expression::Cast(Box::new(Cast {
27489 this: inner,
27490 to: int_type,
27491 double_colon_syntax: false,
27492 trailing_comments: Vec::new(),
27493 format: None,
27494 default: None,
27495 inferred_type: None,
27496 })))
27497 } else {
27498 Ok(e)
27499 }
27500 }
27501
Action::DiToDateConvert => {
    // DI_TO_DATE(x): convert a yyyymmdd-style integer into a DATE value
    // using whatever string-parsing primitives the target dialect offers.
    if let Expression::Function(f) = e {
        // Pipeline invariant: DI_TO_DATE carries exactly one argument —
        // TODO confirm the parser guarantees this upstream.
        let arg = f.args.into_iter().next().unwrap();
        match target {
            DialectType::DuckDB => {
                // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
                let cast_text = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Text,
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                let strptime = Expression::Function(Box::new(Function::new(
                    "STRPTIME".to_string(),
                    vec![cast_text, Expression::string("%Y%m%d")],
                )));
                Ok(Expression::Cast(Box::new(Cast {
                    this: strptime,
                    to: DataType::Date,
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                    inferred_type: None,
                })))
            }
            DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                // TO_DATE(CAST(x AS STRING), 'yyyyMMdd') — Java-style format.
                let cast_str = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Custom {
                        name: "STRING".to_string(),
                    },
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                Ok(Expression::Function(Box::new(Function::new(
                    "TO_DATE".to_string(),
                    vec![cast_str, Expression::string("yyyyMMdd")],
                ))))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)
                let cast_varchar = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::VarChar {
                        length: None,
                        parenthesized_length: false,
                    },
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                let date_parse = Expression::Function(Box::new(Function::new(
                    "DATE_PARSE".to_string(),
                    vec![cast_varchar, Expression::string("%Y%m%d")],
                )));
                Ok(Expression::Cast(Box::new(Cast {
                    this: date_parse,
                    to: DataType::Date,
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                    inferred_type: None,
                })))
            }
            DialectType::Drill => {
                // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
                let cast_varchar = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::VarChar {
                        length: None,
                        parenthesized_length: false,
                    },
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                Ok(Expression::Function(Box::new(Function::new(
                    "TO_DATE".to_string(),
                    vec![cast_varchar, Expression::string("yyyyMMdd")],
                ))))
            }
            // No known translation: keep the DI_TO_DATE pseudo-function so
            // the generator (or a later pass) can deal with it.
            _ => Ok(Expression::Function(Box::new(Function::new(
                "DI_TO_DATE".to_string(),
                vec![arg],
            )))),
        }
    } else {
        // Not a function call: nothing to rewrite.
        Ok(e)
    }
}
27606
27607 Action::TsOrDiToDiConvert => {
27608 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
27609 if let Expression::Function(f) = e {
27610 let arg = f.args.into_iter().next().unwrap();
27611 let str_type = match target {
27612 DialectType::DuckDB => DataType::Text,
27613 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
27614 DataType::Custom {
27615 name: "STRING".to_string(),
27616 }
27617 }
27618 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
27619 DataType::VarChar {
27620 length: None,
27621 parenthesized_length: false,
27622 }
27623 }
27624 _ => DataType::VarChar {
27625 length: None,
27626 parenthesized_length: false,
27627 },
27628 };
27629 let cast_str = Expression::Cast(Box::new(Cast {
27630 this: arg,
27631 to: str_type,
27632 double_colon_syntax: false,
27633 trailing_comments: Vec::new(),
27634 format: None,
27635 default: None,
27636 inferred_type: None,
27637 }));
27638 let replace_expr = Expression::Function(Box::new(Function::new(
27639 "REPLACE".to_string(),
27640 vec![cast_str, Expression::string("-"), Expression::string("")],
27641 )));
27642 let substr_name = match target {
27643 DialectType::DuckDB
27644 | DialectType::Hive
27645 | DialectType::Spark
27646 | DialectType::Databricks => "SUBSTR",
27647 _ => "SUBSTR",
27648 };
27649 let substr = Expression::Function(Box::new(Function::new(
27650 substr_name.to_string(),
27651 vec![replace_expr, Expression::number(1), Expression::number(8)],
27652 )));
27653 // Use INT (not INTEGER) for Presto/Trino etc.
27654 let int_type = match target {
27655 DialectType::Presto
27656 | DialectType::Trino
27657 | DialectType::Athena
27658 | DialectType::TSQL
27659 | DialectType::Fabric
27660 | DialectType::SQLite
27661 | DialectType::Redshift => DataType::Custom {
27662 name: "INT".to_string(),
27663 },
27664 _ => DataType::Int {
27665 length: None,
27666 integer_spelling: false,
27667 },
27668 };
27669 Ok(Expression::Cast(Box::new(Cast {
27670 this: substr,
27671 to: int_type,
27672 double_colon_syntax: false,
27673 trailing_comments: Vec::new(),
27674 format: None,
27675 default: None,
27676 inferred_type: None,
27677 })))
27678 } else {
27679 Ok(e)
27680 }
27681 }
27682
Action::UnixToStrConvert => {
    // UNIX_TO_STR(x[, fmt]) — three cases:
    //   1. fmt is a string literal -> wrap in Expression::UnixToStr and let
    //      the generator render it per dialect;
    //   2. fmt is a non-literal expression -> build the target dialect's
    //      concrete expression directly here (the generator can only
    //      translate literal format strings);
    //   3. fmt is absent -> Expression::UnixToStr with no format.
    if let Expression::Function(f) = e {
        let mut args = f.args;
        let this = args.remove(0);
        let fmt_expr = if !args.is_empty() {
            Some(args.remove(0))
        } else {
            None
        };

        // Check if format is a string literal
        let fmt_str = fmt_expr.as_ref().and_then(|f| {
            if let Expression::Literal(lit) = f {
                if let Literal::String(s) = lit.as_ref() {
                    Some(s.clone())
                } else {
                    None
                }
            } else {
                None
            }
        });

        if let Some(fmt_string) = fmt_str {
            // String literal format -> use UnixToStr expression (generator handles it)
            Ok(Expression::UnixToStr(Box::new(
                crate::expressions::UnixToStr {
                    this: Box::new(this),
                    format: Some(fmt_string),
                },
            )))
        } else if let Some(fmt_e) = fmt_expr {
            // Non-literal format (e.g., identifier `y`) -> build target expression directly
            match target {
                DialectType::DuckDB => {
                    // STRFTIME(TO_TIMESTAMP(x), y)
                    let to_ts = Expression::Function(Box::new(Function::new(
                        "TO_TIMESTAMP".to_string(),
                        vec![this],
                    )));
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRFTIME".to_string(),
                        vec![to_ts, fmt_e],
                    ))))
                }
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // DATE_FORMAT(FROM_UNIXTIME(x), y)
                    let from_unix = Expression::Function(Box::new(Function::new(
                        "FROM_UNIXTIME".to_string(),
                        vec![this],
                    )));
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_FORMAT".to_string(),
                        vec![from_unix, fmt_e],
                    ))))
                }
                DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
                | DialectType::Doris
                | DialectType::StarRocks => {
                    // FROM_UNIXTIME(x, y) — these dialects take the format
                    // as a second argument directly.
                    Ok(Expression::Function(Box::new(Function::new(
                        "FROM_UNIXTIME".to_string(),
                        vec![this, fmt_e],
                    ))))
                }
                _ => {
                    // Default: keep as UNIX_TO_STR(x, y)
                    Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_TO_STR".to_string(),
                        vec![this, fmt_e],
                    ))))
                }
            }
        } else {
            // No format argument at all: defer rendering to the generator.
            Ok(Expression::UnixToStr(Box::new(
                crate::expressions::UnixToStr {
                    this: Box::new(this),
                    format: None,
                },
            )))
        }
    } else {
        // Not a function call: nothing to rewrite.
        Ok(e)
    }
}
27771
27772 Action::UnixToTimeConvert => {
27773 // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
27774 if let Expression::Function(f) = e {
27775 let arg = f.args.into_iter().next().unwrap();
27776 Ok(Expression::UnixToTime(Box::new(
27777 crate::expressions::UnixToTime {
27778 this: Box::new(arg),
27779 scale: None,
27780 zone: None,
27781 hours: None,
27782 minutes: None,
27783 format: None,
27784 target_type: None,
27785 },
27786 )))
27787 } else {
27788 Ok(e)
27789 }
27790 }
27791
Action::UnixToTimeStrConvert => {
    // UNIX_TO_TIME_STR(x): render an epoch value as a time string using the
    // target dialect's conversion functions.
    if let Expression::Function(f) = e {
        // Pipeline invariant: one argument — TODO confirm upstream.
        let arg = f.args.into_iter().next().unwrap();
        match target {
            DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
                // FROM_UNIXTIME(x)
                Ok(Expression::Function(Box::new(Function::new(
                    "FROM_UNIXTIME".to_string(),
                    vec![arg],
                ))))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // CAST(FROM_UNIXTIME(x) AS VARCHAR)
                let from_unix = Expression::Function(Box::new(Function::new(
                    "FROM_UNIXTIME".to_string(),
                    vec![arg],
                )));
                Ok(Expression::Cast(Box::new(Cast {
                    this: from_unix,
                    to: DataType::VarChar {
                        length: None,
                        parenthesized_length: false,
                    },
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                    inferred_type: None,
                })))
            }
            DialectType::DuckDB => {
                // CAST(TO_TIMESTAMP(x) AS TEXT)
                let to_ts = Expression::Function(Box::new(Function::new(
                    "TO_TIMESTAMP".to_string(),
                    vec![arg],
                )));
                Ok(Expression::Cast(Box::new(Cast {
                    this: to_ts,
                    to: DataType::Text,
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                    inferred_type: None,
                })))
            }
            // No known translation: keep the pseudo-function for the
            // generator (or a later pass) to handle.
            _ => Ok(Expression::Function(Box::new(Function::new(
                "UNIX_TO_TIME_STR".to_string(),
                vec![arg],
            )))),
        }
    } else {
        // Not a function call: nothing to rewrite.
        Ok(e)
    }
}
27848
27849 Action::TimeToUnixConvert => {
27850 // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
27851 if let Expression::Function(f) = e {
27852 let arg = f.args.into_iter().next().unwrap();
27853 Ok(Expression::TimeToUnix(Box::new(
27854 crate::expressions::UnaryFunc {
27855 this: arg,
27856 original_name: None,
27857 inferred_type: None,
27858 },
27859 )))
27860 } else {
27861 Ok(e)
27862 }
27863 }
27864
27865 Action::TimeToStrConvert => {
27866 // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
27867 if let Expression::Function(f) = e {
27868 let mut args = f.args;
27869 let this = args.remove(0);
27870 let fmt = match args.remove(0) {
27871 Expression::Literal(lit)
27872 if matches!(lit.as_ref(), Literal::String(_)) =>
27873 {
27874 let Literal::String(s) = lit.as_ref() else {
27875 unreachable!()
27876 };
27877 s.clone()
27878 }
27879 other => {
27880 return Ok(Expression::Function(Box::new(Function::new(
27881 "TIME_TO_STR".to_string(),
27882 vec![this, other],
27883 ))));
27884 }
27885 };
27886 Ok(Expression::TimeToStr(Box::new(
27887 crate::expressions::TimeToStr {
27888 this: Box::new(this),
27889 format: fmt,
27890 culture: None,
27891 zone: None,
27892 },
27893 )))
27894 } else {
27895 Ok(e)
27896 }
27897 }
27898
27899 Action::StrToUnixConvert => {
27900 // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
27901 if let Expression::Function(f) = e {
27902 let mut args = f.args;
27903 let this = args.remove(0);
27904 let fmt = match args.remove(0) {
27905 Expression::Literal(lit)
27906 if matches!(lit.as_ref(), Literal::String(_)) =>
27907 {
27908 let Literal::String(s) = lit.as_ref() else {
27909 unreachable!()
27910 };
27911 s.clone()
27912 }
27913 other => {
27914 return Ok(Expression::Function(Box::new(Function::new(
27915 "STR_TO_UNIX".to_string(),
27916 vec![this, other],
27917 ))));
27918 }
27919 };
27920 Ok(Expression::StrToUnix(Box::new(
27921 crate::expressions::StrToUnix {
27922 this: Some(Box::new(this)),
27923 format: Some(fmt),
27924 },
27925 )))
27926 } else {
27927 Ok(e)
27928 }
27929 }
27930
Action::TimeStrToUnixConvert => {
    // TIME_STR_TO_UNIX(x): parse a time string and return its Unix epoch
    // using the target dialect's primitives.
    if let Expression::Function(f) = e {
        // Pipeline invariant: one argument — TODO confirm upstream.
        let arg = f.args.into_iter().next().unwrap();
        match target {
            DialectType::DuckDB => {
                // EPOCH(CAST(x AS TIMESTAMP))
                let cast_ts = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Timestamp {
                        timezone: false,
                        precision: None,
                    },
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                Ok(Expression::Function(Box::new(Function::new(
                    "EPOCH".to_string(),
                    vec![cast_ts],
                ))))
            }
            DialectType::Hive
            | DialectType::Doris
            | DialectType::StarRocks
            | DialectType::MySQL => {
                // UNIX_TIMESTAMP(x)
                Ok(Expression::Function(Box::new(Function::new(
                    "UNIX_TIMESTAMP".to_string(),
                    vec![arg],
                ))))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
                // NOTE(review): the hard-coded '%Y-%m-%d %T' layout assumes
                // the input uses the default time-string format — confirm
                // against the generic dialect's TIME_STR contract.
                let date_parse = Expression::Function(Box::new(Function::new(
                    "DATE_PARSE".to_string(),
                    vec![arg, Expression::string("%Y-%m-%d %T")],
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "TO_UNIXTIME".to_string(),
                    vec![date_parse],
                ))))
            }
            // No known translation: keep the pseudo-function for the
            // generator (or a later pass) to handle.
            _ => Ok(Expression::Function(Box::new(Function::new(
                "TIME_STR_TO_UNIX".to_string(),
                vec![arg],
            )))),
        }
    } else {
        // Not a function call: nothing to rewrite.
        Ok(e)
    }
}
27985
Action::TimeToTimeStrConvert => {
    // TIME_TO_TIME_STR(x) -> CAST(x AS <str type>), with the string type
    // chosen per dialect.
    if let Expression::Function(f) = e {
        // Pipeline invariant: one argument — TODO confirm upstream.
        let arg = f.args.into_iter().next().unwrap();
        let str_type = match target {
            DialectType::DuckDB => DataType::Text,
            DialectType::Hive
            | DialectType::Spark
            | DialectType::Databricks
            | DialectType::Doris
            | DialectType::StarRocks => DataType::Custom {
                name: "STRING".to_string(),
            },
            // Redshift: emitted as the literal custom type VARCHAR(MAX)
            // so the length spec survives generation.
            DialectType::Redshift => DataType::Custom {
                name: "VARCHAR(MAX)".to_string(),
            },
            _ => DataType::VarChar {
                length: None,
                parenthesized_length: false,
            },
        };
        Ok(Expression::Cast(Box::new(Cast {
            this: arg,
            to: str_type,
            double_colon_syntax: false,
            trailing_comments: Vec::new(),
            format: None,
            default: None,
            inferred_type: None,
        })))
    } else {
        // Not a function call: nothing to rewrite.
        Ok(e)
    }
}
28020
Action::DateTruncSwapArgs => {
    // DATE_TRUNC('unit', x) from Generic -> target-specific spelling.
    // The args are cloned (rather than moved out) because `f` must remain
    // intact for the fall-through arms that return the call unchanged.
    if let Expression::Function(f) = e {
        if f.args.len() == 2 {
            let unit_arg = f.args[0].clone();
            let expr_arg = f.args[1].clone();
            // Extract unit string from the first arg
            let unit_str = match &unit_arg {
                Expression::Literal(lit)
                    if matches!(lit.as_ref(), Literal::String(_)) =>
                {
                    let Literal::String(s) = lit.as_ref() else {
                        unreachable!()
                    };
                    s.to_ascii_uppercase()
                }
                // Unit is not a string literal: leave the call untouched.
                _ => return Ok(Expression::Function(f)),
            };
            match target {
                DialectType::BigQuery => {
                    // BigQuery: DATE_TRUNC(x, UNIT) - unquoted unit, modeled
                    // as a bare column identifier so it renders unquoted.
                    let unit_ident =
                        Expression::Column(Box::new(crate::expressions::Column {
                            name: crate::expressions::Identifier::new(unit_str),
                            table: None,
                            join_mark: false,
                            trailing_comments: Vec::new(),
                            span: None,
                            inferred_type: None,
                        }));
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![expr_arg, unit_ident],
                    ))))
                }
                DialectType::Doris => {
                    // Doris: DATE_TRUNC(x, 'UNIT') - swapped argument order
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![expr_arg, Expression::string(&unit_str)],
                    ))))
                }
                DialectType::StarRocks => {
                    // StarRocks: DATE_TRUNC('UNIT', x) - keep standard order
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![Expression::string(&unit_str), expr_arg],
                    ))))
                }
                DialectType::Spark | DialectType::Databricks => {
                    // Spark: TRUNC(x, 'UNIT')
                    Ok(Expression::Function(Box::new(Function::new(
                        "TRUNC".to_string(),
                        vec![expr_arg, Expression::string(&unit_str)],
                    ))))
                }
                DialectType::MySQL => {
                    // MySQL: complex expansion based on unit
                    Self::date_trunc_to_mysql(&unit_str, &expr_arg)
                }
                // All other dialects keep the generic form unchanged.
                _ => Ok(Expression::Function(f)),
            }
        } else {
            // Unexpected arity: pass the call through untouched.
            Ok(Expression::Function(f))
        }
    } else {
        // Not a function call: nothing to rewrite.
        Ok(e)
    }
}
28090
Action::TimestampTruncConvert => {
    // TIMESTAMP_TRUNC(x, UNIT[, tz]) from Generic -> target-specific
    // truncation. Argument order, unit quoting, and time-zone handling
    // all differ per target dialect.
    if let Expression::Function(f) = e {
        if f.args.len() >= 2 {
            // Cloned so that `f` stays intact for the arity fallback below.
            let expr_arg = f.args[0].clone();
            let unit_arg = f.args[1].clone();
            // Optional third argument is a time zone.
            let tz_arg = if f.args.len() >= 3 {
                Some(f.args[2].clone())
            } else {
                None
            };
            // Extract unit string; accepted as a string literal or a bare
            // identifier (column), normalized to upper case.
            let unit_str = match &unit_arg {
                Expression::Literal(lit)
                    if matches!(lit.as_ref(), Literal::String(_)) =>
                {
                    let Literal::String(s) = lit.as_ref() else {
                        unreachable!()
                    };
                    s.to_ascii_uppercase()
                }
                Expression::Column(c) => c.name.name.to_ascii_uppercase(),
                _ => {
                    // Unit is not statically known: leave the call untouched.
                    return Ok(Expression::Function(f));
                }
            };
            match target {
                DialectType::Spark | DialectType::Databricks => {
                    // Spark: DATE_TRUNC('UNIT', x)
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![Expression::string(&unit_str), expr_arg],
                    ))))
                }
                DialectType::Doris | DialectType::StarRocks => {
                    // Doris: DATE_TRUNC(x, 'UNIT')
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![expr_arg, Expression::string(&unit_str)],
                    ))))
                }
                DialectType::BigQuery => {
                    // BigQuery: TIMESTAMP_TRUNC(x, UNIT) - keep the function
                    // name but render the unit unquoted (as an identifier);
                    // the optional time zone is passed through as arg 3.
                    let unit_ident =
                        Expression::Column(Box::new(crate::expressions::Column {
                            name: crate::expressions::Identifier::new(unit_str),
                            table: None,
                            join_mark: false,
                            trailing_comments: Vec::new(),
                            span: None,
                            inferred_type: None,
                        }));
                    let mut args = vec![expr_arg, unit_ident];
                    if let Some(tz) = tz_arg {
                        args.push(tz);
                    }
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIMESTAMP_TRUNC".to_string(),
                        args,
                    ))))
                }
                DialectType::DuckDB => {
                    // DuckDB with timezone: DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                    if let Some(tz) = tz_arg {
                        // Non-literal zones fall back to 'UTC'.
                        let tz_str = match &tz {
                            Expression::Literal(lit)
                                if matches!(lit.as_ref(), Literal::String(_)) =>
                            {
                                let Literal::String(s) = lit.as_ref() else {
                                    unreachable!()
                                };
                                s.clone()
                            }
                            _ => "UTC".to_string(),
                        };
                        // x AT TIME ZONE 'tz'
                        let at_tz = Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: expr_arg,
                                zone: Expression::string(&tz_str),
                            },
                        ));
                        // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
                        let trunc = Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::string(&unit_str), at_tz],
                        )));
                        // DATE_TRUNC(...) AT TIME ZONE 'tz'
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: trunc,
                                zone: Expression::string(&tz_str),
                            },
                        )))
                    } else {
                        // No zone: plain DATE_TRUNC('UNIT', x).
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::string(&unit_str), expr_arg],
                        ))))
                    }
                }
                DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
                | DialectType::Snowflake => {
                    // Presto/Snowflake: DATE_TRUNC('UNIT', x) - the optional
                    // time zone argument is intentionally dropped here.
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![Expression::string(&unit_str), expr_arg],
                    ))))
                }
                _ => {
                    // For most dialects: DATE_TRUNC('UNIT', x), appending the
                    // time zone as a trailing argument when present.
                    let mut args = vec![Expression::string(&unit_str), expr_arg];
                    if let Some(tz) = tz_arg {
                        args.push(tz);
                    }
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        args,
                    ))))
                }
            }
        } else {
            // Fewer than two args: pass the call through untouched.
            Ok(Expression::Function(f))
        }
    } else {
        // Not a function call: nothing to rewrite.
        Ok(e)
    }
}
28221
28222 Action::StrToDateConvert => {
28223 // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date parsing
28224 if let Expression::Function(f) = e {
28225 if f.args.len() == 2 {
28226 let mut args = f.args;
28227 let this = args.remove(0);
28228 let fmt_expr = args.remove(0);
28229 let fmt_str = match &fmt_expr {
28230 Expression::Literal(lit)
28231 if matches!(lit.as_ref(), Literal::String(_)) =>
28232 {
28233 let Literal::String(s) = lit.as_ref() else {
28234 unreachable!()
28235 };
28236 Some(s.clone())
28237 }
28238 _ => None,
28239 };
28240 let default_date = "%Y-%m-%d";
28241 let default_time = "%Y-%m-%d %H:%M:%S";
28242 let is_default = fmt_str
28243 .as_ref()
28244 .map_or(false, |f| f == default_date || f == default_time);
28245
28246 if is_default {
28247 // Default format: handle per-dialect
28248 match target {
28249 DialectType::MySQL
28250 | DialectType::Doris
28251 | DialectType::StarRocks => {
28252 // Keep STR_TO_DATE(x, fmt) as-is
28253 Ok(Expression::Function(Box::new(Function::new(
28254 "STR_TO_DATE".to_string(),
28255 vec![this, fmt_expr],
28256 ))))
28257 }
28258 DialectType::Hive => {
28259 // Hive: CAST(x AS DATE)
28260 Ok(Expression::Cast(Box::new(Cast {
28261 this,
28262 to: DataType::Date,
28263 double_colon_syntax: false,
28264 trailing_comments: Vec::new(),
28265 format: None,
28266 default: None,
28267 inferred_type: None,
28268 })))
28269 }
28270 DialectType::Presto
28271 | DialectType::Trino
28272 | DialectType::Athena => {
28273 // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
28274 let date_parse =
28275 Expression::Function(Box::new(Function::new(
28276 "DATE_PARSE".to_string(),
28277 vec![this, fmt_expr],
28278 )));
28279 Ok(Expression::Cast(Box::new(Cast {
28280 this: date_parse,
28281 to: DataType::Date,
28282 double_colon_syntax: false,
28283 trailing_comments: Vec::new(),
28284 format: None,
28285 default: None,
28286 inferred_type: None,
28287 })))
28288 }
28289 _ => {
28290 // Others: TsOrDsToDate (delegates to generator)
28291 Ok(Expression::TsOrDsToDate(Box::new(
28292 crate::expressions::TsOrDsToDate {
28293 this: Box::new(this),
28294 format: None,
28295 safe: None,
28296 },
28297 )))
28298 }
28299 }
28300 } else if let Some(fmt) = fmt_str {
28301 match target {
28302 DialectType::Doris
28303 | DialectType::StarRocks
28304 | DialectType::MySQL => {
28305 // Keep STR_TO_DATE but with normalized format (%H:%M:%S -> %T, %-d -> %e)
28306 let mut normalized = fmt.clone();
28307 normalized = normalized.replace("%-d", "%e");
28308 normalized = normalized.replace("%-m", "%c");
28309 normalized = normalized.replace("%H:%M:%S", "%T");
28310 Ok(Expression::Function(Box::new(Function::new(
28311 "STR_TO_DATE".to_string(),
28312 vec![this, Expression::string(&normalized)],
28313 ))))
28314 }
28315 DialectType::Hive => {
28316 // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
28317 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
28318 let unix_ts =
28319 Expression::Function(Box::new(Function::new(
28320 "UNIX_TIMESTAMP".to_string(),
28321 vec![this, Expression::string(&java_fmt)],
28322 )));
28323 let from_unix =
28324 Expression::Function(Box::new(Function::new(
28325 "FROM_UNIXTIME".to_string(),
28326 vec![unix_ts],
28327 )));
28328 Ok(Expression::Cast(Box::new(Cast {
28329 this: from_unix,
28330 to: DataType::Date,
28331 double_colon_syntax: false,
28332 trailing_comments: Vec::new(),
28333 format: None,
28334 default: None,
28335 inferred_type: None,
28336 })))
28337 }
28338 DialectType::Spark | DialectType::Databricks => {
28339 // Spark: TO_DATE(x, java_fmt)
28340 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
28341 Ok(Expression::Function(Box::new(Function::new(
28342 "TO_DATE".to_string(),
28343 vec![this, Expression::string(&java_fmt)],
28344 ))))
28345 }
28346 DialectType::Drill => {
28347 // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
28348 // The generator's string literal escaping will double the quotes: 'T' -> ''T''
28349 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
28350 let java_fmt = java_fmt.replace('T', "'T'");
28351 Ok(Expression::Function(Box::new(Function::new(
28352 "TO_DATE".to_string(),
28353 vec![this, Expression::string(&java_fmt)],
28354 ))))
28355 }
28356 _ => {
28357 // For other dialects: use TsOrDsToDate which delegates to generator
28358 Ok(Expression::TsOrDsToDate(Box::new(
28359 crate::expressions::TsOrDsToDate {
28360 this: Box::new(this),
28361 format: Some(fmt),
28362 safe: None,
28363 },
28364 )))
28365 }
28366 }
28367 } else {
28368 // Non-string format - keep as-is
28369 let mut new_args = Vec::new();
28370 new_args.push(this);
28371 new_args.push(fmt_expr);
28372 Ok(Expression::Function(Box::new(Function::new(
28373 "STR_TO_DATE".to_string(),
28374 new_args,
28375 ))))
28376 }
28377 } else {
28378 Ok(Expression::Function(f))
28379 }
28380 } else {
28381 Ok(e)
28382 }
28383 }
28384
28385 Action::TsOrDsAddConvert => {
28386 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
28387 if let Expression::Function(f) = e {
28388 if f.args.len() == 3 {
28389 let mut args = f.args;
28390 let x = args.remove(0);
28391 let n = args.remove(0);
28392 let unit_expr = args.remove(0);
28393 let unit_str = match &unit_expr {
28394 Expression::Literal(lit)
28395 if matches!(lit.as_ref(), Literal::String(_)) =>
28396 {
28397 let Literal::String(s) = lit.as_ref() else {
28398 unreachable!()
28399 };
28400 s.to_ascii_uppercase()
28401 }
28402 _ => "DAY".to_string(),
28403 };
28404
28405 match target {
28406 DialectType::Hive
28407 | DialectType::Spark
28408 | DialectType::Databricks => {
28409 // DATE_ADD(x, n) - only supports DAY unit
28410 Ok(Expression::Function(Box::new(Function::new(
28411 "DATE_ADD".to_string(),
28412 vec![x, n],
28413 ))))
28414 }
28415 DialectType::MySQL => {
28416 // DATE_ADD(x, INTERVAL n UNIT)
28417 let iu = match unit_str.as_str() {
28418 "YEAR" => crate::expressions::IntervalUnit::Year,
28419 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
28420 "MONTH" => crate::expressions::IntervalUnit::Month,
28421 "WEEK" => crate::expressions::IntervalUnit::Week,
28422 "HOUR" => crate::expressions::IntervalUnit::Hour,
28423 "MINUTE" => crate::expressions::IntervalUnit::Minute,
28424 "SECOND" => crate::expressions::IntervalUnit::Second,
28425 _ => crate::expressions::IntervalUnit::Day,
28426 };
28427 let interval = Expression::Interval(Box::new(
28428 crate::expressions::Interval {
28429 this: Some(n),
28430 unit: Some(
28431 crate::expressions::IntervalUnitSpec::Simple {
28432 unit: iu,
28433 use_plural: false,
28434 },
28435 ),
28436 },
28437 ));
28438 Ok(Expression::Function(Box::new(Function::new(
28439 "DATE_ADD".to_string(),
28440 vec![x, interval],
28441 ))))
28442 }
28443 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28444 // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
28445 let cast_ts = Expression::Cast(Box::new(Cast {
28446 this: x,
28447 to: DataType::Timestamp {
28448 precision: None,
28449 timezone: false,
28450 },
28451 double_colon_syntax: false,
28452 trailing_comments: Vec::new(),
28453 format: None,
28454 default: None,
28455 inferred_type: None,
28456 }));
28457 let cast_date = Expression::Cast(Box::new(Cast {
28458 this: cast_ts,
28459 to: DataType::Date,
28460 double_colon_syntax: false,
28461 trailing_comments: Vec::new(),
28462 format: None,
28463 default: None,
28464 inferred_type: None,
28465 }));
28466 Ok(Expression::Function(Box::new(Function::new(
28467 "DATE_ADD".to_string(),
28468 vec![Expression::string(&unit_str), n, cast_date],
28469 ))))
28470 }
28471 DialectType::DuckDB => {
28472 // CAST(x AS DATE) + INTERVAL n UNIT
28473 let cast_date = Expression::Cast(Box::new(Cast {
28474 this: x,
28475 to: DataType::Date,
28476 double_colon_syntax: false,
28477 trailing_comments: Vec::new(),
28478 format: None,
28479 default: None,
28480 inferred_type: None,
28481 }));
28482 let iu = match unit_str.as_str() {
28483 "YEAR" => crate::expressions::IntervalUnit::Year,
28484 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
28485 "MONTH" => crate::expressions::IntervalUnit::Month,
28486 "WEEK" => crate::expressions::IntervalUnit::Week,
28487 "HOUR" => crate::expressions::IntervalUnit::Hour,
28488 "MINUTE" => crate::expressions::IntervalUnit::Minute,
28489 "SECOND" => crate::expressions::IntervalUnit::Second,
28490 _ => crate::expressions::IntervalUnit::Day,
28491 };
28492 let interval = Expression::Interval(Box::new(
28493 crate::expressions::Interval {
28494 this: Some(n),
28495 unit: Some(
28496 crate::expressions::IntervalUnitSpec::Simple {
28497 unit: iu,
28498 use_plural: false,
28499 },
28500 ),
28501 },
28502 ));
28503 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
28504 left: cast_date,
28505 right: interval,
28506 left_comments: Vec::new(),
28507 operator_comments: Vec::new(),
28508 trailing_comments: Vec::new(),
28509 inferred_type: None,
28510 })))
28511 }
28512 DialectType::Drill => {
28513 // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
28514 let cast_date = Expression::Cast(Box::new(Cast {
28515 this: x,
28516 to: DataType::Date,
28517 double_colon_syntax: false,
28518 trailing_comments: Vec::new(),
28519 format: None,
28520 default: None,
28521 inferred_type: None,
28522 }));
28523 let iu = match unit_str.as_str() {
28524 "YEAR" => crate::expressions::IntervalUnit::Year,
28525 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
28526 "MONTH" => crate::expressions::IntervalUnit::Month,
28527 "WEEK" => crate::expressions::IntervalUnit::Week,
28528 "HOUR" => crate::expressions::IntervalUnit::Hour,
28529 "MINUTE" => crate::expressions::IntervalUnit::Minute,
28530 "SECOND" => crate::expressions::IntervalUnit::Second,
28531 _ => crate::expressions::IntervalUnit::Day,
28532 };
28533 let interval = Expression::Interval(Box::new(
28534 crate::expressions::Interval {
28535 this: Some(n),
28536 unit: Some(
28537 crate::expressions::IntervalUnitSpec::Simple {
28538 unit: iu,
28539 use_plural: false,
28540 },
28541 ),
28542 },
28543 ));
28544 Ok(Expression::Function(Box::new(Function::new(
28545 "DATE_ADD".to_string(),
28546 vec![cast_date, interval],
28547 ))))
28548 }
28549 _ => {
28550 // Default: keep as TS_OR_DS_ADD
28551 Ok(Expression::Function(Box::new(Function::new(
28552 "TS_OR_DS_ADD".to_string(),
28553 vec![x, n, unit_expr],
28554 ))))
28555 }
28556 }
28557 } else {
28558 Ok(Expression::Function(f))
28559 }
28560 } else {
28561 Ok(e)
28562 }
28563 }
28564
28565 Action::DateFromUnixDateConvert => {
28566 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
28567 if let Expression::Function(f) = e {
28568 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
28569 if matches!(
28570 target,
28571 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
28572 ) {
28573 return Ok(Expression::Function(Box::new(Function::new(
28574 "DATE_FROM_UNIX_DATE".to_string(),
28575 f.args,
28576 ))));
28577 }
28578 let n = f.args.into_iter().next().unwrap();
28579 let epoch_date = Expression::Cast(Box::new(Cast {
28580 this: Expression::string("1970-01-01"),
28581 to: DataType::Date,
28582 double_colon_syntax: false,
28583 trailing_comments: Vec::new(),
28584 format: None,
28585 default: None,
28586 inferred_type: None,
28587 }));
28588 match target {
28589 DialectType::DuckDB => {
28590 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
28591 let interval =
28592 Expression::Interval(Box::new(crate::expressions::Interval {
28593 this: Some(n),
28594 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28595 unit: crate::expressions::IntervalUnit::Day,
28596 use_plural: false,
28597 }),
28598 }));
28599 Ok(Expression::Add(Box::new(
28600 crate::expressions::BinaryOp::new(epoch_date, interval),
28601 )))
28602 }
28603 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28604 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
28605 Ok(Expression::Function(Box::new(Function::new(
28606 "DATE_ADD".to_string(),
28607 vec![Expression::string("DAY"), n, epoch_date],
28608 ))))
28609 }
28610 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
28611 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
28612 Ok(Expression::Function(Box::new(Function::new(
28613 "DATEADD".to_string(),
28614 vec![
28615 Expression::Identifier(Identifier::new("DAY")),
28616 n,
28617 epoch_date,
28618 ],
28619 ))))
28620 }
28621 DialectType::BigQuery => {
28622 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
28623 let interval =
28624 Expression::Interval(Box::new(crate::expressions::Interval {
28625 this: Some(n),
28626 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28627 unit: crate::expressions::IntervalUnit::Day,
28628 use_plural: false,
28629 }),
28630 }));
28631 Ok(Expression::Function(Box::new(Function::new(
28632 "DATE_ADD".to_string(),
28633 vec![epoch_date, interval],
28634 ))))
28635 }
28636 DialectType::MySQL
28637 | DialectType::Doris
28638 | DialectType::StarRocks
28639 | DialectType::Drill => {
28640 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
28641 let interval =
28642 Expression::Interval(Box::new(crate::expressions::Interval {
28643 this: Some(n),
28644 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28645 unit: crate::expressions::IntervalUnit::Day,
28646 use_plural: false,
28647 }),
28648 }));
28649 Ok(Expression::Function(Box::new(Function::new(
28650 "DATE_ADD".to_string(),
28651 vec![epoch_date, interval],
28652 ))))
28653 }
28654 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
28655 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
28656 Ok(Expression::Function(Box::new(Function::new(
28657 "DATE_ADD".to_string(),
28658 vec![epoch_date, n],
28659 ))))
28660 }
28661 DialectType::PostgreSQL
28662 | DialectType::Materialize
28663 | DialectType::RisingWave => {
28664 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
28665 let n_str = match &n {
28666 Expression::Literal(lit)
28667 if matches!(lit.as_ref(), Literal::Number(_)) =>
28668 {
28669 let Literal::Number(s) = lit.as_ref() else {
28670 unreachable!()
28671 };
28672 s.clone()
28673 }
28674 _ => Self::expr_to_string_static(&n),
28675 };
28676 let interval =
28677 Expression::Interval(Box::new(crate::expressions::Interval {
28678 this: Some(Expression::string(&format!("{} DAY", n_str))),
28679 unit: None,
28680 }));
28681 Ok(Expression::Add(Box::new(
28682 crate::expressions::BinaryOp::new(epoch_date, interval),
28683 )))
28684 }
28685 _ => {
28686 // Default: keep as-is
28687 Ok(Expression::Function(Box::new(Function::new(
28688 "DATE_FROM_UNIX_DATE".to_string(),
28689 vec![n],
28690 ))))
28691 }
28692 }
28693 } else {
28694 Ok(e)
28695 }
28696 }
28697
28698 Action::ArrayRemoveConvert => {
28699 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter
28700 if let Expression::ArrayRemove(bf) = e {
28701 let arr = bf.this;
28702 let target_val = bf.expression;
28703 match target {
28704 DialectType::DuckDB => {
28705 let u_id = crate::expressions::Identifier::new("_u");
28706 let lambda =
28707 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
28708 parameters: vec![u_id.clone()],
28709 body: Expression::Neq(Box::new(BinaryOp {
28710 left: Expression::Identifier(u_id),
28711 right: target_val,
28712 left_comments: Vec::new(),
28713 operator_comments: Vec::new(),
28714 trailing_comments: Vec::new(),
28715 inferred_type: None,
28716 })),
28717 colon: false,
28718 parameter_types: Vec::new(),
28719 }));
28720 Ok(Expression::Function(Box::new(Function::new(
28721 "LIST_FILTER".to_string(),
28722 vec![arr, lambda],
28723 ))))
28724 }
28725 DialectType::ClickHouse => {
28726 let u_id = crate::expressions::Identifier::new("_u");
28727 let lambda =
28728 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
28729 parameters: vec![u_id.clone()],
28730 body: Expression::Neq(Box::new(BinaryOp {
28731 left: Expression::Identifier(u_id),
28732 right: target_val,
28733 left_comments: Vec::new(),
28734 operator_comments: Vec::new(),
28735 trailing_comments: Vec::new(),
28736 inferred_type: None,
28737 })),
28738 colon: false,
28739 parameter_types: Vec::new(),
28740 }));
28741 Ok(Expression::Function(Box::new(Function::new(
28742 "arrayFilter".to_string(),
28743 vec![lambda, arr],
28744 ))))
28745 }
28746 DialectType::BigQuery => {
28747 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
28748 let u_id = crate::expressions::Identifier::new("_u");
28749 let u_col =
28750 Expression::Column(Box::new(crate::expressions::Column {
28751 name: u_id.clone(),
28752 table: None,
28753 join_mark: false,
28754 trailing_comments: Vec::new(),
28755 span: None,
28756 inferred_type: None,
28757 }));
28758 let unnest_expr =
28759 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
28760 this: arr,
28761 expressions: Vec::new(),
28762 with_ordinality: false,
28763 alias: None,
28764 offset_alias: None,
28765 }));
28766 let aliased_unnest =
28767 Expression::Alias(Box::new(crate::expressions::Alias {
28768 this: unnest_expr,
28769 alias: u_id.clone(),
28770 column_aliases: Vec::new(),
28771 pre_alias_comments: Vec::new(),
28772 trailing_comments: Vec::new(),
28773 inferred_type: None,
28774 }));
28775 let where_cond = Expression::Neq(Box::new(BinaryOp {
28776 left: u_col.clone(),
28777 right: target_val,
28778 left_comments: Vec::new(),
28779 operator_comments: Vec::new(),
28780 trailing_comments: Vec::new(),
28781 inferred_type: None,
28782 }));
28783 let subquery = Expression::Select(Box::new(
28784 crate::expressions::Select::new()
28785 .column(u_col)
28786 .from(aliased_unnest)
28787 .where_(where_cond),
28788 ));
28789 Ok(Expression::ArrayFunc(Box::new(
28790 crate::expressions::ArrayConstructor {
28791 expressions: vec![subquery],
28792 bracket_notation: false,
28793 use_list_keyword: false,
28794 },
28795 )))
28796 }
28797 _ => Ok(Expression::ArrayRemove(Box::new(
28798 crate::expressions::BinaryFunc {
28799 original_name: None,
28800 this: arr,
28801 expression: target_val,
28802 inferred_type: None,
28803 },
28804 ))),
28805 }
28806 } else {
28807 Ok(e)
28808 }
28809 }
28810
28811 Action::ArrayReverseConvert => {
28812 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
28813 if let Expression::ArrayReverse(af) = e {
28814 Ok(Expression::Function(Box::new(Function::new(
28815 "arrayReverse".to_string(),
28816 vec![af.this],
28817 ))))
28818 } else {
28819 Ok(e)
28820 }
28821 }
28822
28823 Action::JsonKeysConvert => {
28824 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
28825 if let Expression::JsonKeys(uf) = e {
28826 match target {
28827 DialectType::Spark | DialectType::Databricks => {
28828 Ok(Expression::Function(Box::new(Function::new(
28829 "JSON_OBJECT_KEYS".to_string(),
28830 vec![uf.this],
28831 ))))
28832 }
28833 DialectType::Snowflake => Ok(Expression::Function(Box::new(
28834 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
28835 ))),
28836 _ => Ok(Expression::JsonKeys(uf)),
28837 }
28838 } else {
28839 Ok(e)
28840 }
28841 }
28842
28843 Action::ParseJsonStrip => {
28844 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
28845 if let Expression::ParseJson(uf) = e {
28846 Ok(uf.this)
28847 } else {
28848 Ok(e)
28849 }
28850 }
28851
28852 Action::ArraySizeDrill => {
28853 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
28854 if let Expression::ArraySize(uf) = e {
28855 Ok(Expression::Function(Box::new(Function::new(
28856 "REPEATED_COUNT".to_string(),
28857 vec![uf.this],
28858 ))))
28859 } else {
28860 Ok(e)
28861 }
28862 }
28863
28864 Action::WeekOfYearToWeekIso => {
28865 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
28866 if let Expression::WeekOfYear(uf) = e {
28867 Ok(Expression::Function(Box::new(Function::new(
28868 "WEEKISO".to_string(),
28869 vec![uf.this],
28870 ))))
28871 } else {
28872 Ok(e)
28873 }
28874 }
28875 }
28876 })
28877 }
28878
28879 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
28880 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
28881 use crate::expressions::Function;
28882 match unit {
28883 "DAY" => {
28884 // DATE(x)
28885 Ok(Expression::Function(Box::new(Function::new(
28886 "DATE".to_string(),
28887 vec![expr.clone()],
28888 ))))
28889 }
28890 "WEEK" => {
28891 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
28892 let year_x = Expression::Function(Box::new(Function::new(
28893 "YEAR".to_string(),
28894 vec![expr.clone()],
28895 )));
28896 let week_x = Expression::Function(Box::new(Function::new(
28897 "WEEK".to_string(),
28898 vec![expr.clone(), Expression::number(1)],
28899 )));
28900 let concat_args = vec![
28901 year_x,
28902 Expression::string(" "),
28903 week_x,
28904 Expression::string(" 1"),
28905 ];
28906 let concat = Expression::Function(Box::new(Function::new(
28907 "CONCAT".to_string(),
28908 concat_args,
28909 )));
28910 Ok(Expression::Function(Box::new(Function::new(
28911 "STR_TO_DATE".to_string(),
28912 vec![concat, Expression::string("%Y %u %w")],
28913 ))))
28914 }
28915 "MONTH" => {
28916 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
28917 let year_x = Expression::Function(Box::new(Function::new(
28918 "YEAR".to_string(),
28919 vec![expr.clone()],
28920 )));
28921 let month_x = Expression::Function(Box::new(Function::new(
28922 "MONTH".to_string(),
28923 vec![expr.clone()],
28924 )));
28925 let concat_args = vec![
28926 year_x,
28927 Expression::string(" "),
28928 month_x,
28929 Expression::string(" 1"),
28930 ];
28931 let concat = Expression::Function(Box::new(Function::new(
28932 "CONCAT".to_string(),
28933 concat_args,
28934 )));
28935 Ok(Expression::Function(Box::new(Function::new(
28936 "STR_TO_DATE".to_string(),
28937 vec![concat, Expression::string("%Y %c %e")],
28938 ))))
28939 }
28940 "QUARTER" => {
28941 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
28942 let year_x = Expression::Function(Box::new(Function::new(
28943 "YEAR".to_string(),
28944 vec![expr.clone()],
28945 )));
28946 let quarter_x = Expression::Function(Box::new(Function::new(
28947 "QUARTER".to_string(),
28948 vec![expr.clone()],
28949 )));
28950 // QUARTER(x) * 3 - 2
28951 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
28952 left: quarter_x,
28953 right: Expression::number(3),
28954 left_comments: Vec::new(),
28955 operator_comments: Vec::new(),
28956 trailing_comments: Vec::new(),
28957 inferred_type: None,
28958 }));
28959 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
28960 left: mul,
28961 right: Expression::number(2),
28962 left_comments: Vec::new(),
28963 operator_comments: Vec::new(),
28964 trailing_comments: Vec::new(),
28965 inferred_type: None,
28966 }));
28967 let concat_args = vec![
28968 year_x,
28969 Expression::string(" "),
28970 sub,
28971 Expression::string(" 1"),
28972 ];
28973 let concat = Expression::Function(Box::new(Function::new(
28974 "CONCAT".to_string(),
28975 concat_args,
28976 )));
28977 Ok(Expression::Function(Box::new(Function::new(
28978 "STR_TO_DATE".to_string(),
28979 vec![concat, Expression::string("%Y %c %e")],
28980 ))))
28981 }
28982 "YEAR" => {
28983 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
28984 let year_x = Expression::Function(Box::new(Function::new(
28985 "YEAR".to_string(),
28986 vec![expr.clone()],
28987 )));
28988 let concat_args = vec![year_x, Expression::string(" 1 1")];
28989 let concat = Expression::Function(Box::new(Function::new(
28990 "CONCAT".to_string(),
28991 concat_args,
28992 )));
28993 Ok(Expression::Function(Box::new(Function::new(
28994 "STR_TO_DATE".to_string(),
28995 vec![concat, Expression::string("%Y %c %e")],
28996 ))))
28997 }
28998 _ => {
28999 // Unsupported unit -> keep as DATE_TRUNC
29000 Ok(Expression::Function(Box::new(Function::new(
29001 "DATE_TRUNC".to_string(),
29002 vec![Expression::string(unit), expr.clone()],
29003 ))))
29004 }
29005 }
29006 }
29007
29008 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
29009 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
29010 use crate::expressions::DataType;
29011 match dt {
29012 DataType::VarChar { .. } | DataType::Char { .. } => true,
29013 DataType::Struct { fields, .. } => fields
29014 .iter()
29015 .any(|f| Self::has_varchar_char_type(&f.data_type)),
29016 _ => false,
29017 }
29018 }
29019
29020 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
29021 fn normalize_varchar_to_string(
29022 dt: crate::expressions::DataType,
29023 ) -> crate::expressions::DataType {
29024 use crate::expressions::DataType;
29025 match dt {
29026 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
29027 name: "STRING".to_string(),
29028 },
29029 DataType::Struct { fields, nested } => {
29030 let fields = fields
29031 .into_iter()
29032 .map(|mut f| {
29033 f.data_type = Self::normalize_varchar_to_string(f.data_type);
29034 f
29035 })
29036 .collect();
29037 DataType::Struct { fields, nested }
29038 }
29039 other => other,
29040 }
29041 }
29042
29043 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
29044 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
29045 if let Expression::Literal(ref lit) = expr {
29046 if let crate::expressions::Literal::String(ref s) = lit.as_ref() {
29047 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
29048 let trimmed = s.trim();
29049
29050 // Find where digits end and unit text begins
29051 let digit_end = trimmed
29052 .find(|c: char| !c.is_ascii_digit())
29053 .unwrap_or(trimmed.len());
29054 if digit_end == 0 || digit_end == trimmed.len() {
29055 return expr;
29056 }
29057 let num = &trimmed[..digit_end];
29058 let unit_text = trimmed[digit_end..].trim().to_ascii_uppercase();
29059 if unit_text.is_empty() {
29060 return expr;
29061 }
29062
29063 let known_units = [
29064 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS",
29065 "WEEK", "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
29066 ];
29067 if !known_units.contains(&unit_text.as_str()) {
29068 return expr;
29069 }
29070
29071 let unit_str = unit_text.clone();
29072 // Singularize
29073 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
29074 &unit_str[..unit_str.len() - 1]
29075 } else {
29076 &unit_str
29077 };
29078 let unit = unit_singular;
29079
29080 match target {
29081 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29082 // INTERVAL '2' DAY
29083 let iu = match unit {
29084 "DAY" => crate::expressions::IntervalUnit::Day,
29085 "HOUR" => crate::expressions::IntervalUnit::Hour,
29086 "MINUTE" => crate::expressions::IntervalUnit::Minute,
29087 "SECOND" => crate::expressions::IntervalUnit::Second,
29088 "WEEK" => crate::expressions::IntervalUnit::Week,
29089 "MONTH" => crate::expressions::IntervalUnit::Month,
29090 "YEAR" => crate::expressions::IntervalUnit::Year,
29091 _ => return expr,
29092 };
29093 return Expression::Interval(Box::new(crate::expressions::Interval {
29094 this: Some(Expression::string(num)),
29095 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29096 unit: iu,
29097 use_plural: false,
29098 }),
29099 }));
29100 }
29101 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
29102 // INTERVAL '2 DAYS'
29103 let plural = if num != "1" && !unit_str.ends_with('S') {
29104 format!("{} {}S", num, unit)
29105 } else if unit_str.ends_with('S') {
29106 format!("{} {}", num, unit_str)
29107 } else {
29108 format!("{} {}", num, unit)
29109 };
29110 return Expression::Interval(Box::new(crate::expressions::Interval {
29111 this: Some(Expression::string(&plural)),
29112 unit: None,
29113 }));
29114 }
29115 _ => {
29116 // Spark/Databricks/Hive: INTERVAL '1' DAY
29117 let iu = match unit {
29118 "DAY" => crate::expressions::IntervalUnit::Day,
29119 "HOUR" => crate::expressions::IntervalUnit::Hour,
29120 "MINUTE" => crate::expressions::IntervalUnit::Minute,
29121 "SECOND" => crate::expressions::IntervalUnit::Second,
29122 "WEEK" => crate::expressions::IntervalUnit::Week,
29123 "MONTH" => crate::expressions::IntervalUnit::Month,
29124 "YEAR" => crate::expressions::IntervalUnit::Year,
29125 _ => return expr,
29126 };
29127 return Expression::Interval(Box::new(crate::expressions::Interval {
29128 this: Some(Expression::string(num)),
29129 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29130 unit: iu,
29131 use_plural: false,
29132 }),
29133 }));
29134 }
29135 }
29136 }
29137 }
29138 // If it's already an INTERVAL expression, pass through
29139 expr
29140 }
29141
29142 /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
29143 /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
29144 /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
29145 /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
29146 fn rewrite_unnest_expansion(
29147 select: &crate::expressions::Select,
29148 target: DialectType,
29149 ) -> Option<crate::expressions::Select> {
29150 use crate::expressions::{
29151 Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
29152 UnnestFunc,
29153 };
29154
29155 let index_offset: i64 = match target {
29156 DialectType::Presto | DialectType::Trino => 1,
29157 _ => 0, // BigQuery, Snowflake
29158 };
29159
29160 let if_func_name = match target {
29161 DialectType::Snowflake => "IFF",
29162 _ => "IF",
29163 };
29164
29165 let array_length_func = match target {
29166 DialectType::BigQuery => "ARRAY_LENGTH",
29167 DialectType::Presto | DialectType::Trino => "CARDINALITY",
29168 DialectType::Snowflake => "ARRAY_SIZE",
29169 _ => "ARRAY_LENGTH",
29170 };
29171
29172 let use_table_aliases = matches!(
29173 target,
29174 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
29175 );
29176 let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);
29177
29178 fn make_col(name: &str, table: Option<&str>) -> Expression {
29179 if let Some(tbl) = table {
29180 Expression::boxed_column(Column {
29181 name: Identifier::new(name.to_string()),
29182 table: Some(Identifier::new(tbl.to_string())),
29183 join_mark: false,
29184 trailing_comments: Vec::new(),
29185 span: None,
29186 inferred_type: None,
29187 })
29188 } else {
29189 Expression::Identifier(Identifier::new(name.to_string()))
29190 }
29191 }
29192
29193 fn make_join(this: Expression) -> Join {
29194 Join {
29195 this,
29196 on: None,
29197 using: Vec::new(),
29198 kind: JoinKind::Cross,
29199 use_inner_keyword: false,
29200 use_outer_keyword: false,
29201 deferred_condition: false,
29202 join_hint: None,
29203 match_condition: None,
29204 pivots: Vec::new(),
29205 comments: Vec::new(),
29206 nesting_group: 0,
29207 directed: false,
29208 }
29209 }
29210
29211 // Collect UNNEST info from SELECT expressions
29212 struct UnnestInfo {
29213 arr_expr: Expression,
29214 col_alias: String,
29215 pos_alias: String,
29216 source_alias: String,
29217 original_expr: Expression,
29218 has_outer_alias: Option<String>,
29219 }
29220
29221 let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
29222 let mut col_counter = 0usize;
29223 let mut pos_counter = 1usize;
29224 let mut source_counter = 1usize;
29225
29226 fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
29227 match expr {
29228 Expression::Unnest(u) => Some(u.this.clone()),
29229 Expression::Function(f)
29230 if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
29231 {
29232 Some(f.args[0].clone())
29233 }
29234 Expression::Alias(a) => extract_unnest_arg(&a.this),
29235 Expression::Add(op)
29236 | Expression::Sub(op)
29237 | Expression::Mul(op)
29238 | Expression::Div(op) => {
29239 extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
29240 }
29241 _ => None,
29242 }
29243 }
29244
29245 fn get_alias_name(expr: &Expression) -> Option<String> {
29246 if let Expression::Alias(a) = expr {
29247 Some(a.alias.name.clone())
29248 } else {
29249 None
29250 }
29251 }
29252
29253 for sel_expr in &select.expressions {
29254 if let Some(arr) = extract_unnest_arg(sel_expr) {
29255 col_counter += 1;
29256 pos_counter += 1;
29257 source_counter += 1;
29258
29259 let col_alias = if col_counter == 1 {
29260 "col".to_string()
29261 } else {
29262 format!("col_{}", col_counter)
29263 };
29264 let pos_alias = format!("pos_{}", pos_counter);
29265 let source_alias = format!("_u_{}", source_counter);
29266 let has_outer_alias = get_alias_name(sel_expr);
29267
29268 unnest_infos.push(UnnestInfo {
29269 arr_expr: arr,
29270 col_alias,
29271 pos_alias,
29272 source_alias,
29273 original_expr: sel_expr.clone(),
29274 has_outer_alias,
29275 });
29276 }
29277 }
29278
29279 if unnest_infos.is_empty() {
29280 return None;
29281 }
29282
29283 let series_alias = "pos".to_string();
29284 let series_source_alias = "_u".to_string();
29285 let tbl_ref = if use_table_aliases {
29286 Some(series_source_alias.as_str())
29287 } else {
29288 None
29289 };
29290
29291 // Build new SELECT expressions
29292 let mut new_select_exprs = Vec::new();
29293 for info in &unnest_infos {
29294 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
29295 let src_ref = if use_table_aliases {
29296 Some(info.source_alias.as_str())
29297 } else {
29298 None
29299 };
29300
29301 let pos_col = make_col(&series_alias, tbl_ref);
29302 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
29303 let col_ref = make_col(actual_col_name, src_ref);
29304
29305 let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
29306 pos_col.clone(),
29307 unnest_pos_col.clone(),
29308 )));
29309 let mut if_args = vec![eq_cond, col_ref];
29310 if null_third_arg {
29311 if_args.push(Expression::Null(crate::expressions::Null));
29312 }
29313
29314 let if_expr =
29315 Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
29316 let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);
29317
29318 new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
29319 final_expr,
29320 Identifier::new(actual_col_name.clone()),
29321 ))));
29322 }
29323
29324 // Build array size expressions for GREATEST
29325 let size_exprs: Vec<Expression> = unnest_infos
29326 .iter()
29327 .map(|info| {
29328 Expression::Function(Box::new(Function::new(
29329 array_length_func.to_string(),
29330 vec![info.arr_expr.clone()],
29331 )))
29332 })
29333 .collect();
29334
29335 let greatest =
29336 Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));
29337
29338 let series_end = if index_offset == 0 {
29339 Expression::Sub(Box::new(BinaryOp::new(
29340 greatest,
29341 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
29342 )))
29343 } else {
29344 greatest
29345 };
29346
29347 // Build the position array source
29348 let series_unnest_expr = match target {
29349 DialectType::BigQuery => {
29350 let gen_array = Expression::Function(Box::new(Function::new(
29351 "GENERATE_ARRAY".to_string(),
29352 vec![
29353 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
29354 series_end,
29355 ],
29356 )));
29357 Expression::Unnest(Box::new(UnnestFunc {
29358 this: gen_array,
29359 expressions: Vec::new(),
29360 with_ordinality: false,
29361 alias: None,
29362 offset_alias: None,
29363 }))
29364 }
29365 DialectType::Presto | DialectType::Trino => {
29366 let sequence = Expression::Function(Box::new(Function::new(
29367 "SEQUENCE".to_string(),
29368 vec![
29369 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
29370 series_end,
29371 ],
29372 )));
29373 Expression::Unnest(Box::new(UnnestFunc {
29374 this: sequence,
29375 expressions: Vec::new(),
29376 with_ordinality: false,
29377 alias: None,
29378 offset_alias: None,
29379 }))
29380 }
29381 DialectType::Snowflake => {
29382 let range_end = Expression::Add(Box::new(BinaryOp::new(
29383 Expression::Paren(Box::new(crate::expressions::Paren {
29384 this: series_end,
29385 trailing_comments: Vec::new(),
29386 })),
29387 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
29388 )));
29389 let gen_range = Expression::Function(Box::new(Function::new(
29390 "ARRAY_GENERATE_RANGE".to_string(),
29391 vec![
29392 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
29393 range_end,
29394 ],
29395 )));
29396 let flatten_arg =
29397 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
29398 name: Identifier::new("INPUT".to_string()),
29399 value: gen_range,
29400 separator: crate::expressions::NamedArgSeparator::DArrow,
29401 }));
29402 let flatten = Expression::Function(Box::new(Function::new(
29403 "FLATTEN".to_string(),
29404 vec![flatten_arg],
29405 )));
29406 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
29407 }
29408 _ => return None,
29409 };
29410
29411 // Build series alias expression
29412 let series_alias_expr = if use_table_aliases {
29413 let col_aliases = if matches!(target, DialectType::Snowflake) {
29414 vec![
29415 Identifier::new("seq".to_string()),
29416 Identifier::new("key".to_string()),
29417 Identifier::new("path".to_string()),
29418 Identifier::new("index".to_string()),
29419 Identifier::new(series_alias.clone()),
29420 Identifier::new("this".to_string()),
29421 ]
29422 } else {
29423 vec![Identifier::new(series_alias.clone())]
29424 };
29425 Expression::Alias(Box::new(Alias {
29426 this: series_unnest_expr,
29427 alias: Identifier::new(series_source_alias.clone()),
29428 column_aliases: col_aliases,
29429 pre_alias_comments: Vec::new(),
29430 trailing_comments: Vec::new(),
29431 inferred_type: None,
29432 }))
29433 } else {
29434 Expression::Alias(Box::new(Alias::new(
29435 series_unnest_expr,
29436 Identifier::new(series_alias.clone()),
29437 )))
29438 };
29439
29440 // Build CROSS JOINs for each UNNEST
29441 let mut joins = Vec::new();
29442 for info in &unnest_infos {
29443 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
29444
29445 let unnest_join_expr = match target {
29446 DialectType::BigQuery => {
29447 // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
29448 let unnest = UnnestFunc {
29449 this: info.arr_expr.clone(),
29450 expressions: Vec::new(),
29451 with_ordinality: true,
29452 alias: Some(Identifier::new(actual_col_name.clone())),
29453 offset_alias: Some(Identifier::new(info.pos_alias.clone())),
29454 };
29455 Expression::Unnest(Box::new(unnest))
29456 }
29457 DialectType::Presto | DialectType::Trino => {
29458 let unnest = UnnestFunc {
29459 this: info.arr_expr.clone(),
29460 expressions: Vec::new(),
29461 with_ordinality: true,
29462 alias: None,
29463 offset_alias: None,
29464 };
29465 Expression::Alias(Box::new(Alias {
29466 this: Expression::Unnest(Box::new(unnest)),
29467 alias: Identifier::new(info.source_alias.clone()),
29468 column_aliases: vec![
29469 Identifier::new(actual_col_name.clone()),
29470 Identifier::new(info.pos_alias.clone()),
29471 ],
29472 pre_alias_comments: Vec::new(),
29473 trailing_comments: Vec::new(),
29474 inferred_type: None,
29475 }))
29476 }
29477 DialectType::Snowflake => {
29478 let flatten_arg =
29479 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
29480 name: Identifier::new("INPUT".to_string()),
29481 value: info.arr_expr.clone(),
29482 separator: crate::expressions::NamedArgSeparator::DArrow,
29483 }));
29484 let flatten = Expression::Function(Box::new(Function::new(
29485 "FLATTEN".to_string(),
29486 vec![flatten_arg],
29487 )));
29488 let table_fn = Expression::Function(Box::new(Function::new(
29489 "TABLE".to_string(),
29490 vec![flatten],
29491 )));
29492 Expression::Alias(Box::new(Alias {
29493 this: table_fn,
29494 alias: Identifier::new(info.source_alias.clone()),
29495 column_aliases: vec![
29496 Identifier::new("seq".to_string()),
29497 Identifier::new("key".to_string()),
29498 Identifier::new("path".to_string()),
29499 Identifier::new(info.pos_alias.clone()),
29500 Identifier::new(actual_col_name.clone()),
29501 Identifier::new("this".to_string()),
29502 ],
29503 pre_alias_comments: Vec::new(),
29504 trailing_comments: Vec::new(),
29505 inferred_type: None,
29506 }))
29507 }
29508 _ => return None,
29509 };
29510
29511 joins.push(make_join(unnest_join_expr));
29512 }
29513
29514 // Build WHERE clause
29515 let mut where_conditions: Vec<Expression> = Vec::new();
29516 for info in &unnest_infos {
29517 let src_ref = if use_table_aliases {
29518 Some(info.source_alias.as_str())
29519 } else {
29520 None
29521 };
29522 let pos_col = make_col(&series_alias, tbl_ref);
29523 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
29524
29525 let arr_size = Expression::Function(Box::new(Function::new(
29526 array_length_func.to_string(),
29527 vec![info.arr_expr.clone()],
29528 )));
29529
29530 let size_ref = if index_offset == 0 {
29531 Expression::Paren(Box::new(crate::expressions::Paren {
29532 this: Expression::Sub(Box::new(BinaryOp::new(
29533 arr_size,
29534 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
29535 ))),
29536 trailing_comments: Vec::new(),
29537 }))
29538 } else {
29539 arr_size
29540 };
29541
29542 let eq = Expression::Eq(Box::new(BinaryOp::new(
29543 pos_col.clone(),
29544 unnest_pos_col.clone(),
29545 )));
29546 let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
29547 let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
29548 let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
29549 let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
29550 this: and_cond,
29551 trailing_comments: Vec::new(),
29552 }));
29553 let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));
29554
29555 where_conditions.push(or_cond);
29556 }
29557
29558 let where_expr = if where_conditions.len() == 1 {
29559 // Single condition: no parens needed
29560 where_conditions.into_iter().next().unwrap()
29561 } else {
29562 // Multiple conditions: wrap each OR in parens, then combine with AND
29563 let wrap = |e: Expression| {
29564 Expression::Paren(Box::new(crate::expressions::Paren {
29565 this: e,
29566 trailing_comments: Vec::new(),
29567 }))
29568 };
29569 let mut iter = where_conditions.into_iter();
29570 let first = wrap(iter.next().unwrap());
29571 let second = wrap(iter.next().unwrap());
29572 let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
29573 this: Expression::And(Box::new(BinaryOp::new(first, second))),
29574 trailing_comments: Vec::new(),
29575 }));
29576 for cond in iter {
29577 combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
29578 }
29579 combined
29580 };
29581
29582 // Build the new SELECT
29583 let mut new_select = select.clone();
29584 new_select.expressions = new_select_exprs;
29585
29586 if new_select.from.is_some() {
29587 let mut all_joins = vec![make_join(series_alias_expr)];
29588 all_joins.extend(joins);
29589 new_select.joins.extend(all_joins);
29590 } else {
29591 new_select.from = Some(From {
29592 expressions: vec![series_alias_expr],
29593 });
29594 new_select.joins.extend(joins);
29595 }
29596
29597 if let Some(ref existing_where) = new_select.where_clause {
29598 let combined = Expression::And(Box::new(BinaryOp::new(
29599 existing_where.this.clone(),
29600 where_expr,
29601 )));
29602 new_select.where_clause = Some(crate::expressions::Where { this: combined });
29603 } else {
29604 new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
29605 }
29606
29607 Some(new_select)
29608 }
29609
29610 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
29611 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
29612 match original {
29613 Expression::Unnest(_) => replacement.clone(),
29614 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
29615 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
29616 Expression::Add(op) => {
29617 let left = Self::replace_unnest_with_if(&op.left, replacement);
29618 let right = Self::replace_unnest_with_if(&op.right, replacement);
29619 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
29620 }
29621 Expression::Sub(op) => {
29622 let left = Self::replace_unnest_with_if(&op.left, replacement);
29623 let right = Self::replace_unnest_with_if(&op.right, replacement);
29624 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
29625 }
29626 Expression::Mul(op) => {
29627 let left = Self::replace_unnest_with_if(&op.left, replacement);
29628 let right = Self::replace_unnest_with_if(&op.right, replacement);
29629 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
29630 }
29631 Expression::Div(op) => {
29632 let left = Self::replace_unnest_with_if(&op.left, replacement);
29633 let right = Self::replace_unnest_with_if(&op.right, replacement);
29634 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
29635 }
29636 _ => original.clone(),
29637 }
29638 }
29639
29640 /// Decompose a JSON path like `$.y[0].z` into individual parts: `["y", "0", "z"]`.
29641 /// Strips `$` prefix, handles bracket notation, quoted strings, and removes `[*]` wildcards.
29642 fn decompose_json_path(path: &str) -> Vec<String> {
29643 let mut parts = Vec::new();
29644 let path = if path.starts_with("$.") {
29645 &path[2..]
29646 } else if path.starts_with('$') {
29647 &path[1..]
29648 } else {
29649 path
29650 };
29651 if path.is_empty() {
29652 return parts;
29653 }
29654 let mut current = String::new();
29655 let chars: Vec<char> = path.chars().collect();
29656 let mut i = 0;
29657 while i < chars.len() {
29658 match chars[i] {
29659 '.' => {
29660 if !current.is_empty() {
29661 parts.push(current.clone());
29662 current.clear();
29663 }
29664 i += 1;
29665 }
29666 '[' => {
29667 if !current.is_empty() {
29668 parts.push(current.clone());
29669 current.clear();
29670 }
29671 i += 1;
29672 let mut bracket_content = String::new();
29673 while i < chars.len() && chars[i] != ']' {
29674 if chars[i] == '"' || chars[i] == '\'' {
29675 let quote = chars[i];
29676 i += 1;
29677 while i < chars.len() && chars[i] != quote {
29678 bracket_content.push(chars[i]);
29679 i += 1;
29680 }
29681 if i < chars.len() {
29682 i += 1;
29683 }
29684 } else {
29685 bracket_content.push(chars[i]);
29686 i += 1;
29687 }
29688 }
29689 if i < chars.len() {
29690 i += 1;
29691 }
29692 if bracket_content != "*" {
29693 parts.push(bracket_content);
29694 }
29695 }
29696 _ => {
29697 current.push(chars[i]);
29698 i += 1;
29699 }
29700 }
29701 }
29702 if !current.is_empty() {
29703 parts.push(current);
29704 }
29705 parts
29706 }
29707
29708 /// Strip `$` prefix from a JSON path, keeping the rest.
29709 /// `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`
29710 fn strip_json_dollar_prefix(path: &str) -> String {
29711 if path.starts_with("$.") {
29712 path[2..].to_string()
29713 } else if path.starts_with('$') {
29714 path[1..].to_string()
29715 } else {
29716 path.to_string()
29717 }
29718 }
29719
29720 /// Strip `[*]` wildcards from a JSON path.
29721 /// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`
29722 fn strip_json_wildcards(path: &str) -> String {
29723 path.replace("[*]", "")
29724 .replace("..", ".") // Clean double dots from `$.y[*].z` -> `$.y..z`
29725 .trim_end_matches('.')
29726 .to_string()
29727 }
29728
29729 /// Convert bracket notation to dot notation for JSON paths.
29730 /// `$["a b"]` -> `$."a b"`, `$["key"]` -> `$.key`
29731 fn bracket_to_dot_notation(path: &str) -> String {
29732 let mut result = String::new();
29733 let chars: Vec<char> = path.chars().collect();
29734 let mut i = 0;
29735 while i < chars.len() {
29736 if chars[i] == '[' {
29737 // Read bracket content
29738 i += 1;
29739 let mut bracket_content = String::new();
29740 let mut is_quoted = false;
29741 let mut _quote_char = '"';
29742 while i < chars.len() && chars[i] != ']' {
29743 if chars[i] == '"' || chars[i] == '\'' {
29744 is_quoted = true;
29745 _quote_char = chars[i];
29746 i += 1;
29747 while i < chars.len() && chars[i] != _quote_char {
29748 bracket_content.push(chars[i]);
29749 i += 1;
29750 }
29751 if i < chars.len() {
29752 i += 1;
29753 }
29754 } else {
29755 bracket_content.push(chars[i]);
29756 i += 1;
29757 }
29758 }
29759 if i < chars.len() {
29760 i += 1;
29761 } // skip ]
29762 if bracket_content == "*" {
29763 // Keep wildcard as-is
29764 result.push_str("[*]");
29765 } else if is_quoted {
29766 // Quoted bracket -> dot notation with quotes
29767 result.push('.');
29768 result.push('"');
29769 result.push_str(&bracket_content);
29770 result.push('"');
29771 } else {
29772 // Numeric index -> keep as bracket
29773 result.push('[');
29774 result.push_str(&bracket_content);
29775 result.push(']');
29776 }
29777 } else {
29778 result.push(chars[i]);
29779 i += 1;
29780 }
29781 }
29782 result
29783 }
29784
29785 /// Convert JSON path bracket quoted strings to use single quotes instead of double quotes.
29786 /// `$["a b"]` -> `$['a b']`
29787 fn bracket_to_single_quotes(path: &str) -> String {
29788 let mut result = String::new();
29789 let chars: Vec<char> = path.chars().collect();
29790 let mut i = 0;
29791 while i < chars.len() {
29792 if chars[i] == '[' && i + 1 < chars.len() && chars[i + 1] == '"' {
29793 result.push('[');
29794 result.push('\'');
29795 i += 2; // skip [ and "
29796 while i < chars.len() && chars[i] != '"' {
29797 result.push(chars[i]);
29798 i += 1;
29799 }
29800 if i < chars.len() {
29801 i += 1;
29802 } // skip closing "
29803 result.push('\'');
29804 } else {
29805 result.push(chars[i]);
29806 i += 1;
29807 }
29808 }
29809 result
29810 }
29811
    /// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
    /// or PostgreSQL #temp -> TEMPORARY.
    /// Also strips # from INSERT INTO #table for non-TSQL targets.
    ///
    /// The expression is returned unchanged whenever no rewrite applies, so this
    /// is safe to call on arbitrary top-level statements.
    fn transform_select_into(
        expr: Expression,
        _source: DialectType,
        target: DialectType,
    ) -> Expression {
        use crate::expressions::{CreateTable, Expression, TableRef};

        // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets.
        // TSQL and Fabric keep the `#` temp-table prefix; everyone else has it
        // stripped from the table name.
        if let Expression::Insert(ref insert) = expr {
            if insert.table.name.name.starts_with('#')
                && !matches!(target, DialectType::TSQL | DialectType::Fabric)
            {
                let mut new_insert = insert.clone();
                new_insert.table.name.name =
                    insert.table.name.name.trim_start_matches('#').to_string();
                return Expression::Insert(new_insert);
            }
            // Any other INSERT passes through untouched.
            return expr;
        }

        if let Expression::Select(ref select) = expr {
            if let Some(ref into) = select.into {
                // The INTO target may have been parsed as a table reference or a
                // bare identifier; anything else yields an empty name.
                let table_name_raw = match &into.this {
                    Expression::Table(tr) => tr.name.name.clone(),
                    Expression::Identifier(id) => id.name.clone(),
                    _ => String::new(),
                };
                // A `#` prefix (TSQL convention) or an explicit TEMPORARY flag
                // both mark the target as a temp table.
                let is_temp = table_name_raw.starts_with('#') || into.temporary;
                let clean_name = table_name_raw.trim_start_matches('#').to_string();

                match target {
                    DialectType::DuckDB | DialectType::Snowflake => {
                        // SELECT INTO -> CREATE TABLE AS SELECT
                        let mut new_select = select.clone();
                        new_select.into = None;
                        // Only name / temporary / as_select carry information here;
                        // every other CreateTable field takes its empty default.
                        let ct = CreateTable {
                            name: TableRef::new(clean_name),
                            on_cluster: None,
                            columns: Vec::new(),
                            constraints: Vec::new(),
                            if_not_exists: false,
                            temporary: is_temp,
                            or_replace: false,
                            table_modifier: None,
                            as_select: Some(Expression::Select(new_select)),
                            as_select_parenthesized: false,
                            on_commit: None,
                            clone_source: None,
                            clone_at_clause: None,
                            shallow_clone: false,
                            is_copy: false,
                            leading_comments: Vec::new(),
                            with_properties: Vec::new(),
                            teradata_post_name_options: Vec::new(),
                            with_data: None,
                            with_statistics: None,
                            teradata_indexes: Vec::new(),
                            with_cte: None,
                            properties: Vec::new(),
                            partition_of: None,
                            post_table_properties: Vec::new(),
                            mysql_table_options: Vec::new(),
                            inherits: Vec::new(),
                            on_property: None,
                            copy_grants: false,
                            using_template: None,
                            rollup: None,
                            uuid: None,
                        };
                        return Expression::CreateTable(Box::new(ct));
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // PostgreSQL: #foo -> INTO TEMPORARY foo
                        // Only rewrite when the temp-ness came from the `#` prefix;
                        // an already-explicit TEMPORARY needs no change.
                        if is_temp && !into.temporary {
                            let mut new_select = select.clone();
                            let mut new_into = into.clone();
                            new_into.temporary = true;
                            new_into.unlogged = false;
                            new_into.this = Expression::Table(Box::new(TableRef::new(clean_name)));
                            new_select.into = Some(new_into);
                            Expression::Select(new_select)
                        } else {
                            expr
                        }
                    }
                    // All other targets keep SELECT INTO as-is.
                    _ => expr,
                }
            } else {
                expr
            }
        } else {
            expr
        }
    }
29909
    /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
    /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
    ///
    /// Two passes over the statement's properties:
    /// 1. Presto-style `WITH (key = value)` pairs in `ct.with_properties` are
    ///    rewritten for the target: Hive gets STORED AS + TBLPROPERTIES, Spark
    ///    gets USING + TBLPROPERTIES, DuckDB drops them, Presto-family targets
    ///    re-normalize them, and other targets keep them as-is.
    /// 2. Hive/Spark-style property expressions in `ct.properties` are folded
    ///    back into `WITH (...)` pairs for Presto-family targets, stripped for
    ///    DuckDB, or (for remaining targets) have quoted STORED AS format names
    ///    unquoted.
    fn transform_create_table_properties(
        ct: &mut crate::expressions::CreateTable,
        _source: DialectType,
        target: DialectType,
    ) {
        use crate::expressions::{
            BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
            Properties,
        };

        // Helper to convert a raw property value string to the correct Expression
        let value_to_expr = |v: &str| -> Expression {
            let trimmed = v.trim();
            // Check if it's a quoted string (starts and ends with ')
            if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
                Expression::Literal(Box::new(Literal::String(
                    trimmed[1..trimmed.len() - 1].to_string(),
                )))
            }
            // Check if it's a number
            else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
                Expression::Literal(Box::new(Literal::Number(trimmed.to_string())))
            }
            // Check if it's ARRAY[...] or ARRAY(...)
            else if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
                // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
                let inner = trimmed
                    .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
                    .trim_start_matches('[')
                    .trim_start_matches('(')
                    .trim_end_matches(']')
                    .trim_end_matches(')');
                let elements: Vec<Expression> = inner
                    .split(',')
                    .map(|e| {
                        let elem = e.trim().trim_matches('\'');
                        Expression::Literal(Box::new(Literal::String(elem.to_string())))
                    })
                    .collect();
                Expression::Function(Box::new(crate::expressions::Function::new(
                    "ARRAY".to_string(),
                    elements,
                )))
            }
            // Otherwise, just output as identifier (unquoted)
            else {
                Expression::Identifier(Identifier::new(trimmed.to_string()))
            }
        };

        // Nothing to do when the statement carries no properties at all.
        if ct.with_properties.is_empty() && ct.properties.is_empty() {
            return;
        }

        // Handle Presto-style WITH properties
        if !ct.with_properties.is_empty() {
            // Extract FORMAT property and remaining properties
            let mut format_value: Option<String> = None;
            let mut partitioned_by: Option<String> = None;
            let mut other_props: Vec<(String, String)> = Vec::new();

            // Drain so the vec can be rebuilt per-target below.
            for (key, value) in ct.with_properties.drain(..) {
                if key.eq_ignore_ascii_case("FORMAT") {
                    // Strip surrounding quotes from value if present
                    format_value = Some(value.trim_matches('\'').to_string());
                } else if key.eq_ignore_ascii_case("PARTITIONED_BY") {
                    partitioned_by = Some(value);
                } else {
                    other_props.push((key, value));
                }
            }

            match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Presto: keep WITH properties but lowercase 'format' key
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("format".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        // Convert (col1, col2) to ARRAY['col1', 'col2'] format
                        let trimmed = part.trim();
                        let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
                        // Also handle ARRAY['...'] format - keep as-is
                        if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), part));
                        } else {
                            // Parse column names from the parenthesized list
                            let cols: Vec<&str> = inner
                                .split(',')
                                .map(|c| c.trim().trim_matches('"').trim_matches('\''))
                                .collect();
                            let array_val = format!(
                                "ARRAY[{}]",
                                cols.iter()
                                    .map(|c| format!("'{}'", c))
                                    .collect::<Vec<_>>()
                                    .join(", ")
                            );
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), array_val));
                        }
                    }
                    ct.with_properties.extend(other_props);
                }
                DialectType::Hive => {
                    // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                // hive_format set -> STORED AS form (contrast with
                                // the Spark branch below, where None means USING).
                                hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
                                    value: true,
                                }))),
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        // PARTITIONED_BY handling is complex - move columns to partitioned by
                        // For now, the partition columns are extracted from the column list
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        // Remaining pairs become a TBLPROPERTIES-style list of
                        // 'key' = value equality expressions.
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Box::new(Literal::String(k))),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::Spark | DialectType::Databricks => {
                    // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                hive_format: None, // None means USING syntax
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Box::new(Literal::String(k))),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::DuckDB => {
                    // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
                    // Keep nothing
                }
                _ => {
                    // For other dialects, keep WITH properties as-is
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("FORMAT".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        ct.with_properties
                            .push(("PARTITIONED_BY".to_string(), part));
                    }
                    ct.with_properties.extend(other_props);
                }
            }
        }

        // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
        // and Hive STORED AS -> Presto WITH (format=...) conversion
        if !ct.properties.is_empty() {
            let is_presto_target = matches!(
                target,
                DialectType::Presto | DialectType::Trino | DialectType::Athena
            );
            let is_duckdb_target = matches!(target, DialectType::DuckDB);

            if is_presto_target || is_duckdb_target {
                // Rebuild ct.properties, converting or dropping entries as we go.
                let mut new_properties = Vec::new();
                for prop in ct.properties.drain(..) {
                    match &prop {
                        Expression::FileFormatProperty(ffp) => {
                            if is_presto_target {
                                // Convert STORED AS/USING to WITH (format=...)
                                if let Some(ref fmt_expr) = ffp.this {
                                    let fmt_str = match fmt_expr.as_ref() {
                                        Expression::Identifier(id) => id.name.clone(),
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::String(_)) =>
                                        {
                                            let Literal::String(s) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            s.clone()
                                        }
                                        _ => {
                                            // Unexpected format expression: keep untouched.
                                            new_properties.push(prop);
                                            continue;
                                        }
                                    };
                                    ct.with_properties
                                        .push(("format".to_string(), format!("'{}'", fmt_str)));
                                }
                            }
                            // DuckDB: just strip file format properties
                        }
                        // Convert TBLPROPERTIES to WITH properties for Presto target
                        Expression::Properties(props) if is_presto_target => {
                            for expr in &props.expressions {
                                if let Expression::Eq(eq) = expr {
                                    // Extract key and value from the Eq expression
                                    let key = match &eq.left {
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::String(_)) =>
                                        {
                                            let Literal::String(s) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            s.clone()
                                        }
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    let value = match &eq.right {
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::String(_)) =>
                                        {
                                            let Literal::String(s) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            format!("'{}'", s)
                                        }
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::Number(_)) =>
                                        {
                                            let Literal::Number(n) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            n.clone()
                                        }
                                        Expression::Identifier(id) => id.name.clone(),
                                        // Unsupported value shapes are dropped.
                                        _ => continue,
                                    };
                                    ct.with_properties.push((key, value));
                                }
                            }
                        }
                        // Convert PartitionedByProperty for Presto target
                        Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
                            // Check if it contains ColumnDef expressions (Hive-style with types)
                            if let Expression::Tuple(ref tuple) = *pbp.this {
                                let mut col_names: Vec<String> = Vec::new();
                                let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                                let mut has_col_defs = false;
                                for expr in &tuple.expressions {
                                    if let Expression::ColumnDef(ref cd) = expr {
                                        has_col_defs = true;
                                        col_names.push(cd.name.name.clone());
                                        col_defs.push(*cd.clone());
                                    } else if let Expression::Column(ref col) = expr {
                                        col_names.push(col.name.name.clone());
                                    } else if let Expression::Identifier(ref id) = expr {
                                        col_names.push(id.name.clone());
                                    } else {
                                        // For function expressions like MONTHS(y), serialize to SQL
                                        let generic = Dialect::get(DialectType::Generic);
                                        if let Ok(sql) = generic.generate(expr) {
                                            col_names.push(sql);
                                        }
                                    }
                                }
                                if has_col_defs {
                                    // Merge partition column defs into the main column list
                                    for cd in col_defs {
                                        ct.columns.push(cd);
                                    }
                                }
                                if !col_names.is_empty() {
                                    // Add PARTITIONED_BY property
                                    let array_val = format!(
                                        "ARRAY[{}]",
                                        col_names
                                            .iter()
                                            .map(|n| format!("'{}'", n))
                                            .collect::<Vec<_>>()
                                            .join(", ")
                                    );
                                    ct.with_properties
                                        .push(("PARTITIONED_BY".to_string(), array_val));
                                }
                            }
                            // Skip - don't keep in properties
                        }
                        _ => {
                            // Anything unrecognized survives, except for DuckDB,
                            // which drops every table property.
                            if !is_duckdb_target {
                                new_properties.push(prop);
                            }
                        }
                    }
                }
                ct.properties = new_properties;
            } else {
                // For Hive/Spark targets, unquote format names in STORED AS
                for prop in &mut ct.properties {
                    if let Expression::FileFormatProperty(ref mut ffp) = prop {
                        if let Some(ref mut fmt_expr) = ffp.this {
                            if let Expression::Literal(lit) = fmt_expr.as_ref() {
                                if let Literal::String(s) = lit.as_ref() {
                                    // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
                                    let unquoted = s.clone();
                                    *fmt_expr =
                                        Box::new(Expression::Identifier(Identifier::new(unquoted)));
                                }
                            }
                        }
                    }
                }
            }
        }
    }
30253
30254 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
30255 fn apply_partitioned_by(
30256 ct: &mut crate::expressions::CreateTable,
30257 partitioned_by_value: &str,
30258 target: DialectType,
30259 ) {
30260 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
30261
30262 // Parse the ARRAY['col1', 'col2'] value to extract column names
30263 let mut col_names: Vec<String> = Vec::new();
30264 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
30265 let inner = partitioned_by_value
30266 .trim()
30267 .trim_start_matches("ARRAY")
30268 .trim_start_matches('[')
30269 .trim_start_matches('(')
30270 .trim_end_matches(']')
30271 .trim_end_matches(')');
30272 for part in inner.split(',') {
30273 let col = part.trim().trim_matches('\'').trim_matches('"');
30274 if !col.is_empty() {
30275 col_names.push(col.to_string());
30276 }
30277 }
30278
30279 if col_names.is_empty() {
30280 return;
30281 }
30282
30283 if matches!(target, DialectType::Hive) {
30284 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
30285 let mut partition_col_defs = Vec::new();
30286 for col_name in &col_names {
30287 // Find and remove from columns
30288 if let Some(pos) = ct
30289 .columns
30290 .iter()
30291 .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
30292 {
30293 let col_def = ct.columns.remove(pos);
30294 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
30295 }
30296 }
30297 if !partition_col_defs.is_empty() {
30298 ct.properties
30299 .push(Expression::PartitionedByProperty(Box::new(
30300 PartitionedByProperty {
30301 this: Box::new(Expression::Tuple(Box::new(Tuple {
30302 expressions: partition_col_defs,
30303 }))),
30304 },
30305 )));
30306 }
30307 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
30308 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
30309 // Use quoted identifiers to match the quoting style of the original column definitions
30310 let partition_exprs: Vec<Expression> = col_names
30311 .iter()
30312 .map(|name| {
30313 // Check if the column exists in the column list and use its quoting
30314 let is_quoted = ct
30315 .columns
30316 .iter()
30317 .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
30318 let ident = if is_quoted {
30319 Identifier::quoted(name.clone())
30320 } else {
30321 Identifier::new(name.clone())
30322 };
30323 Expression::boxed_column(Column {
30324 name: ident,
30325 table: None,
30326 join_mark: false,
30327 trailing_comments: Vec::new(),
30328 span: None,
30329 inferred_type: None,
30330 })
30331 })
30332 .collect();
30333 ct.properties
30334 .push(Expression::PartitionedByProperty(Box::new(
30335 PartitionedByProperty {
30336 this: Box::new(Expression::Tuple(Box::new(Tuple {
30337 expressions: partition_exprs,
30338 }))),
30339 },
30340 )));
30341 }
30342 // DuckDB: strip partitioned_by entirely (already handled)
30343 }
30344
30345 /// Convert a DataType to Spark's type string format (using angle brackets)
30346 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
30347 use crate::expressions::DataType;
30348 match dt {
30349 DataType::Int { .. } => "INT".to_string(),
30350 DataType::BigInt { .. } => "BIGINT".to_string(),
30351 DataType::SmallInt { .. } => "SMALLINT".to_string(),
30352 DataType::TinyInt { .. } => "TINYINT".to_string(),
30353 DataType::Float { .. } => "FLOAT".to_string(),
30354 DataType::Double { .. } => "DOUBLE".to_string(),
30355 DataType::Decimal {
30356 precision: Some(p),
30357 scale: Some(s),
30358 } => format!("DECIMAL({}, {})", p, s),
30359 DataType::Decimal {
30360 precision: Some(p), ..
30361 } => format!("DECIMAL({})", p),
30362 DataType::Decimal { .. } => "DECIMAL".to_string(),
30363 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
30364 "STRING".to_string()
30365 }
30366 DataType::Char { .. } => "STRING".to_string(),
30367 DataType::Boolean => "BOOLEAN".to_string(),
30368 DataType::Date => "DATE".to_string(),
30369 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
30370 DataType::Json | DataType::JsonB => "STRING".to_string(),
30371 DataType::Binary { .. } => "BINARY".to_string(),
30372 DataType::Array { element_type, .. } => {
30373 format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
30374 }
30375 DataType::Map {
30376 key_type,
30377 value_type,
30378 } => format!(
30379 "MAP<{}, {}>",
30380 Self::data_type_to_spark_string(key_type),
30381 Self::data_type_to_spark_string(value_type)
30382 ),
30383 DataType::Struct { fields, .. } => {
30384 let field_strs: Vec<String> = fields
30385 .iter()
30386 .map(|f| {
30387 if f.name.is_empty() {
30388 Self::data_type_to_spark_string(&f.data_type)
30389 } else {
30390 format!(
30391 "{}: {}",
30392 f.name,
30393 Self::data_type_to_spark_string(&f.data_type)
30394 )
30395 }
30396 })
30397 .collect();
30398 format!("STRUCT<{}>", field_strs.join(", "))
30399 }
30400 DataType::Custom { name } => name.clone(),
30401 _ => format!("{:?}", dt),
30402 }
30403 }
30404
30405 /// Extract value and unit from an Interval expression
30406 /// Returns (value_expression, IntervalUnit)
30407 fn extract_interval_parts(
30408 interval_expr: &Expression,
30409 ) -> (Expression, crate::expressions::IntervalUnit) {
30410 use crate::expressions::{IntervalUnit, IntervalUnitSpec};
30411
30412 if let Expression::Interval(iv) = interval_expr {
30413 let val = iv.this.clone().unwrap_or(Expression::number(0));
30414 let unit = match &iv.unit {
30415 Some(IntervalUnitSpec::Simple { unit, .. }) => *unit,
30416 None => {
30417 // Unit might be embedded in the string value (Snowflake format: '5 DAY')
30418 if let Expression::Literal(lit) = &val {
30419 if let crate::expressions::Literal::String(s) = lit.as_ref() {
30420 let parts: Vec<&str> = s.trim().splitn(2, ' ').collect();
30421 if parts.len() == 2 {
30422 let unit_str = parts[1].trim().to_ascii_uppercase();
30423 let parsed_unit = match unit_str.as_str() {
30424 "YEAR" | "YEARS" => IntervalUnit::Year,
30425 "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
30426 "MONTH" | "MONTHS" => IntervalUnit::Month,
30427 "WEEK" | "WEEKS" | "ISOWEEK" => IntervalUnit::Week,
30428 "DAY" | "DAYS" => IntervalUnit::Day,
30429 "HOUR" | "HOURS" => IntervalUnit::Hour,
30430 "MINUTE" | "MINUTES" => IntervalUnit::Minute,
30431 "SECOND" | "SECONDS" => IntervalUnit::Second,
30432 "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
30433 "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
30434 _ => IntervalUnit::Day,
30435 };
30436 // Return just the numeric part as value and parsed unit
30437 return (
30438 Expression::Literal(Box::new(
30439 crate::expressions::Literal::String(parts[0].to_string()),
30440 )),
30441 parsed_unit,
30442 );
30443 }
30444 IntervalUnit::Day
30445 } else {
30446 IntervalUnit::Day
30447 }
30448 } else {
30449 IntervalUnit::Day
30450 }
30451 }
30452 _ => IntervalUnit::Day,
30453 };
30454 (val, unit)
30455 } else {
30456 // Not an interval - pass through
30457 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
30458 }
30459 }
30460
30461 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
30462 fn normalize_bigquery_function(
30463 e: Expression,
30464 source: DialectType,
30465 target: DialectType,
30466 ) -> Result<Expression> {
30467 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
30468
30469 let f = if let Expression::Function(f) = e {
30470 *f
30471 } else {
30472 return Ok(e);
30473 };
30474 let name = f.name.to_ascii_uppercase();
30475 let mut args = f.args;
30476
30477 /// Helper to extract unit string from an identifier, column, or literal expression
30478 fn get_unit_str(expr: &Expression) -> String {
30479 match expr {
30480 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
30481 Expression::Var(v) => v.this.to_ascii_uppercase(),
30482 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
30483 let Literal::String(s) = lit.as_ref() else {
30484 unreachable!()
30485 };
30486 s.to_ascii_uppercase()
30487 }
30488 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
30489 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
30490 Expression::Function(f) => {
30491 let base = f.name.to_ascii_uppercase();
30492 if !f.args.is_empty() {
30493 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
30494 let inner = get_unit_str(&f.args[0]);
30495 format!("{}({})", base, inner)
30496 } else {
30497 base
30498 }
30499 }
30500 _ => "DAY".to_string(),
30501 }
30502 }
30503
30504 /// Parse unit string to IntervalUnit
30505 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
30506 match s {
30507 "YEAR" => crate::expressions::IntervalUnit::Year,
30508 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
30509 "MONTH" => crate::expressions::IntervalUnit::Month,
30510 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
30511 "DAY" => crate::expressions::IntervalUnit::Day,
30512 "HOUR" => crate::expressions::IntervalUnit::Hour,
30513 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30514 "SECOND" => crate::expressions::IntervalUnit::Second,
30515 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
30516 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
30517 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
30518 _ => crate::expressions::IntervalUnit::Day,
30519 }
30520 }
30521
30522 match name.as_str() {
30523 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
30524 // (BigQuery: result = date1 - date2, Standard: result = end - start)
30525 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
30526 let date1 = args.remove(0);
30527 let date2 = args.remove(0);
30528 let unit_expr = args.remove(0);
30529 let unit_str = get_unit_str(&unit_expr);
30530
30531 if matches!(target, DialectType::BigQuery) {
30532 // BigQuery -> BigQuery: just uppercase the unit
30533 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
30534 return Ok(Expression::Function(Box::new(Function::new(
30535 f.name,
30536 vec![date1, date2, unit],
30537 ))));
30538 }
30539
30540 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
30541 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
30542 if matches!(target, DialectType::Snowflake) {
30543 return Ok(Expression::TimestampDiff(Box::new(
30544 crate::expressions::TimestampDiff {
30545 this: Box::new(date2),
30546 expression: Box::new(date1),
30547 unit: Some(unit_str),
30548 },
30549 )));
30550 }
30551
30552 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
30553 if matches!(target, DialectType::DuckDB) {
30554 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
30555 // CAST to TIME
30556 let cast_fn = |e: Expression| -> Expression {
30557 match e {
30558 Expression::Literal(lit)
30559 if matches!(lit.as_ref(), Literal::String(_)) =>
30560 {
30561 let Literal::String(s) = lit.as_ref() else {
30562 unreachable!()
30563 };
30564 Expression::Cast(Box::new(Cast {
30565 this: Expression::Literal(Box::new(Literal::String(
30566 s.clone(),
30567 ))),
30568 to: DataType::Custom {
30569 name: "TIME".to_string(),
30570 },
30571 trailing_comments: vec![],
30572 double_colon_syntax: false,
30573 format: None,
30574 default: None,
30575 inferred_type: None,
30576 }))
30577 }
30578 other => other,
30579 }
30580 };
30581 (cast_fn(date1), cast_fn(date2))
30582 } else if name == "DATETIME_DIFF" {
30583 // CAST to TIMESTAMP
30584 (
30585 Self::ensure_cast_timestamp(date1),
30586 Self::ensure_cast_timestamp(date2),
30587 )
30588 } else {
30589 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
30590 (
30591 Self::ensure_cast_timestamptz(date1),
30592 Self::ensure_cast_timestamptz(date2),
30593 )
30594 };
30595 return Ok(Expression::Function(Box::new(Function::new(
30596 "DATE_DIFF".to_string(),
30597 vec![
30598 Expression::Literal(Box::new(Literal::String(unit_str))),
30599 cast_d2,
30600 cast_d1,
30601 ],
30602 ))));
30603 }
30604
30605 // Convert to standard TIMESTAMPDIFF(unit, start, end)
30606 let unit = Expression::Identifier(Identifier::new(unit_str));
30607 Ok(Expression::Function(Box::new(Function::new(
30608 "TIMESTAMPDIFF".to_string(),
30609 vec![unit, date2, date1],
30610 ))))
30611 }
30612
30613 // DATEDIFF(unit, start, end) -> target-specific form
30614 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
30615 "DATEDIFF" if args.len() == 3 => {
30616 let arg0 = args.remove(0);
30617 let arg1 = args.remove(0);
30618 let arg2 = args.remove(0);
30619 let unit_str = get_unit_str(&arg0);
30620
30621 // Redshift DATEDIFF(unit, start, end) order: result = end - start
30622 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
30623 // TSQL DATEDIFF(unit, start, end) order: result = end - start
30624
30625 if matches!(target, DialectType::Snowflake) {
30626 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
30627 let unit = Expression::Identifier(Identifier::new(unit_str));
30628 return Ok(Expression::Function(Box::new(Function::new(
30629 "DATEDIFF".to_string(),
30630 vec![unit, arg1, arg2],
30631 ))));
30632 }
30633
30634 if matches!(target, DialectType::DuckDB) {
30635 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
30636 let cast_d1 = Self::ensure_cast_timestamp(arg1);
30637 let cast_d2 = Self::ensure_cast_timestamp(arg2);
30638 return Ok(Expression::Function(Box::new(Function::new(
30639 "DATE_DIFF".to_string(),
30640 vec![
30641 Expression::Literal(Box::new(Literal::String(unit_str))),
30642 cast_d1,
30643 cast_d2,
30644 ],
30645 ))));
30646 }
30647
30648 if matches!(target, DialectType::BigQuery) {
30649 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
30650 let cast_d1 = Self::ensure_cast_datetime(arg1);
30651 let cast_d2 = Self::ensure_cast_datetime(arg2);
30652 let unit = Expression::Identifier(Identifier::new(unit_str));
30653 return Ok(Expression::Function(Box::new(Function::new(
30654 "DATE_DIFF".to_string(),
30655 vec![cast_d2, cast_d1, unit],
30656 ))));
30657 }
30658
30659 if matches!(target, DialectType::Spark | DialectType::Databricks) {
30660 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
30661 let unit = Expression::Identifier(Identifier::new(unit_str));
30662 return Ok(Expression::Function(Box::new(Function::new(
30663 "DATEDIFF".to_string(),
30664 vec![unit, arg1, arg2],
30665 ))));
30666 }
30667
30668 if matches!(target, DialectType::Hive) {
30669 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
30670 match unit_str.as_str() {
30671 "MONTH" => {
30672 return Ok(Expression::Function(Box::new(Function::new(
30673 "CAST".to_string(),
30674 vec![Expression::Function(Box::new(Function::new(
30675 "MONTHS_BETWEEN".to_string(),
30676 vec![arg2, arg1],
30677 )))],
30678 ))));
30679 }
30680 "WEEK" => {
30681 return Ok(Expression::Cast(Box::new(Cast {
30682 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
30683 Expression::Function(Box::new(Function::new(
30684 "DATEDIFF".to_string(),
30685 vec![arg2, arg1],
30686 ))),
30687 Expression::Literal(Box::new(Literal::Number("7".to_string()))),
30688 ))),
30689 to: DataType::Int {
30690 length: None,
30691 integer_spelling: false,
30692 },
30693 trailing_comments: vec![],
30694 double_colon_syntax: false,
30695 format: None,
30696 default: None,
30697 inferred_type: None,
30698 })));
30699 }
30700 _ => {
30701 // Default: DATEDIFF(end, start) for DAY
30702 return Ok(Expression::Function(Box::new(Function::new(
30703 "DATEDIFF".to_string(),
30704 vec![arg2, arg1],
30705 ))));
30706 }
30707 }
30708 }
30709
30710 if matches!(
30711 target,
30712 DialectType::Presto | DialectType::Trino | DialectType::Athena
30713 ) {
30714 // Presto/Trino: DATE_DIFF('UNIT', start, end)
30715 return Ok(Expression::Function(Box::new(Function::new(
30716 "DATE_DIFF".to_string(),
30717 vec![
30718 Expression::Literal(Box::new(Literal::String(unit_str))),
30719 arg1,
30720 arg2,
30721 ],
30722 ))));
30723 }
30724
30725 if matches!(target, DialectType::TSQL) {
30726 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
30727 let cast_d2 = Self::ensure_cast_datetime2(arg2);
30728 let unit = Expression::Identifier(Identifier::new(unit_str));
30729 return Ok(Expression::Function(Box::new(Function::new(
30730 "DATEDIFF".to_string(),
30731 vec![unit, arg1, cast_d2],
30732 ))));
30733 }
30734
30735 if matches!(target, DialectType::PostgreSQL) {
30736 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
30737 // For now, use DATEDIFF (passthrough) with uppercased unit
30738 let unit = Expression::Identifier(Identifier::new(unit_str));
30739 return Ok(Expression::Function(Box::new(Function::new(
30740 "DATEDIFF".to_string(),
30741 vec![unit, arg1, arg2],
30742 ))));
30743 }
30744
30745 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
30746 let unit = Expression::Identifier(Identifier::new(unit_str));
30747 Ok(Expression::Function(Box::new(Function::new(
30748 "DATEDIFF".to_string(),
30749 vec![unit, arg1, arg2],
30750 ))))
30751 }
30752
30753 // DATE_DIFF(date1, date2, unit) -> standard form
30754 "DATE_DIFF" if args.len() == 3 => {
30755 let date1 = args.remove(0);
30756 let date2 = args.remove(0);
30757 let unit_expr = args.remove(0);
30758 let unit_str = get_unit_str(&unit_expr);
30759
30760 if matches!(target, DialectType::BigQuery) {
30761 // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
30762 let norm_unit = if unit_str == "WEEK(SUNDAY)" {
30763 "WEEK".to_string()
30764 } else {
30765 unit_str
30766 };
30767 let norm_d1 = Self::date_literal_to_cast(date1);
30768 let norm_d2 = Self::date_literal_to_cast(date2);
30769 let unit = Expression::Identifier(Identifier::new(norm_unit));
30770 return Ok(Expression::Function(Box::new(Function::new(
30771 f.name,
30772 vec![norm_d1, norm_d2, unit],
30773 ))));
30774 }
30775
30776 if matches!(target, DialectType::MySQL) {
30777 // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
30778 let norm_d1 = Self::date_literal_to_cast(date1);
30779 let norm_d2 = Self::date_literal_to_cast(date2);
30780 return Ok(Expression::Function(Box::new(Function::new(
30781 "DATEDIFF".to_string(),
30782 vec![norm_d1, norm_d2],
30783 ))));
30784 }
30785
30786 if matches!(target, DialectType::StarRocks) {
30787 // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
30788 let norm_d1 = Self::date_literal_to_cast(date1);
30789 let norm_d2 = Self::date_literal_to_cast(date2);
30790 return Ok(Expression::Function(Box::new(Function::new(
30791 "DATE_DIFF".to_string(),
30792 vec![
30793 Expression::Literal(Box::new(Literal::String(unit_str))),
30794 norm_d1,
30795 norm_d2,
30796 ],
30797 ))));
30798 }
30799
30800 if matches!(target, DialectType::DuckDB) {
30801 // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
30802 let norm_d1 = Self::ensure_cast_date(date1);
30803 let norm_d2 = Self::ensure_cast_date(date2);
30804
30805 // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
30806 let is_week_variant = unit_str == "WEEK"
30807 || unit_str.starts_with("WEEK(")
30808 || unit_str == "ISOWEEK";
30809 if is_week_variant {
30810 // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
30811 // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
30812 // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
30813 // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
30814 let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
30815 None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
30816 } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
30817 Some("1") // Shift Sunday to Monday alignment
30818 } else if unit_str == "WEEK(SATURDAY)" {
30819 Some("-5")
30820 } else if unit_str == "WEEK(TUESDAY)" {
30821 Some("-1")
30822 } else if unit_str == "WEEK(WEDNESDAY)" {
30823 Some("-2")
30824 } else if unit_str == "WEEK(THURSDAY)" {
30825 Some("-3")
30826 } else if unit_str == "WEEK(FRIDAY)" {
30827 Some("-4")
30828 } else {
30829 Some("1") // default to Sunday
30830 };
30831
30832 let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
30833 let shifted = if let Some(off) = offset {
30834 let interval =
30835 Expression::Interval(Box::new(crate::expressions::Interval {
30836 this: Some(Expression::Literal(Box::new(Literal::String(
30837 off.to_string(),
30838 )))),
30839 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30840 unit: crate::expressions::IntervalUnit::Day,
30841 use_plural: false,
30842 }),
30843 }));
30844 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
30845 date, interval,
30846 )))
30847 } else {
30848 date
30849 };
30850 Expression::Function(Box::new(Function::new(
30851 "DATE_TRUNC".to_string(),
30852 vec![
30853 Expression::Literal(Box::new(Literal::String(
30854 "WEEK".to_string(),
30855 ))),
30856 shifted,
30857 ],
30858 )))
30859 };
30860
30861 let trunc_d2 = make_trunc(norm_d2, day_offset);
30862 let trunc_d1 = make_trunc(norm_d1, day_offset);
30863 return Ok(Expression::Function(Box::new(Function::new(
30864 "DATE_DIFF".to_string(),
30865 vec![
30866 Expression::Literal(Box::new(Literal::String("WEEK".to_string()))),
30867 trunc_d2,
30868 trunc_d1,
30869 ],
30870 ))));
30871 }
30872
30873 return Ok(Expression::Function(Box::new(Function::new(
30874 "DATE_DIFF".to_string(),
30875 vec![
30876 Expression::Literal(Box::new(Literal::String(unit_str))),
30877 norm_d2,
30878 norm_d1,
30879 ],
30880 ))));
30881 }
30882
30883 // Default: DATEDIFF(unit, date2, date1)
30884 let unit = Expression::Identifier(Identifier::new(unit_str));
30885 Ok(Expression::Function(Box::new(Function::new(
30886 "DATEDIFF".to_string(),
30887 vec![unit, date2, date1],
30888 ))))
30889 }
30890
30891 // TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
30892 "TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
30893 let ts = args.remove(0);
30894 let interval_expr = args.remove(0);
30895 let (val, unit) = Self::extract_interval_parts(&interval_expr);
30896
30897 match target {
30898 DialectType::Snowflake => {
30899 // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
30900 // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
30901 // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
30902 let unit_str = Self::interval_unit_to_string(&unit);
30903 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
30904 Ok(Expression::TimestampAdd(Box::new(
30905 crate::expressions::TimestampAdd {
30906 this: Box::new(val),
30907 expression: Box::new(cast_ts),
30908 unit: Some(unit_str.to_string()),
30909 },
30910 )))
30911 }
30912 DialectType::Spark | DialectType::Databricks => {
30913 if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
30914 // Spark DATETIME_ADD: ts + INTERVAL val UNIT
30915 let interval =
30916 Expression::Interval(Box::new(crate::expressions::Interval {
30917 this: Some(val),
30918 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30919 unit,
30920 use_plural: false,
30921 }),
30922 }));
30923 Ok(Expression::Add(Box::new(
30924 crate::expressions::BinaryOp::new(ts, interval),
30925 )))
30926 } else if name == "DATETIME_ADD"
30927 && matches!(target, DialectType::Databricks)
30928 {
30929 // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
30930 let unit_str = Self::interval_unit_to_string(&unit);
30931 Ok(Expression::Function(Box::new(Function::new(
30932 "TIMESTAMPADD".to_string(),
30933 vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
30934 ))))
30935 } else {
30936 // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
30937 let unit_str = Self::interval_unit_to_string(&unit);
30938 let cast_ts =
30939 if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
30940 Self::maybe_cast_ts(ts)
30941 } else {
30942 ts
30943 };
30944 Ok(Expression::Function(Box::new(Function::new(
30945 "DATE_ADD".to_string(),
30946 vec![
30947 Expression::Identifier(Identifier::new(unit_str)),
30948 val,
30949 cast_ts,
30950 ],
30951 ))))
30952 }
30953 }
30954 DialectType::MySQL => {
30955 // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
30956 let mysql_ts = if name.starts_with("TIMESTAMP") {
30957 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
30958 match &ts {
30959 Expression::Function(ref inner_f)
30960 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
30961 {
30962 // Already wrapped, keep as-is
30963 ts
30964 }
30965 _ => {
30966 // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
30967 let unwrapped = match ts {
30968 Expression::Literal(lit)
30969 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
30970 {
30971 let Literal::Timestamp(s) = lit.as_ref() else {
30972 unreachable!()
30973 };
30974 Expression::Literal(Box::new(Literal::String(
30975 s.clone(),
30976 )))
30977 }
30978 other => other,
30979 };
30980 Expression::Function(Box::new(Function::new(
30981 "TIMESTAMP".to_string(),
30982 vec![unwrapped],
30983 )))
30984 }
30985 }
30986 } else {
30987 ts
30988 };
30989 Ok(Expression::DateAdd(Box::new(
30990 crate::expressions::DateAddFunc {
30991 this: mysql_ts,
30992 interval: val,
30993 unit,
30994 },
30995 )))
30996 }
30997 _ => {
30998 // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
30999 let cast_ts = if matches!(target, DialectType::DuckDB) {
31000 if name == "DATETIME_ADD" {
31001 Self::ensure_cast_timestamp(ts)
31002 } else if name.starts_with("TIMESTAMP") {
31003 Self::maybe_cast_ts_to_tz(ts, &name)
31004 } else {
31005 ts
31006 }
31007 } else {
31008 ts
31009 };
31010 Ok(Expression::DateAdd(Box::new(
31011 crate::expressions::DateAddFunc {
31012 this: cast_ts,
31013 interval: val,
31014 unit,
31015 },
31016 )))
31017 }
31018 }
31019 }
31020
31021 // TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
31022 "TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
31023 let ts = args.remove(0);
31024 let interval_expr = args.remove(0);
31025 let (val, unit) = Self::extract_interval_parts(&interval_expr);
31026
31027 match target {
31028 DialectType::Snowflake => {
31029 // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
31030 let unit_str = Self::interval_unit_to_string(&unit);
31031 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
31032 let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
31033 val,
31034 Expression::Neg(Box::new(crate::expressions::UnaryOp {
31035 this: Expression::number(1),
31036 inferred_type: None,
31037 })),
31038 )));
31039 Ok(Expression::TimestampAdd(Box::new(
31040 crate::expressions::TimestampAdd {
31041 this: Box::new(neg_val),
31042 expression: Box::new(cast_ts),
31043 unit: Some(unit_str.to_string()),
31044 },
31045 )))
31046 }
31047 DialectType::Spark | DialectType::Databricks => {
31048 if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
31049 || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
31050 {
31051 // Spark: ts - INTERVAL val UNIT
31052 let cast_ts = if name.starts_with("TIMESTAMP") {
31053 Self::maybe_cast_ts(ts)
31054 } else {
31055 ts
31056 };
31057 let interval =
31058 Expression::Interval(Box::new(crate::expressions::Interval {
31059 this: Some(val),
31060 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31061 unit,
31062 use_plural: false,
31063 }),
31064 }));
31065 Ok(Expression::Sub(Box::new(
31066 crate::expressions::BinaryOp::new(cast_ts, interval),
31067 )))
31068 } else {
31069 // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
31070 let unit_str = Self::interval_unit_to_string(&unit);
31071 let neg_val =
31072 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
31073 val,
31074 Expression::Neg(Box::new(crate::expressions::UnaryOp {
31075 this: Expression::number(1),
31076 inferred_type: None,
31077 })),
31078 )));
31079 Ok(Expression::Function(Box::new(Function::new(
31080 "TIMESTAMPADD".to_string(),
31081 vec![
31082 Expression::Identifier(Identifier::new(unit_str)),
31083 neg_val,
31084 ts,
31085 ],
31086 ))))
31087 }
31088 }
31089 DialectType::MySQL => {
31090 let mysql_ts = if name.starts_with("TIMESTAMP") {
31091 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
31092 match &ts {
31093 Expression::Function(ref inner_f)
31094 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
31095 {
31096 // Already wrapped, keep as-is
31097 ts
31098 }
31099 _ => {
31100 let unwrapped = match ts {
31101 Expression::Literal(lit)
31102 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
31103 {
31104 let Literal::Timestamp(s) = lit.as_ref() else {
31105 unreachable!()
31106 };
31107 Expression::Literal(Box::new(Literal::String(
31108 s.clone(),
31109 )))
31110 }
31111 other => other,
31112 };
31113 Expression::Function(Box::new(Function::new(
31114 "TIMESTAMP".to_string(),
31115 vec![unwrapped],
31116 )))
31117 }
31118 }
31119 } else {
31120 ts
31121 };
31122 Ok(Expression::DateSub(Box::new(
31123 crate::expressions::DateAddFunc {
31124 this: mysql_ts,
31125 interval: val,
31126 unit,
31127 },
31128 )))
31129 }
31130 _ => {
31131 let cast_ts = if matches!(target, DialectType::DuckDB) {
31132 if name == "DATETIME_SUB" {
31133 Self::ensure_cast_timestamp(ts)
31134 } else if name.starts_with("TIMESTAMP") {
31135 Self::maybe_cast_ts_to_tz(ts, &name)
31136 } else {
31137 ts
31138 }
31139 } else {
31140 ts
31141 };
31142 Ok(Expression::DateSub(Box::new(
31143 crate::expressions::DateAddFunc {
31144 this: cast_ts,
31145 interval: val,
31146 unit,
31147 },
31148 )))
31149 }
31150 }
31151 }
31152
31153 // DATE_SUB(date, INTERVAL n UNIT) -> target-specific
31154 "DATE_SUB" if args.len() == 2 => {
31155 let date = args.remove(0);
31156 let interval_expr = args.remove(0);
31157 let (val, unit) = Self::extract_interval_parts(&interval_expr);
31158
31159 match target {
31160 DialectType::Databricks | DialectType::Spark => {
31161 // Databricks/Spark: DATE_ADD(date, -val)
31162 // Use DateAdd expression with negative val so it generates correctly
31163 // The generator will output DATE_ADD(date, INTERVAL -val DAY)
31164 // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
31165 // Instead, we directly output as a simple negated DateSub
31166 Ok(Expression::DateSub(Box::new(
31167 crate::expressions::DateAddFunc {
31168 this: date,
31169 interval: val,
31170 unit,
31171 },
31172 )))
31173 }
31174 DialectType::DuckDB => {
31175 // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
31176 let cast_date = Self::ensure_cast_date(date);
31177 let interval =
31178 Expression::Interval(Box::new(crate::expressions::Interval {
31179 this: Some(val),
31180 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31181 unit,
31182 use_plural: false,
31183 }),
31184 }));
31185 Ok(Expression::Sub(Box::new(
31186 crate::expressions::BinaryOp::new(cast_date, interval),
31187 )))
31188 }
31189 DialectType::Snowflake => {
31190 // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
31191 // Just ensure the date is cast properly
31192 let cast_date = Self::ensure_cast_date(date);
31193 Ok(Expression::DateSub(Box::new(
31194 crate::expressions::DateAddFunc {
31195 this: cast_date,
31196 interval: val,
31197 unit,
31198 },
31199 )))
31200 }
31201 DialectType::PostgreSQL => {
31202 // PostgreSQL: date - INTERVAL 'val UNIT'
31203 let unit_str = Self::interval_unit_to_string(&unit);
31204 let interval =
31205 Expression::Interval(Box::new(crate::expressions::Interval {
31206 this: Some(Expression::Literal(Box::new(Literal::String(
31207 format!("{} {}", Self::expr_to_string(&val), unit_str),
31208 )))),
31209 unit: None,
31210 }));
31211 Ok(Expression::Sub(Box::new(
31212 crate::expressions::BinaryOp::new(date, interval),
31213 )))
31214 }
31215 _ => Ok(Expression::DateSub(Box::new(
31216 crate::expressions::DateAddFunc {
31217 this: date,
31218 interval: val,
31219 unit,
31220 },
31221 ))),
31222 }
31223 }
31224
31225 // DATEADD(unit, val, date) -> target-specific form
31226 // Used by: Redshift, Snowflake, TSQL, ClickHouse
31227 "DATEADD" if args.len() == 3 => {
31228 let arg0 = args.remove(0);
31229 let arg1 = args.remove(0);
31230 let arg2 = args.remove(0);
31231 let unit_str = get_unit_str(&arg0);
31232
31233 if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
31234 // Keep DATEADD(UNIT, val, date) with uppercased unit
31235 let unit = Expression::Identifier(Identifier::new(unit_str));
31236 // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
31237 let date = if matches!(target, DialectType::TSQL)
31238 && !matches!(
31239 source,
31240 DialectType::Spark | DialectType::Databricks | DialectType::Hive
31241 ) {
31242 Self::ensure_cast_datetime2(arg2)
31243 } else {
31244 arg2
31245 };
31246 return Ok(Expression::Function(Box::new(Function::new(
31247 "DATEADD".to_string(),
31248 vec![unit, arg1, date],
31249 ))));
31250 }
31251
31252 if matches!(target, DialectType::DuckDB) {
31253 // DuckDB: date + INTERVAL 'val' UNIT
31254 let iu = parse_interval_unit(&unit_str);
31255 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
31256 this: Some(arg1),
31257 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31258 unit: iu,
31259 use_plural: false,
31260 }),
31261 }));
31262 let cast_date = Self::ensure_cast_timestamp(arg2);
31263 return Ok(Expression::Add(Box::new(
31264 crate::expressions::BinaryOp::new(cast_date, interval),
31265 )));
31266 }
31267
31268 if matches!(target, DialectType::BigQuery) {
31269 // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
31270 let iu = parse_interval_unit(&unit_str);
31271 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
31272 this: Some(arg1),
31273 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31274 unit: iu,
31275 use_plural: false,
31276 }),
31277 }));
31278 return Ok(Expression::Function(Box::new(Function::new(
31279 "DATE_ADD".to_string(),
31280 vec![arg2, interval],
31281 ))));
31282 }
31283
31284 if matches!(target, DialectType::Databricks) {
31285 // Databricks: keep DATEADD(UNIT, val, date) format
31286 let unit = Expression::Identifier(Identifier::new(unit_str));
31287 return Ok(Expression::Function(Box::new(Function::new(
31288 "DATEADD".to_string(),
31289 vec![unit, arg1, arg2],
31290 ))));
31291 }
31292
31293 if matches!(target, DialectType::Spark) {
31294 // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
31295 fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
31296 if let Expression::Literal(lit) = &expr {
31297 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
31298 if let Ok(val) = n.parse::<i64>() {
31299 return Expression::Literal(Box::new(
31300 crate::expressions::Literal::Number(
31301 (val * factor).to_string(),
31302 ),
31303 ));
31304 }
31305 }
31306 }
31307 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
31308 expr,
31309 Expression::Literal(Box::new(crate::expressions::Literal::Number(
31310 factor.to_string(),
31311 ))),
31312 )))
31313 }
31314 match unit_str.as_str() {
31315 "YEAR" => {
31316 let months = multiply_expr_dateadd(arg1, 12);
31317 return Ok(Expression::Function(Box::new(Function::new(
31318 "ADD_MONTHS".to_string(),
31319 vec![arg2, months],
31320 ))));
31321 }
31322 "QUARTER" => {
31323 let months = multiply_expr_dateadd(arg1, 3);
31324 return Ok(Expression::Function(Box::new(Function::new(
31325 "ADD_MONTHS".to_string(),
31326 vec![arg2, months],
31327 ))));
31328 }
31329 "MONTH" => {
31330 return Ok(Expression::Function(Box::new(Function::new(
31331 "ADD_MONTHS".to_string(),
31332 vec![arg2, arg1],
31333 ))));
31334 }
31335 "WEEK" => {
31336 let days = multiply_expr_dateadd(arg1, 7);
31337 return Ok(Expression::Function(Box::new(Function::new(
31338 "DATE_ADD".to_string(),
31339 vec![arg2, days],
31340 ))));
31341 }
31342 "DAY" => {
31343 return Ok(Expression::Function(Box::new(Function::new(
31344 "DATE_ADD".to_string(),
31345 vec![arg2, arg1],
31346 ))));
31347 }
31348 _ => {
31349 let unit = Expression::Identifier(Identifier::new(unit_str));
31350 return Ok(Expression::Function(Box::new(Function::new(
31351 "DATE_ADD".to_string(),
31352 vec![unit, arg1, arg2],
31353 ))));
31354 }
31355 }
31356 }
31357
31358 if matches!(target, DialectType::Hive) {
31359 // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
31360 match unit_str.as_str() {
31361 "DAY" => {
31362 return Ok(Expression::Function(Box::new(Function::new(
31363 "DATE_ADD".to_string(),
31364 vec![arg2, arg1],
31365 ))));
31366 }
31367 "MONTH" => {
31368 return Ok(Expression::Function(Box::new(Function::new(
31369 "ADD_MONTHS".to_string(),
31370 vec![arg2, arg1],
31371 ))));
31372 }
31373 _ => {
31374 let iu = parse_interval_unit(&unit_str);
31375 let interval =
31376 Expression::Interval(Box::new(crate::expressions::Interval {
31377 this: Some(arg1),
31378 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31379 unit: iu,
31380 use_plural: false,
31381 }),
31382 }));
31383 return Ok(Expression::Add(Box::new(
31384 crate::expressions::BinaryOp::new(arg2, interval),
31385 )));
31386 }
31387 }
31388 }
31389
31390 if matches!(target, DialectType::PostgreSQL) {
31391 // PostgreSQL: date + INTERVAL 'val UNIT'
31392 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
31393 this: Some(Expression::Literal(Box::new(Literal::String(format!(
31394 "{} {}",
31395 Self::expr_to_string(&arg1),
31396 unit_str
31397 ))))),
31398 unit: None,
31399 }));
31400 return Ok(Expression::Add(Box::new(
31401 crate::expressions::BinaryOp::new(arg2, interval),
31402 )));
31403 }
31404
31405 if matches!(
31406 target,
31407 DialectType::Presto | DialectType::Trino | DialectType::Athena
31408 ) {
31409 // Presto/Trino: DATE_ADD('UNIT', val, date)
31410 return Ok(Expression::Function(Box::new(Function::new(
31411 "DATE_ADD".to_string(),
31412 vec![
31413 Expression::Literal(Box::new(Literal::String(unit_str))),
31414 arg1,
31415 arg2,
31416 ],
31417 ))));
31418 }
31419
31420 if matches!(target, DialectType::ClickHouse) {
31421 // ClickHouse: DATE_ADD(UNIT, val, date)
31422 let unit = Expression::Identifier(Identifier::new(unit_str));
31423 return Ok(Expression::Function(Box::new(Function::new(
31424 "DATE_ADD".to_string(),
31425 vec![unit, arg1, arg2],
31426 ))));
31427 }
31428
31429 // Default: keep DATEADD with uppercased unit
31430 let unit = Expression::Identifier(Identifier::new(unit_str));
31431 Ok(Expression::Function(Box::new(Function::new(
31432 "DATEADD".to_string(),
31433 vec![unit, arg1, arg2],
31434 ))))
31435 }
31436
            // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
            "DATE_ADD" if args.len() == 3 => {
                // args: [unit, amount, date]. The unit is normalized to a string via
                // get_unit_str (presumably uppercased — elsewhere it is matched against
                // "DAY"/"WEEK"/... — TODO confirm).
                let arg0 = args.remove(0);
                let arg1 = args.remove(0);
                let arg2 = args.remove(0);
                let unit_str = get_unit_str(&arg0);

                if matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    // Presto/Trino: DATE_ADD('UNIT', val, date)
                    // Note: the unit becomes a *string literal* here, unlike the
                    // identifier form used by the other targets below.
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![
                            Expression::Literal(Box::new(Literal::String(unit_str))),
                            arg1,
                            arg2,
                        ],
                    ))));
                }

                if matches!(
                    target,
                    DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
                ) {
                    // DATEADD(UNIT, val, date)
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    // TSQL additionally wraps the date operand so it is a DATETIME2.
                    let date = if matches!(target, DialectType::TSQL) {
                        Self::ensure_cast_datetime2(arg2)
                    } else {
                        arg2
                    };
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![unit, arg1, date],
                    ))));
                }

                if matches!(target, DialectType::DuckDB) {
                    // DuckDB: date + INTERVAL val UNIT
                    let iu = parse_interval_unit(&unit_str);
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(arg1),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        }),
                    }));
                    return Ok(Expression::Add(Box::new(
                        crate::expressions::BinaryOp::new(arg2, interval),
                    )));
                }

                if matches!(target, DialectType::Spark | DialectType::Databricks) {
                    // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![unit, arg1, arg2],
                    ))));
                }

                // Default: DATE_ADD(UNIT, val, date)
                let unit = Expression::Identifier(Identifier::new(unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
31507
            // DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
            "DATE_ADD" if args.len() == 2 => {
                // Decompose the interval argument into its amount (`val`) and unit so
                // each target can re-assemble it in its own syntax.
                let date = args.remove(0);
                let interval_expr = args.remove(0);
                let (val, unit) = Self::extract_interval_parts(&interval_expr);
                let unit_str = Self::interval_unit_to_string(&unit);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
                        let cast_date = Self::ensure_cast_date(date);
                        let quoted_val = Self::quote_interval_val(&val);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(cast_date, interval),
                        )))
                    }
                    DialectType::PostgreSQL => {
                        // PostgreSQL: date + INTERVAL 'val UNIT'
                        // The amount and unit are folded into a single quoted string.
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(Literal::String(
                                    format!("{} {}", Self::expr_to_string(&val), unit_str),
                                )))),
                                unit: None,
                            }));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(date, interval),
                        )))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
                        let val_str = Self::expr_to_string(&val);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![
                                Expression::Literal(Box::new(Literal::String(
                                    unit_str.to_string(),
                                ))),
                                Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(Box::new(Literal::String(val_str))),
                                    to: DataType::BigInt { length: None },
                                    trailing_comments: vec![],
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })),
                                date,
                            ],
                        ))))
                    }
                    DialectType::Spark | DialectType::Hive => {
                        // Spark/Hive: DATE_ADD(date, val) for DAY
                        match unit_str {
                            "DAY" => Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![date, val],
                            )))),
                            "MONTH" => Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![date, val],
                            )))),
                            _ => {
                                // NOTE(review): for units other than DAY/MONTH this emits
                                // DATE_ADD(date, INTERVAL ...) — confirm both Spark and
                                // Hive accept an interval as the second argument.
                                let iu = parse_interval_unit(&unit_str);
                                let interval =
                                    Expression::Interval(Box::new(crate::expressions::Interval {
                                        this: Some(val),
                                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                            unit: iu,
                                            use_plural: false,
                                        }),
                                    }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(),
                                    vec![date, interval],
                                ))))
                            }
                        }
                    }
                    DialectType::Snowflake => {
                        // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
                        let cast_date = Self::ensure_cast_date(date);
                        let val_str = Self::expr_to_string(&val);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new(unit_str)),
                                Expression::Literal(Box::new(Literal::String(val_str))),
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::TSQL | DialectType::Fabric => {
                        // TSQL/Fabric: DATEADD(UNIT, val, CAST(date AS DATETIME2))
                        let cast_date = Self::ensure_cast_datetime2(date);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new(unit_str)),
                                val,
                                cast_date,
                            ],
                        ))))
                    }
                    // Redshift: DATEADD(UNIT, val, date) — no cast on the date operand.
                    DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
                    )))),
                    DialectType::MySQL => {
                        // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
                        let quoted_val = Self::quote_interval_val(&val);
                        let iu = parse_interval_unit(&unit_str);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: iu,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT)
                        let quoted_val = Self::quote_interval_val(&val);
                        let iu = parse_interval_unit(&unit_str);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: iu,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval],
                        ))))
                    }
                    // Databricks: DATEADD(UNIT, val, date).
                    DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
                    )))),
                    _ => {
                        // Default: keep as DATE_ADD with decomposed interval
                        Ok(Expression::DateAdd(Box::new(
                            crate::expressions::DateAddFunc {
                                this: date,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                }
            }
31673
31674 // ADD_MONTHS(date, val) -> target-specific form
31675 "ADD_MONTHS" if args.len() == 2 => {
31676 let date = args.remove(0);
31677 let val = args.remove(0);
31678
31679 if matches!(target, DialectType::TSQL) {
31680 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
31681 let cast_date = Self::ensure_cast_datetime2(date);
31682 return Ok(Expression::Function(Box::new(Function::new(
31683 "DATEADD".to_string(),
31684 vec![
31685 Expression::Identifier(Identifier::new("MONTH")),
31686 val,
31687 cast_date,
31688 ],
31689 ))));
31690 }
31691
31692 if matches!(target, DialectType::DuckDB) {
31693 // DuckDB: date + INTERVAL val MONTH
31694 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
31695 this: Some(val),
31696 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31697 unit: crate::expressions::IntervalUnit::Month,
31698 use_plural: false,
31699 }),
31700 }));
31701 return Ok(Expression::Add(Box::new(
31702 crate::expressions::BinaryOp::new(date, interval),
31703 )));
31704 }
31705
31706 if matches!(target, DialectType::Snowflake) {
31707 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
31708 if matches!(source, DialectType::Snowflake) {
31709 return Ok(Expression::Function(Box::new(Function::new(
31710 "ADD_MONTHS".to_string(),
31711 vec![date, val],
31712 ))));
31713 }
31714 return Ok(Expression::Function(Box::new(Function::new(
31715 "DATEADD".to_string(),
31716 vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
31717 ))));
31718 }
31719
31720 if matches!(target, DialectType::Spark | DialectType::Databricks) {
31721 // Spark: ADD_MONTHS(date, val) - keep as is
31722 return Ok(Expression::Function(Box::new(Function::new(
31723 "ADD_MONTHS".to_string(),
31724 vec![date, val],
31725 ))));
31726 }
31727
31728 if matches!(target, DialectType::Hive) {
31729 return Ok(Expression::Function(Box::new(Function::new(
31730 "ADD_MONTHS".to_string(),
31731 vec![date, val],
31732 ))));
31733 }
31734
31735 if matches!(
31736 target,
31737 DialectType::Presto | DialectType::Trino | DialectType::Athena
31738 ) {
31739 // Presto: DATE_ADD('MONTH', val, date)
31740 return Ok(Expression::Function(Box::new(Function::new(
31741 "DATE_ADD".to_string(),
31742 vec![
31743 Expression::Literal(Box::new(Literal::String("MONTH".to_string()))),
31744 val,
31745 date,
31746 ],
31747 ))));
31748 }
31749
31750 // Default: keep ADD_MONTHS
31751 Ok(Expression::Function(Box::new(Function::new(
31752 "ADD_MONTHS".to_string(),
31753 vec![date, val],
31754 ))))
31755 }
31756
            // SAFE_DIVIDE(x, y) -> target-specific form directly
            // BigQuery's SAFE_DIVIDE returns NULL when the divisor is zero; every
            // rewrite below builds the equivalent "y <> 0 ? x / y : NULL" shape.
            "SAFE_DIVIDE" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                // Wrap x and y in parens if they're complex expressions
                // (simple columns/literals/identifiers are safe to inline without
                // changing operator precedence).
                let y_ref = match &y {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        y.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: y.clone(),
                        trailing_comments: vec![],
                    })),
                };
                let x_ref = match &x {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        x.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: x.clone(),
                        trailing_comments: vec![],
                    })),
                };
                // condition: y <> 0; div_expr: x / y. Both keep clones so the
                // target-specific branches below can still move x_ref/y_ref.
                let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
                    y_ref.clone(),
                    Expression::number(0),
                )));
                let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                    x_ref.clone(),
                    y_ref.clone(),
                )));

                match target {
                    DialectType::DuckDB | DialectType::PostgreSQL => {
                        // CASE WHEN y <> 0 THEN x / y ELSE NULL END
                        // PostgreSQL additionally casts x to DOUBLE PRECISION,
                        // presumably to avoid integer division — TODO confirm.
                        let result_div = if matches!(target, DialectType::PostgreSQL) {
                            let cast_x = Expression::Cast(Box::new(Cast {
                                this: x_ref,
                                to: DataType::Custom {
                                    name: "DOUBLE PRECISION".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                                cast_x, y_ref,
                            )))
                        } else {
                            div_expr
                        };
                        Ok(Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens: vec![(condition, result_div)],
                            else_: Some(Expression::Null(crate::expressions::Null)),
                            comments: Vec::new(),
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // IFF(y <> 0, x / y, NULL)
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: Some("IFF".to_string()),
                            inferred_type: None,
                        })))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
                        let cast_x = Expression::Cast(Box::new(Cast {
                            this: x_ref,
                            to: DataType::Double {
                                precision: None,
                                scale: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let cast_div = Expression::Div(Box::new(
                            crate::expressions::BinaryOp::new(cast_x, y_ref),
                        ));
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: cast_div,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                            inferred_type: None,
                        })))
                    }
                    _ => {
                        // IF(y <> 0, x / y, NULL)
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                            inferred_type: None,
                        })))
                    }
                }
            }
31865
31866 // GENERATE_UUID() -> UUID() with CAST to string
31867 "GENERATE_UUID" => {
31868 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
31869 this: None,
31870 name: None,
31871 is_string: None,
31872 }));
31873 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
31874 let cast_type = match target {
31875 DialectType::DuckDB => Some(DataType::Text),
31876 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
31877 length: None,
31878 parenthesized_length: false,
31879 }),
31880 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
31881 Some(DataType::String { length: None })
31882 }
31883 _ => None,
31884 };
31885 if let Some(dt) = cast_type {
31886 Ok(Expression::Cast(Box::new(Cast {
31887 this: uuid_expr,
31888 to: dt,
31889 trailing_comments: vec![],
31890 double_colon_syntax: false,
31891 format: None,
31892 default: None,
31893 inferred_type: None,
31894 })))
31895 } else {
31896 Ok(uuid_expr)
31897 }
31898 }
31899
31900 // COUNTIF(x) -> CountIf expression
31901 "COUNTIF" if args.len() == 1 => {
31902 let arg = args.remove(0);
31903 Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
31904 this: arg,
31905 distinct: false,
31906 filter: None,
31907 order_by: vec![],
31908 name: None,
31909 ignore_nulls: None,
31910 having_max: None,
31911 limit: None,
31912 inferred_type: None,
31913 })))
31914 }
31915
            // EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression
            "EDIT_DISTANCE" => {
                // Strip named arguments (max_distance => N) and pass as positional
                let mut positional_args: Vec<Expression> = vec![];
                for arg in args {
                    match arg {
                        Expression::NamedArgument(na) => {
                            positional_args.push(na.value);
                        }
                        other => positional_args.push(other),
                    }
                }
                if positional_args.len() >= 2 {
                    let col1 = positional_args.remove(0);
                    let col2 = positional_args.remove(0);
                    let levenshtein = crate::expressions::BinaryFunc {
                        this: col1,
                        expression: col2,
                        original_name: None,
                        inferred_type: None,
                    };
                    // Pass extra args through a function wrapper with all args
                    if !positional_args.is_empty() {
                        let max_dist = positional_args.remove(0);
                        // DuckDB: CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(a, b), max) END
                        if matches!(target, DialectType::DuckDB) {
                            // Moving the two operand fields out of `levenshtein` here;
                            // the struct itself is not used again on this path.
                            let lev = Expression::Function(Box::new(Function::new(
                                "LEVENSHTEIN".to_string(),
                                vec![levenshtein.this, levenshtein.expression],
                            )));
                            let lev_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: lev.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let max_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: max_dist.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let null_check =
                                Expression::Or(Box::new(crate::expressions::BinaryOp {
                                    left: lev_is_null,
                                    right: max_is_null,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                            let least =
                                Expression::Least(Box::new(crate::expressions::VarArgFunc {
                                    expressions: vec![lev, max_dist],
                                    original_name: None,
                                    inferred_type: None,
                                }));
                            return Ok(Expression::Case(Box::new(crate::expressions::Case {
                                operand: None,
                                whens: vec![(
                                    null_check,
                                    Expression::Null(crate::expressions::Null),
                                )],
                                else_: Some(least),
                                comments: Vec::new(),
                                inferred_type: None,
                            })));
                        }
                        // Non-DuckDB with a max_distance: re-append any remaining args.
                        let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
                        all_args.extend(positional_args);
                        // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
                        let func_name = if matches!(target, DialectType::PostgreSQL) {
                            "LEVENSHTEIN_LESS_EQUAL"
                        } else {
                            "LEVENSHTEIN"
                        };
                        return Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            all_args,
                        ))));
                    }
                    // Plain two-argument form: dedicated Levenshtein node.
                    Ok(Expression::Levenshtein(Box::new(levenshtein)))
                } else {
                    // Fewer than two usable args: keep the call untouched.
                    Ok(Expression::Function(Box::new(Function::new(
                        "EDIT_DISTANCE".to_string(),
                        positional_args,
                    ))))
                }
            }
32005
32006 // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
32007 "TIMESTAMP_SECONDS" if args.len() == 1 => {
32008 let arg = args.remove(0);
32009 Ok(Expression::UnixToTime(Box::new(
32010 crate::expressions::UnixToTime {
32011 this: Box::new(arg),
32012 scale: Some(0),
32013 zone: None,
32014 hours: None,
32015 minutes: None,
32016 format: None,
32017 target_type: None,
32018 },
32019 )))
32020 }
32021
32022 // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
32023 "TIMESTAMP_MILLIS" if args.len() == 1 => {
32024 let arg = args.remove(0);
32025 Ok(Expression::UnixToTime(Box::new(
32026 crate::expressions::UnixToTime {
32027 this: Box::new(arg),
32028 scale: Some(3),
32029 zone: None,
32030 hours: None,
32031 minutes: None,
32032 format: None,
32033 target_type: None,
32034 },
32035 )))
32036 }
32037
32038 // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
32039 "TIMESTAMP_MICROS" if args.len() == 1 => {
32040 let arg = args.remove(0);
32041 Ok(Expression::UnixToTime(Box::new(
32042 crate::expressions::UnixToTime {
32043 this: Box::new(arg),
32044 scale: Some(6),
32045 zone: None,
32046 hours: None,
32047 minutes: None,
32048 format: None,
32049 target_type: None,
32050 },
32051 )))
32052 }
32053
32054 // DIV(x, y) -> IntDiv expression
32055 "DIV" if args.len() == 2 => {
32056 let x = args.remove(0);
32057 let y = args.remove(0);
32058 Ok(Expression::IntDiv(Box::new(
32059 crate::expressions::BinaryFunc {
32060 this: x,
32061 expression: y,
32062 original_name: None,
32063 inferred_type: None,
32064 },
32065 )))
32066 }
32067
            // TO_HEX(x) -> target-specific form
            "TO_HEX" if args.len() == 1 => {
                let arg = args.remove(0);
                // Check if inner function already returns hex string in certain targets
                let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: keep as TO_HEX
                    Ok(Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg],
                    ))))
                } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
                    // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
                    Ok(arg)
                } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
                    // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
                    // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
                    // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
                    // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
                    if let Expression::Function(ref inner_f) = arg {
                        let inner_args = inner_f.args.clone();
                        // Map each hash function to its *_BINARY variant; SHA2 takes
                        // the digest width as an extra numeric argument.
                        let binary_func = match inner_f.name.to_ascii_uppercase().as_str() {
                            "SHA1" => Expression::Function(Box::new(Function::new(
                                "SHA1_BINARY".to_string(),
                                inner_args,
                            ))),
                            "MD5" => Expression::Function(Box::new(Function::new(
                                "MD5_BINARY".to_string(),
                                inner_args,
                            ))),
                            "SHA256" => {
                                let mut a = inner_args;
                                a.push(Expression::number(256));
                                Expression::Function(Box::new(Function::new(
                                    "SHA2_BINARY".to_string(),
                                    a,
                                )))
                            }
                            "SHA512" => {
                                let mut a = inner_args;
                                a.push(Expression::number(512));
                                Expression::Function(Box::new(Function::new(
                                    "SHA2_BINARY".to_string(),
                                    a,
                                )))
                            }
                            _ => arg.clone(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_CHAR".to_string(),
                            vec![binary_func],
                        ))))
                    } else {
                        // Unreachable in practice: inner_returns_hex is only true when
                        // arg matches Expression::Function, so this if-let cannot fail.
                        // Kept as a defensive fallback (LOWER(HEX(arg))).
                        let inner = Expression::Function(Box::new(Function::new(
                            "HEX".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Lower(Box::new(
                            crate::expressions::UnaryFunc::new(inner),
                        )))
                    }
                } else if matches!(target, DialectType::Presto | DialectType::Trino) {
                    // Presto/Trino: LOWER(TO_HEX(arg)) — their TO_HEX emits uppercase.
                    let inner = Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg],
                    )));
                    Ok(Expression::Lower(Box::new(
                        crate::expressions::UnaryFunc::new(inner),
                    )))
                } else {
                    // Default (also DuckDB/Snowflake without a hash inside): LOWER(HEX(arg)).
                    let inner =
                        Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
                    Ok(Expression::Lower(Box::new(
                        crate::expressions::UnaryFunc::new(inner),
                    )))
                }
            }
32145
32146 // LAST_DAY(date, unit) -> strip unit for most targets, or transform for PostgreSQL
32147 "LAST_DAY" if args.len() == 2 => {
32148 let date = args.remove(0);
32149 let _unit = args.remove(0); // Strip the unit (MONTH is default)
32150 Ok(Expression::Function(Box::new(Function::new(
32151 "LAST_DAY".to_string(),
32152 vec![date],
32153 ))))
32154 }
32155
32156 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
32157 "GENERATE_ARRAY" => {
32158 let start = args.get(0).cloned();
32159 let end = args.get(1).cloned();
32160 let step = args.get(2).cloned();
32161 Ok(Expression::GenerateSeries(Box::new(
32162 crate::expressions::GenerateSeries {
32163 start: start.map(Box::new),
32164 end: end.map(Box::new),
32165 step: step.map(Box::new),
32166 is_end_exclusive: None,
32167 },
32168 )))
32169 }
32170
32171 // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
32172 "GENERATE_TIMESTAMP_ARRAY" => {
32173 let start = args.get(0).cloned();
32174 let end = args.get(1).cloned();
32175 let step = args.get(2).cloned();
32176
32177 if matches!(target, DialectType::DuckDB) {
32178 // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
32179 // Only cast string literals - leave columns/expressions as-is
32180 let maybe_cast_ts = |expr: Expression| -> Expression {
32181 if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
32182 {
32183 Expression::Cast(Box::new(Cast {
32184 this: expr,
32185 to: DataType::Timestamp {
32186 precision: None,
32187 timezone: false,
32188 },
32189 trailing_comments: vec![],
32190 double_colon_syntax: false,
32191 format: None,
32192 default: None,
32193 inferred_type: None,
32194 }))
32195 } else {
32196 expr
32197 }
32198 };
32199 let cast_start = start.map(maybe_cast_ts);
32200 let cast_end = end.map(maybe_cast_ts);
32201 Ok(Expression::GenerateSeries(Box::new(
32202 crate::expressions::GenerateSeries {
32203 start: cast_start.map(Box::new),
32204 end: cast_end.map(Box::new),
32205 step: step.map(Box::new),
32206 is_end_exclusive: None,
32207 },
32208 )))
32209 } else {
32210 Ok(Expression::GenerateSeries(Box::new(
32211 crate::expressions::GenerateSeries {
32212 start: start.map(Box::new),
32213 end: end.map(Box::new),
32214 step: step.map(Box::new),
32215 is_end_exclusive: None,
32216 },
32217 )))
32218 }
32219 }
32220
            // TO_JSON(x) -> target-specific (from Spark/Hive)
            // NOTE(review): this arm mirrors the TO_JSON_STRING arm below almost
            // exactly — a shared helper may be worth extracting.
            "TO_JSON" => {
                match target {
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON))
                        // A missing argument degrades to NULL rather than panicking.
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "JSON".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_FORMAT".to_string(),
                            vec![cast_json],
                        ))))
                    }
                    // BigQuery: TO_JSON_STRING(x) yields a JSON-formatted string.
                    DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON_STRING".to_string(),
                        args,
                    )))),
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT)
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    // Default: keep TO_JSON untouched.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON".to_string(),
                        args,
                    )))),
                }
            }
32276
            // TO_JSON_STRING(x) -> target-specific
            // Inverse-direction twin of the TO_JSON arm: BigQuery's TO_JSON_STRING is
            // mapped back to each target's native JSON-to-text spelling.
            "TO_JSON_STRING" => {
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
                        Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
                    ),
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON))
                        // A missing argument degrades to NULL rather than panicking.
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "JSON".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_FORMAT".to_string(),
                            vec![cast_json],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT)
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TO_JSON(x)
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            args,
                        ))))
                    }
                    // Default: keep TO_JSON_STRING untouched.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON_STRING".to_string(),
                        args,
                    )))),
                }
            }
32338
32339 // SAFE_ADD(x, y) -> SafeAdd expression
32340 "SAFE_ADD" if args.len() == 2 => {
32341 let x = args.remove(0);
32342 let y = args.remove(0);
32343 Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
32344 this: Box::new(x),
32345 expression: Box::new(y),
32346 })))
32347 }
32348
32349 // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
32350 "SAFE_SUBTRACT" if args.len() == 2 => {
32351 let x = args.remove(0);
32352 let y = args.remove(0);
32353 Ok(Expression::SafeSubtract(Box::new(
32354 crate::expressions::SafeSubtract {
32355 this: Box::new(x),
32356 expression: Box::new(y),
32357 },
32358 )))
32359 }
32360
32361 // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
32362 "SAFE_MULTIPLY" if args.len() == 2 => {
32363 let x = args.remove(0);
32364 let y = args.remove(0);
32365 Ok(Expression::SafeMultiply(Box::new(
32366 crate::expressions::SafeMultiply {
32367 this: Box::new(x),
32368 expression: Box::new(y),
32369 },
32370 )))
32371 }
32372
32373 // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
32374 "REGEXP_CONTAINS" if args.len() == 2 => {
32375 let str_expr = args.remove(0);
32376 let pattern = args.remove(0);
32377 Ok(Expression::RegexpLike(Box::new(
32378 crate::expressions::RegexpFunc {
32379 this: str_expr,
32380 pattern,
32381 flags: None,
32382 },
32383 )))
32384 }
32385
32386 // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
32387 "CONTAINS_SUBSTR" if args.len() == 2 => {
32388 let a = args.remove(0);
32389 let b = args.remove(0);
32390 let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
32391 let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
32392 Ok(Expression::Function(Box::new(Function::new(
32393 "CONTAINS".to_string(),
32394 vec![lower_a, lower_b],
32395 ))))
32396 }
32397
32398 // INT64(x) -> CAST(x AS BIGINT)
32399 "INT64" if args.len() == 1 => {
32400 let arg = args.remove(0);
32401 Ok(Expression::Cast(Box::new(Cast {
32402 this: arg,
32403 to: DataType::BigInt { length: None },
32404 trailing_comments: vec![],
32405 double_colon_syntax: false,
32406 format: None,
32407 default: None,
32408 inferred_type: None,
32409 })))
32410 }
32411
32412 // INSTR(str, substr) -> target-specific
32413 "INSTR" if args.len() >= 2 => {
32414 let str_expr = args.remove(0);
32415 let substr = args.remove(0);
32416 if matches!(target, DialectType::Snowflake) {
32417 // CHARINDEX(substr, str)
32418 Ok(Expression::Function(Box::new(Function::new(
32419 "CHARINDEX".to_string(),
32420 vec![substr, str_expr],
32421 ))))
32422 } else if matches!(target, DialectType::BigQuery) {
32423 // Keep as INSTR
32424 Ok(Expression::Function(Box::new(Function::new(
32425 "INSTR".to_string(),
32426 vec![str_expr, substr],
32427 ))))
32428 } else {
32429 // Default: keep as INSTR
32430 Ok(Expression::Function(Box::new(Function::new(
32431 "INSTR".to_string(),
32432 vec![str_expr, substr],
32433 ))))
32434 }
32435 }
32436
32437 // BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL
32438 "DATE_TRUNC" if args.len() == 2 => {
32439 let expr = args.remove(0);
32440 let unit_expr = args.remove(0);
32441 let unit_str = get_unit_str(&unit_expr);
32442
32443 match target {
32444 DialectType::DuckDB
32445 | DialectType::Snowflake
32446 | DialectType::PostgreSQL
32447 | DialectType::Presto
32448 | DialectType::Trino
32449 | DialectType::Databricks
32450 | DialectType::Spark
32451 | DialectType::Redshift
32452 | DialectType::ClickHouse
32453 | DialectType::TSQL => {
32454 // Standard: DATE_TRUNC('UNIT', expr)
32455 Ok(Expression::Function(Box::new(Function::new(
32456 "DATE_TRUNC".to_string(),
32457 vec![
32458 Expression::Literal(Box::new(Literal::String(unit_str))),
32459 expr,
32460 ],
32461 ))))
32462 }
32463 _ => {
32464 // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
32465 Ok(Expression::Function(Box::new(Function::new(
32466 "DATE_TRUNC".to_string(),
32467 vec![expr, unit_expr],
32468 ))))
32469 }
32470 }
32471 }
32472
            // TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific
            //
            // BigQuery signature: TIMESTAMP_TRUNC(ts, unit[, timezone]). The unit
            // argument is normalized to a string via get_unit_str() and re-emitted as
            // a string literal (it is compared uppercase below).
            "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
                // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
                let ts = args.remove(0);
                let unit_expr = args.remove(0);
                // Optional third argument: time zone name.
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                        // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
                        // Without timezone for MINUTE+ granularity: just DATE_TRUNC
                        // Coarse units are the ones where the local calendar boundary
                        // matters, so the AT TIME ZONE round-trip applies only to them.
                        let is_coarse = matches!(
                            unit_str.as_str(),
                            "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
                        );
                        // For DATETIME_TRUNC, cast string args to TIMESTAMP
                        let cast_ts = if name == "DATETIME_TRUNC" {
                            match ts {
                                // Bare string literal: wrap in an explicit, timezone-less
                                // CAST(... AS TIMESTAMP) so it parses as a datetime.
                                Expression::Literal(ref lit)
                                    if matches!(lit.as_ref(), Literal::String(ref _s)) =>
                                {
                                    Expression::Cast(Box::new(Cast {
                                        this: ts,
                                        to: DataType::Timestamp {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                }
                                _ => Self::maybe_cast_ts_to_tz(ts, &name),
                            }
                        } else {
                            Self::maybe_cast_ts_to_tz(ts, &name)
                        };

                        if let Some(tz_arg) = tz {
                            if is_coarse {
                                // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                let at_tz = Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: cast_ts,
                                        zone: tz_arg.clone(),
                                    },
                                ));
                                let date_trunc = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![
                                        Expression::Literal(Box::new(Literal::String(unit_str))),
                                        at_tz,
                                    ],
                                )));
                                Ok(Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: date_trunc,
                                        zone: tz_arg,
                                    },
                                )))
                            } else {
                                // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![
                                        Expression::Literal(Box::new(Literal::String(unit_str))),
                                        cast_ts,
                                    ],
                                ))))
                            }
                        } else {
                            // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![
                                    Expression::Literal(Box::new(Literal::String(unit_str))),
                                    cast_ts,
                                ],
                            ))))
                        }
                    }
                    DialectType::Databricks | DialectType::Spark => {
                        // Databricks/Spark: DATE_TRUNC('UNIT', ts)
                        // NOTE(review): a timezone argument is silently dropped here —
                        // confirm this is intended for Spark-family targets.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::Literal(Box::new(Literal::String(unit_str))), ts],
                        ))))
                    }
                    _ => {
                        // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
                        let unit = Expression::Literal(Box::new(Literal::String(unit_str)));
                        let mut date_trunc_args = vec![unit, ts];
                        if let Some(tz_arg) = tz {
                            date_trunc_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_TRUNC".to_string(),
                            date_trunc_args,
                        ))))
                    }
                }
            }
32583
            // TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME)
            //
            // Dispatches on arity:
            //   3 args: time-from-parts constructor (TSQL/MySQL/PostgreSQL variants)
            //   1 arg:  cast to TIME (Spark: TIMESTAMP, since Spark lacks a TIME type)
            //   2 args: value + timezone -> AT TIME ZONE conversion, then cast to TIME
            //   other:  pass through unchanged
            "TIME" => {
                if args.len() == 3 {
                    // TIME(h, m, s) constructor
                    match target {
                        DialectType::TSQL => {
                            // TIMEFROMPARTS(h, m, s, 0, 0)
                            // TSQL's TIMEFROMPARTS takes five arguments; pad the
                            // fractions/precision slots with zeros.
                            args.push(Expression::number(0));
                            args.push(Expression::number(0));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMEFROMPARTS".to_string(),
                                args,
                            ))))
                        }
                        DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
                            "MAKETIME".to_string(),
                            args,
                        )))),
                        DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                            Function::new("MAKE_TIME".to_string(), args),
                        ))),
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME".to_string(),
                            args,
                        )))),
                    }
                } else if args.len() == 1 {
                    let arg = args.remove(0);
                    if matches!(target, DialectType::Spark) {
                        // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    } else {
                        // Most targets: CAST(x AS TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                } else if args.len() == 2 {
                    // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
                    let expr = args.remove(0);
                    let tz = args.remove(0);
                    let cast_tstz = Expression::Cast(Box::new(Cast {
                        this: expr,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                        this: cast_tstz,
                        zone: tz,
                    }));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: at_tz,
                        to: DataType::Time {
                            precision: None,
                            timezone: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Unexpected arity: leave the call untouched.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIME".to_string(),
                        args,
                    ))))
                }
            }
32680
            // DATETIME('string') -> CAST('string' AS TIMESTAMP)
            // DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
            // DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
            // DATETIME(y, m, d, h, min, s) -> target-specific
            "DATETIME" => {
                // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
                if matches!(target, DialectType::BigQuery) {
                    if args.len() == 2 {
                        let has_time_literal = matches!(&args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
                        if has_time_literal {
                            let first = args.remove(0);
                            let second = args.remove(0);
                            // Rewrite TIME 'x' into CAST('x' AS TIME) so the generator
                            // does not emit a TIME literal inside the DATETIME call.
                            let time_as_cast = match second {
                                Expression::Literal(lit)
                                    if matches!(lit.as_ref(), Literal::Time(_)) =>
                                {
                                    // Guarded by the matches! above, so this cannot fail.
                                    let Literal::Time(s) = lit.as_ref() else {
                                        unreachable!()
                                    };
                                    Expression::Cast(Box::new(Cast {
                                        this: Expression::Literal(Box::new(Literal::String(
                                            s.clone(),
                                        ))),
                                        to: DataType::Time {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                }
                                other => other,
                            };
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATETIME".to_string(),
                                vec![first, time_as_cast],
                            ))));
                        }
                    }
                    // BigQuery -> BigQuery with no TIME literal: identity.
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))));
                }

                if args.len() == 1 {
                    // DATETIME('string') -> CAST('string' AS TIMESTAMP)
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else if args.len() == 2 {
                    let first = args.remove(0);
                    let second = args.remove(0);
                    // Check if second arg is a TIME literal
                    let is_time_literal = matches!(&second, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
                    if is_time_literal {
                        // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
                        let cast_date = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
                        let time_as_string = match second {
                            Expression::Literal(lit)
                                if matches!(lit.as_ref(), Literal::Time(_)) =>
                            {
                                // Guarded by the matches! above, so this cannot fail.
                                let Literal::Time(s) = lit.as_ref() else {
                                    unreachable!()
                                };
                                Expression::Literal(Box::new(Literal::String(s.clone())))
                            }
                            other => other,
                        };
                        let cast_time = Expression::Cast(Box::new(Cast {
                            this: time_as_string,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        // DATE + TIME addition, then cast the sum to TIMESTAMP.
                        let add_expr =
                            Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: add_expr,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    } else {
                        // DATETIME('string', 'timezone')
                        let cast_tstz = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let at_tz =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cast_tstz,
                                zone: second,
                            }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                } else if args.len() >= 3 {
                    // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
                    // For other targets, use MAKE_TIMESTAMP or similar
                    // NOTE(review): only Snowflake is handled here; other targets keep
                    // DATETIME as-is — confirm whether MAKE_TIMESTAMP support is planned.
                    if matches!(target, DialectType::Snowflake) {
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_FROM_PARTS".to_string(),
                            args,
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATETIME".to_string(),
                            args,
                        ))))
                    }
                } else {
                    // Zero arguments: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))))
                }
            }
32850
            // TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
            // TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz for DuckDB
            "TIMESTAMP" => {
                if args.len() == 1 {
                    // One argument: cast to a timezone-aware TIMESTAMP for all targets.
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else if args.len() == 2 {
                    // Two arguments: value plus a target time zone.
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    // Inner cast to a plain (timezone-less) TIMESTAMP first.
                    let cast_ts = Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    if matches!(target, DialectType::Snowflake) {
                        // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![tz, cast_ts],
                        ))))
                    } else {
                        // Other targets: CAST(x AS TIMESTAMP) AT TIME ZONE tz
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: cast_ts,
                                zone: tz,
                            },
                        )))
                    }
                } else {
                    // Unexpected arity: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIMESTAMP".to_string(),
                        args,
                    ))))
                }
            }
32904
            // STRING(x) -> CAST(x AS VARCHAR/TEXT)
            // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
            "STRING" => {
                if args.len() == 1 {
                    let arg = args.remove(0);
                    // DuckDB gets TEXT as the string type; every other target VARCHAR.
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: cast_type,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else if args.len() == 2 {
                    // STRING(timestamp, tz): render the timestamp in the given zone.
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    if matches!(target, DialectType::Snowflake) {
                        // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
                        let convert_tz = Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![
                                Expression::Literal(Box::new(Literal::String("UTC".to_string()))),
                                tz,
                                arg,
                            ],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: convert_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    } else {
                        // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
                        // The first AT TIME ZONE pins the value to UTC; the second
                        // converts it into the requested zone.
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let at_utc =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cast_ts,
                                zone: Expression::Literal(Box::new(Literal::String(
                                    "UTC".to_string(),
                                ))),
                            }));
                        let at_tz =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: at_utc,
                                zone: tz,
                            }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                } else {
                    // Unexpected arity: pass through unchanged.
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRING".to_string(),
                        args,
                    ))))
                }
            }
32998
            // UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
            //
            // UNIX_SECONDS(ts): seconds since the Unix epoch.
            "UNIX_SECONDS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
                        // The outer cast forces an integer result, matching BigQuery's
                        // INT64 return type.
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        let epoch = Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![cast_ts],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
                        // i.e. the elapsed seconds between the epoch literal and ts.
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Box::new(Literal::String(
                                "1970-01-01 00:00:00+00".to_string(),
                            ))),
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::TimestampDiff(Box::new(
                            crate::expressions::TimestampDiff {
                                this: Box::new(epoch),
                                expression: Box::new(ts),
                                unit: Some("SECONDS".to_string()),
                            },
                        )))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_SECONDS".to_string(),
                        vec![ts],
                    )))),
                }
            }
33050
33051 "UNIX_MILLIS" if args.len() == 1 => {
33052 let ts = args.remove(0);
33053 match target {
33054 DialectType::DuckDB => {
33055 // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
33056 let cast_ts = Self::ensure_cast_timestamptz(ts);
33057 Ok(Expression::Function(Box::new(Function::new(
33058 "EPOCH_MS".to_string(),
33059 vec![cast_ts],
33060 ))))
33061 }
33062 _ => Ok(Expression::Function(Box::new(Function::new(
33063 "UNIX_MILLIS".to_string(),
33064 vec![ts],
33065 )))),
33066 }
33067 }
33068
33069 "UNIX_MICROS" if args.len() == 1 => {
33070 let ts = args.remove(0);
33071 match target {
33072 DialectType::DuckDB => {
33073 // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
33074 let cast_ts = Self::ensure_cast_timestamptz(ts);
33075 Ok(Expression::Function(Box::new(Function::new(
33076 "EPOCH_US".to_string(),
33077 vec![cast_ts],
33078 ))))
33079 }
33080 _ => Ok(Expression::Function(Box::new(Function::new(
33081 "UNIX_MICROS".to_string(),
33082 vec![ts],
33083 )))),
33084 }
33085 }
33086
33087 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
33088 "ARRAY_CONCAT" | "LIST_CONCAT" => {
33089 match target {
33090 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
33091 // CONCAT(arr1, arr2, ...)
33092 Ok(Expression::Function(Box::new(Function::new(
33093 "CONCAT".to_string(),
33094 args,
33095 ))))
33096 }
33097 DialectType::Presto | DialectType::Trino => {
33098 // CONCAT(arr1, arr2, ...)
33099 Ok(Expression::Function(Box::new(Function::new(
33100 "CONCAT".to_string(),
33101 args,
33102 ))))
33103 }
33104 DialectType::Snowflake => {
33105 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
33106 if args.len() == 1 {
33107 // ARRAY_CAT requires 2 args, add empty array as []
33108 let empty_arr = Expression::ArrayFunc(Box::new(
33109 crate::expressions::ArrayConstructor {
33110 expressions: vec![],
33111 bracket_notation: true,
33112 use_list_keyword: false,
33113 },
33114 ));
33115 let mut new_args = args;
33116 new_args.push(empty_arr);
33117 Ok(Expression::Function(Box::new(Function::new(
33118 "ARRAY_CAT".to_string(),
33119 new_args,
33120 ))))
33121 } else if args.is_empty() {
33122 Ok(Expression::Function(Box::new(Function::new(
33123 "ARRAY_CAT".to_string(),
33124 args,
33125 ))))
33126 } else {
33127 let mut it = args.into_iter().rev();
33128 let mut result = it.next().unwrap();
33129 for arr in it {
33130 result = Expression::Function(Box::new(Function::new(
33131 "ARRAY_CAT".to_string(),
33132 vec![arr, result],
33133 )));
33134 }
33135 Ok(result)
33136 }
33137 }
33138 DialectType::PostgreSQL => {
33139 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
33140 if args.len() <= 1 {
33141 Ok(Expression::Function(Box::new(Function::new(
33142 "ARRAY_CAT".to_string(),
33143 args,
33144 ))))
33145 } else {
33146 let mut it = args.into_iter().rev();
33147 let mut result = it.next().unwrap();
33148 for arr in it {
33149 result = Expression::Function(Box::new(Function::new(
33150 "ARRAY_CAT".to_string(),
33151 vec![arr, result],
33152 )));
33153 }
33154 Ok(result)
33155 }
33156 }
33157 DialectType::Redshift => {
33158 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
33159 if args.len() <= 2 {
33160 Ok(Expression::Function(Box::new(Function::new(
33161 "ARRAY_CONCAT".to_string(),
33162 args,
33163 ))))
33164 } else {
33165 let mut it = args.into_iter().rev();
33166 let mut result = it.next().unwrap();
33167 for arr in it {
33168 result = Expression::Function(Box::new(Function::new(
33169 "ARRAY_CONCAT".to_string(),
33170 vec![arr, result],
33171 )));
33172 }
33173 Ok(result)
33174 }
33175 }
33176 DialectType::DuckDB => {
33177 // LIST_CONCAT supports multiple args natively in DuckDB
33178 Ok(Expression::Function(Box::new(Function::new(
33179 "LIST_CONCAT".to_string(),
33180 args,
33181 ))))
33182 }
33183 _ => Ok(Expression::Function(Box::new(Function::new(
33184 "ARRAY_CONCAT".to_string(),
33185 args,
33186 )))),
33187 }
33188 }
33189
33190 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
33191 "ARRAY_CONCAT_AGG" if args.len() == 1 => {
33192 let arg = args.remove(0);
33193 match target {
33194 DialectType::Snowflake => {
33195 let array_agg =
33196 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
33197 this: arg,
33198 distinct: false,
33199 filter: None,
33200 order_by: vec![],
33201 name: None,
33202 ignore_nulls: None,
33203 having_max: None,
33204 limit: None,
33205 inferred_type: None,
33206 }));
33207 Ok(Expression::Function(Box::new(Function::new(
33208 "ARRAY_FLATTEN".to_string(),
33209 vec![array_agg],
33210 ))))
33211 }
33212 _ => Ok(Expression::Function(Box::new(Function::new(
33213 "ARRAY_CONCAT_AGG".to_string(),
33214 vec![arg],
33215 )))),
33216 }
33217 }
33218
33219 // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
33220 "MD5" if args.len() == 1 => {
33221 let arg = args.remove(0);
33222 match target {
33223 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
33224 // UNHEX(MD5(x))
33225 let md5 = Expression::Function(Box::new(Function::new(
33226 "MD5".to_string(),
33227 vec![arg],
33228 )));
33229 Ok(Expression::Function(Box::new(Function::new(
33230 "UNHEX".to_string(),
33231 vec![md5],
33232 ))))
33233 }
33234 DialectType::Snowflake => {
33235 // MD5_BINARY(x)
33236 Ok(Expression::Function(Box::new(Function::new(
33237 "MD5_BINARY".to_string(),
33238 vec![arg],
33239 ))))
33240 }
33241 _ => Ok(Expression::Function(Box::new(Function::new(
33242 "MD5".to_string(),
33243 vec![arg],
33244 )))),
33245 }
33246 }
33247
33248 "SHA1" if args.len() == 1 => {
33249 let arg = args.remove(0);
33250 match target {
33251 DialectType::DuckDB => {
33252 // UNHEX(SHA1(x))
33253 let sha1 = Expression::Function(Box::new(Function::new(
33254 "SHA1".to_string(),
33255 vec![arg],
33256 )));
33257 Ok(Expression::Function(Box::new(Function::new(
33258 "UNHEX".to_string(),
33259 vec![sha1],
33260 ))))
33261 }
33262 _ => Ok(Expression::Function(Box::new(Function::new(
33263 "SHA1".to_string(),
33264 vec![arg],
33265 )))),
33266 }
33267 }
33268
33269 "SHA256" if args.len() == 1 => {
33270 let arg = args.remove(0);
33271 match target {
33272 DialectType::DuckDB => {
33273 // UNHEX(SHA256(x))
33274 let sha = Expression::Function(Box::new(Function::new(
33275 "SHA256".to_string(),
33276 vec![arg],
33277 )));
33278 Ok(Expression::Function(Box::new(Function::new(
33279 "UNHEX".to_string(),
33280 vec![sha],
33281 ))))
33282 }
33283 DialectType::Snowflake => {
33284 // SHA2_BINARY(x, 256)
33285 Ok(Expression::Function(Box::new(Function::new(
33286 "SHA2_BINARY".to_string(),
33287 vec![arg, Expression::number(256)],
33288 ))))
33289 }
33290 DialectType::Redshift | DialectType::Spark => {
33291 // SHA2(x, 256)
33292 Ok(Expression::Function(Box::new(Function::new(
33293 "SHA2".to_string(),
33294 vec![arg, Expression::number(256)],
33295 ))))
33296 }
33297 _ => Ok(Expression::Function(Box::new(Function::new(
33298 "SHA256".to_string(),
33299 vec![arg],
33300 )))),
33301 }
33302 }
33303
33304 "SHA512" if args.len() == 1 => {
33305 let arg = args.remove(0);
33306 match target {
33307 DialectType::Snowflake => {
33308 // SHA2_BINARY(x, 512)
33309 Ok(Expression::Function(Box::new(Function::new(
33310 "SHA2_BINARY".to_string(),
33311 vec![arg, Expression::number(512)],
33312 ))))
33313 }
33314 DialectType::Redshift | DialectType::Spark => {
33315 // SHA2(x, 512)
33316 Ok(Expression::Function(Box::new(Function::new(
33317 "SHA2".to_string(),
33318 vec![arg, Expression::number(512)],
33319 ))))
33320 }
33321 _ => Ok(Expression::Function(Box::new(Function::new(
33322 "SHA512".to_string(),
33323 vec![arg],
33324 )))),
33325 }
33326 }
33327
            // REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg
            "REGEXP_EXTRACT_ALL" if args.len() == 2 => {
                let str_expr = args.remove(0);
                let pattern = args.remove(0);

                // Check if pattern contains capturing groups (parentheses)
                // NOTE(review): this is a textual heuristic — escaped parens `\(` and
                // non-capturing groups `(?:...)` also satisfy it; confirm acceptable.
                let has_groups = match &pattern {
                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
                        let Literal::String(s) = lit.as_ref() else {
                            unreachable!()
                        };
                        s.contains('(') && s.contains(')')
                    }
                    // Non-literal pattern: cannot inspect, assume no groups.
                    _ => false,
                };

                match target {
                    DialectType::DuckDB => {
                        // DuckDB gets an explicit group index: 1 when the pattern
                        // captures, 0 (whole match) otherwise.
                        let group = if has_groups {
                            Expression::number(1)
                        } else {
                            Expression::number(0)
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT_ALL".to_string(),
                            vec![str_expr, pattern, group],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        } else {
                            // No groups: force group 0 (the whole match).
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(0)],
                            ))))
                        }
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // Presto/Trino: explicit group 1 for capturing patterns,
                        // bare two-arg form otherwise.
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(1)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    DialectType::Snowflake => {
                        if has_groups {
                            // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
                            // Trailing args: position, occurrence, parameters string
                            // ('c' = case-sensitive), group_num.
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![
                                    str_expr,
                                    pattern,
                                    Expression::number(1),
                                    Expression::number(1),
                                    Expression::Literal(Box::new(Literal::String("c".to_string()))),
                                    Expression::number(1),
                                ],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![str_expr, pattern],
                    )))),
                }
            }
33410
33411 // MOD(x, y) -> x % y for PostgreSQL/DuckDB
33412 "MOD" if args.len() == 2 => {
33413 match target {
33414 DialectType::PostgreSQL
33415 | DialectType::DuckDB
33416 | DialectType::Presto
33417 | DialectType::Trino
33418 | DialectType::Athena
33419 | DialectType::Snowflake => {
33420 let x = args.remove(0);
33421 let y = args.remove(0);
33422 // Wrap complex expressions in parens to preserve precedence
33423 let needs_paren = |e: &Expression| {
33424 matches!(
33425 e,
33426 Expression::Add(_)
33427 | Expression::Sub(_)
33428 | Expression::Mul(_)
33429 | Expression::Div(_)
33430 )
33431 };
33432 let x = if needs_paren(&x) {
33433 Expression::Paren(Box::new(crate::expressions::Paren {
33434 this: x,
33435 trailing_comments: vec![],
33436 }))
33437 } else {
33438 x
33439 };
33440 let y = if needs_paren(&y) {
33441 Expression::Paren(Box::new(crate::expressions::Paren {
33442 this: y,
33443 trailing_comments: vec![],
33444 }))
33445 } else {
33446 y
33447 };
33448 Ok(Expression::Mod(Box::new(
33449 crate::expressions::BinaryOp::new(x, y),
33450 )))
33451 }
33452 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
33453 // Hive/Spark: a % b
33454 let x = args.remove(0);
33455 let y = args.remove(0);
33456 let needs_paren = |e: &Expression| {
33457 matches!(
33458 e,
33459 Expression::Add(_)
33460 | Expression::Sub(_)
33461 | Expression::Mul(_)
33462 | Expression::Div(_)
33463 )
33464 };
33465 let x = if needs_paren(&x) {
33466 Expression::Paren(Box::new(crate::expressions::Paren {
33467 this: x,
33468 trailing_comments: vec![],
33469 }))
33470 } else {
33471 x
33472 };
33473 let y = if needs_paren(&y) {
33474 Expression::Paren(Box::new(crate::expressions::Paren {
33475 this: y,
33476 trailing_comments: vec![],
33477 }))
33478 } else {
33479 y
33480 };
33481 Ok(Expression::Mod(Box::new(
33482 crate::expressions::BinaryOp::new(x, y),
33483 )))
33484 }
33485 _ => Ok(Expression::Function(Box::new(Function::new(
33486 "MOD".to_string(),
33487 args,
33488 )))),
33489 }
33490 }
33491
33492 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, ARRAY_FILTER for StarRocks
33493 "ARRAY_FILTER" if args.len() == 2 => {
33494 let name = match target {
33495 DialectType::DuckDB => "LIST_FILTER",
33496 DialectType::StarRocks => "ARRAY_FILTER",
33497 _ => "FILTER",
33498 };
33499 Ok(Expression::Function(Box::new(Function::new(
33500 name.to_string(),
33501 args,
33502 ))))
33503 }
33504 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
33505 "FILTER" if args.len() == 2 => {
33506 let name = match target {
33507 DialectType::DuckDB => "LIST_FILTER",
33508 DialectType::StarRocks => "ARRAY_FILTER",
33509 _ => "FILTER",
33510 };
33511 Ok(Expression::Function(Box::new(Function::new(
33512 name.to_string(),
33513 args,
33514 ))))
33515 }
33516 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
33517 "REDUCE" if args.len() >= 3 => {
33518 let name = match target {
33519 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
33520 _ => "REDUCE",
33521 };
33522 Ok(Expression::Function(Box::new(Function::new(
33523 name.to_string(),
33524 args,
33525 ))))
33526 }
33527 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
33528 "ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
33529 Function::new("ARRAY_REVERSE".to_string(), args),
33530 ))),
33531
            // CONCAT(a, b, c, ...) with 3+ args: DuckDB prefers the || operator,
            // so left-fold the arguments into nested DPipe nodes (a || b || c ...).
            // Other targets keep the CONCAT function form unchanged.
            "CONCAT" if args.len() > 2 => match target {
                DialectType::DuckDB => {
                    let mut it = args.into_iter();
                    // Safe unwrap: the arm guard guarantees at least 3 arguments.
                    let mut result = it.next().unwrap();
                    for arg in it {
                        result = Expression::DPipe(Box::new(crate::expressions::DPipe {
                            this: Box::new(result),
                            expression: Box::new(arg),
                            safe: None,
                        }));
                    }
                    Ok(result)
                }
                _ => Ok(Expression::Function(Box::new(Function::new(
                    "CONCAT".to_string(),
                    args,
                )))),
            },
33551
            // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
            "GENERATE_DATE_ARRAY" => {
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: add default interval if not present
                    if args.len() == 2 {
                        let start = args.remove(0);
                        let end = args.remove(0);
                        // Default step: INTERVAL '1' DAY.
                        let default_interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(Literal::String(
                                    "1".to_string(),
                                )))),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: crate::expressions::IntervalUnit::Day,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_DATE_ARRAY".to_string(),
                            vec![start, end, default_interval],
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_DATE_ARRAY".to_string(),
                            args,
                        ))))
                    }
                } else if matches!(target, DialectType::DuckDB) {
                    // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
                    let start = args.get(0).cloned();
                    let end = args.get(1).cloned();
                    // Missing third argument defaults to INTERVAL '1' DAY.
                    let step = args.get(2).cloned().or_else(|| {
                        Some(Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(Literal::String(
                                    "1".to_string(),
                                )))),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: crate::expressions::IntervalUnit::Day,
                                    use_plural: false,
                                }),
                            },
                        )))
                    });

                    // Wrap start/end in CAST(... AS DATE) only for string literals;
                    // any other expression is passed through untouched.
                    let maybe_cast_date = |expr: Expression| -> Expression {
                        if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
                        {
                            Expression::Cast(Box::new(Cast {
                                this: expr,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        } else {
                            expr
                        }
                    };
                    let cast_start = start.map(maybe_cast_date);
                    let cast_end = end.map(maybe_cast_date);

                    let gen_series =
                        Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
                            start: cast_start.map(Box::new),
                            end: cast_end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        }));

                    // Wrap in CAST(... AS DATE[]) so the result is a date array,
                    // mirroring BigQuery's ARRAY<DATE> return type.
                    Ok(Expression::Cast(Box::new(Cast {
                        this: gen_series,
                        to: DataType::Array {
                            element_type: Box::new(DataType::Date),
                            dimension: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else if matches!(target, DialectType::Snowflake) {
                    // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
                    // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
                    if args.len() == 2 {
                        let start = args.remove(0);
                        let end = args.remove(0);
                        // Same INTERVAL '1' DAY default as the BQ->BQ branch above.
                        let default_interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(Literal::String(
                                    "1".to_string(),
                                )))),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: crate::expressions::IntervalUnit::Day,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_DATE_ARRAY".to_string(),
                            vec![start, end, default_interval],
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_DATE_ARRAY".to_string(),
                            args,
                        ))))
                    }
                } else {
                    // Convert to GenerateSeries for other targets
                    let start = args.get(0).cloned();
                    let end = args.get(1).cloned();
                    let step = args.get(2).cloned().or_else(|| {
                        Some(Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(Literal::String(
                                    "1".to_string(),
                                )))),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: crate::expressions::IntervalUnit::Day,
                                    use_plural: false,
                                }),
                            },
                        )))
                    });
                    Ok(Expression::GenerateSeries(Box::new(
                        crate::expressions::GenerateSeries {
                            start: start.map(Box::new),
                            end: end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        },
                    )))
                }
            }
33691
            // PARSE_DATE(format, str) -> target-specific.
            // Note: BigQuery puts the format string FIRST; some targets want it second.
            "PARSE_DATE" if args.len() == 2 => {
                let format = args.remove(0);
                let str_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(STRPTIME(str, duck_format) AS DATE)
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        let strptime = Expression::Function(Box::new(Function::new(
                            "STRPTIME".to_string(),
                            vec![str_expr, duck_format],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: strptime,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // _POLYGLOT_DATE(str, snowflake_format)
                        // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
                        let sf_format = Self::bq_format_to_snowflake(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "_POLYGLOT_DATE".to_string(),
                            vec![str_expr, sf_format],
                        ))))
                    }
                    // Other targets: keep PARSE_DATE in the original (format, str) order.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "PARSE_DATE".to_string(),
                        vec![format, str_expr],
                    )))),
                }
            }

            // PARSE_TIMESTAMP(format, str[, timezone]) -> target-specific
            "PARSE_TIMESTAMP" if args.len() >= 2 => {
                let format = args.remove(0);
                let str_expr = args.remove(0);
                // Optional third argument is a timezone name.
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                match target {
                    DialectType::DuckDB => {
                        // STRPTIME(str, duck_format).
                        // NOTE(review): a timezone argument, if present, is dropped on
                        // this path — confirm that is intentional for DuckDB targets.
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        let strptime = Expression::Function(Box::new(Function::new(
                            "STRPTIME".to_string(),
                            vec![str_expr, duck_format],
                        )));
                        Ok(strptime)
                    }
                    _ => {
                        // Keep PARSE_TIMESTAMP, re-appending the timezone when given.
                        let mut result_args = vec![format, str_expr];
                        if let Some(tz_arg) = tz {
                            result_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "PARSE_TIMESTAMP".to_string(),
                            result_args,
                        ))))
                    }
                }
            }
33760
            // FORMAT_DATE(format, date) -> target-specific
            "FORMAT_DATE" if args.len() == 2 => {
                let format = args.remove(0);
                let date_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(date AS DATE), format) — DuckDB takes the
                        // value first and the format second (reverse of BigQuery).
                        let cast_date = Expression::Cast(Box::new(Cast {
                            this: date_expr,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_date, format],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATE".to_string(),
                        vec![format, date_expr],
                    )))),
                }
            }

            // FORMAT_DATETIME(format, datetime) -> target-specific
            "FORMAT_DATETIME" if args.len() == 2 => {
                let format = args.remove(0);
                let dt_expr = args.remove(0);

                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
                    let norm_format = Self::bq_format_normalize_bq(&format);
                    // Also strip DATETIME keyword from typed literals: a
                    // Literal::Timestamp is re-emitted as CAST('...' AS DATETIME).
                    let norm_dt = match dt_expr {
                        Expression::Literal(lit)
                            if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
                        {
                            // The guard above proves the variant; destructure it.
                            let Literal::Timestamp(s) = lit.as_ref() else {
                                unreachable!()
                            };
                            Expression::Cast(Box::new(Cast {
                                this: Expression::Literal(Box::new(Literal::String(s.clone()))),
                                to: DataType::Custom {
                                    name: "DATETIME".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        }
                        other => other,
                    };
                    return Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATETIME".to_string(),
                        vec![norm_format, norm_dt],
                    ))));
                }

                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
                        let cast_dt = Self::ensure_cast_timestamp(dt_expr);
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_dt, duck_format],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATETIME".to_string(),
                        vec![format, dt_expr],
                    )))),
                }
            }
33841
            // FORMAT_TIMESTAMP(format, ts) -> target-specific
            "FORMAT_TIMESTAMP" if args.len() == 2 => {
                let format = args.remove(0);
                let ts_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
                        // The double cast normalizes the value through the session
                        // timezone before formatting as a naive timestamp.
                        let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: cast_tstz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_ts, format],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
                        let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: cast_tstz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        // Translate the %-style BigQuery format to Snowflake's tokens.
                        let sf_format = Self::bq_format_to_snowflake(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_CHAR".to_string(),
                            vec![cast_ts, sf_format],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_TIMESTAMP".to_string(),
                        vec![format, ts_expr],
                    )))),
                }
            }
33894
            // UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB
            "UNIX_DATE" if args.len() == 1 => {
                let date = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // Epoch anchor: CAST('1970-01-01' AS DATE).
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Box::new(Literal::String(
                                "1970-01-01".to_string(),
                            ))),
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
                        // Need to convert DATE literal to CAST
                        let norm_date = Self::date_literal_to_cast(date);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![
                                Expression::Literal(Box::new(Literal::String("DAY".to_string()))),
                                epoch,
                                norm_date,
                            ],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_DATE".to_string(),
                        vec![date],
                    )))),
                }
            }

            // UNIX_SECONDS(ts) -> target-specific
            "UNIX_SECONDS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
                        let norm_ts = Self::ts_literal_to_cast_tz(ts);
                        let epoch = Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![norm_ts],
                        )));
                        // Outer cast: EPOCH returns a DOUBLE; BigQuery's
                        // UNIX_SECONDS returns INT64, hence BIGINT.
                        Ok(Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Box::new(Literal::String(
                                "1970-01-01 00:00:00+00".to_string(),
                            ))),
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        // "SECONDS" is emitted as a bare identifier (date-part
                        // keyword), not a string literal.
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMPDIFF".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("SECONDS".to_string())),
                                epoch,
                                ts,
                            ],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_SECONDS".to_string(),
                        vec![ts],
                    )))),
                }
            }
33982
            // UNIX_MILLIS(ts) -> target-specific
            "UNIX_MILLIS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // EPOCH_MS(CAST(ts AS TIMESTAMPTZ)) — milliseconds since epoch.
                        let norm_ts = Self::ts_literal_to_cast_tz(ts);
                        Ok(Expression::Function(Box::new(Function::new(
                            "EPOCH_MS".to_string(),
                            vec![norm_ts],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_MILLIS".to_string(),
                        vec![ts],
                    )))),
                }
            }

            // UNIX_MICROS(ts) -> target-specific
            "UNIX_MICROS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // EPOCH_US(CAST(ts AS TIMESTAMPTZ)) — microseconds since epoch.
                        let norm_ts = Self::ts_literal_to_cast_tz(ts);
                        Ok(Expression::Function(Box::new(Function::new(
                            "EPOCH_US".to_string(),
                            vec![norm_ts],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_MICROS".to_string(),
                        vec![ts],
                    )))),
                }
            }
34018
            // INSTR(str, substr) -> target-specific
            "INSTR" => {
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: keep as INSTR
                    Ok(Expression::Function(Box::new(Function::new(
                        "INSTR".to_string(),
                        args,
                    ))))
                } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
                    // Snowflake: CHARINDEX(substr, str) - swap args
                    // (INSTR takes the haystack first; CHARINDEX takes the needle first).
                    let str_expr = args.remove(0);
                    let substr = args.remove(0);
                    Ok(Expression::Function(Box::new(Function::new(
                        "CHARINDEX".to_string(),
                        vec![substr, str_expr],
                    ))))
                } else {
                    // Keep as INSTR for other targets
                    Ok(Expression::Function(Box::new(Function::new(
                        "INSTR".to_string(),
                        args,
                    ))))
                }
            }
34043
            // CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone
            "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: always output with parens (function form), keep any timezone arg
                    Ok(Expression::Function(Box::new(Function::new(name, args))))
                } else if name == "CURRENT_DATE" && args.len() == 1 {
                    // CURRENT_DATE('UTC') - has timezone arg
                    let tz_arg = args.remove(0);
                    match target {
                        DialectType::DuckDB => {
                            // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
                            let ct = Expression::CurrentTimestamp(
                                crate::expressions::CurrentTimestamp {
                                    precision: None,
                                    sysdate: false,
                                },
                            );
                            let at_tz =
                                Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                    this: ct,
                                    zone: tz_arg,
                                }));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: at_tz,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::Snowflake => {
                            // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
                            let ct = Expression::Function(Box::new(Function::new(
                                "CURRENT_TIMESTAMP".to_string(),
                                vec![],
                            )));
                            let convert = Expression::Function(Box::new(Function::new(
                                "CONVERT_TIMEZONE".to_string(),
                                vec![tz_arg, ct],
                            )));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: convert,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        _ => {
                            // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
                            // NOTE(review): AT TIME ZONE applied to a DATE (not a
                            // timestamp) — confirm this renders as intended on these
                            // targets.
                            let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
                            Ok(Expression::AtTimeZone(Box::new(
                                crate::expressions::AtTimeZone {
                                    this: cd,
                                    zone: tz_arg,
                                },
                            )))
                        }
                    }
                } else if (name == "CURRENT_TIMESTAMP"
                    || name == "CURRENT_TIME"
                    || name == "CURRENT_DATE")
                    && args.is_empty()
                    && matches!(
                        target,
                        DialectType::PostgreSQL
                            | DialectType::DuckDB
                            | DialectType::Presto
                            | DialectType::Trino
                    )
                {
                    // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME
                    if name == "CURRENT_TIMESTAMP" {
                        Ok(Expression::CurrentTimestamp(
                            crate::expressions::CurrentTimestamp {
                                precision: None,
                                sysdate: false,
                            },
                        ))
                    } else if name == "CURRENT_DATE" {
                        Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
                    } else {
                        // CURRENT_TIME
                        Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
                            precision: None,
                        }))
                    }
                } else {
                    // All other targets: keep as function (with parens)
                    Ok(Expression::Function(Box::new(Function::new(name, args))))
                }
            }
34140
            // JSON_QUERY(json, path) -> target-specific
            "JSON_QUERY" if args.len() == 2 => {
                match target {
                    DialectType::DuckDB | DialectType::SQLite => {
                        // json -> path syntax (arrow-style JSON extraction).
                        let json_expr = args.remove(0);
                        let path = args.remove(0);
                        Ok(Expression::JsonExtract(Box::new(
                            crate::expressions::JsonExtractFunc {
                                this: json_expr,
                                path,
                                returning: None,
                                arrow_syntax: true,
                                hash_arrow_syntax: false,
                                wrapper_option: None,
                                quotes_option: None,
                                on_scalar_string: false,
                                on_error: None,
                            },
                        )))
                    }
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        // Hive-family equivalent: GET_JSON_OBJECT(json, path).
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_JSON_OBJECT".to_string(),
                            args,
                        ))))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
                        Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
                    )),
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "JSON_QUERY".to_string(),
                        args,
                    )))),
                }
            }
34177
            // JSON_VALUE_ARRAY(json, path) -> target-specific
            "JSON_VALUE_ARRAY" if args.len() == 2 => {
                match target {
                    DialectType::DuckDB => {
                        // CAST(json -> path AS TEXT[])
                        let json_expr = args.remove(0);
                        let path = args.remove(0);
                        let arrow = Expression::JsonExtract(Box::new(
                            crate::expressions::JsonExtractFunc {
                                this: json_expr,
                                path,
                                returning: None,
                                arrow_syntax: true,
                                hash_arrow_syntax: false,
                                wrapper_option: None,
                                quotes_option: None,
                                on_scalar_string: false,
                                on_error: None,
                            },
                        ));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arrow,
                            to: DataType::Array {
                                element_type: Box::new(DataType::Text),
                                dimension: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        let json_expr = args.remove(0);
                        let path_expr = args.remove(0);
                        // Convert JSON path from $.path to just path
                        // (Snowflake's GET_PATH takes a bare dotted path, no "$." root).
                        let sf_path = if let Expression::Literal(ref lit) = path_expr {
                            if let Literal::String(ref s) = lit.as_ref() {
                                let trimmed = s.trim_start_matches('$').trim_start_matches('.');
                                Expression::Literal(Box::new(Literal::String(trimmed.to_string())))
                            } else {
                                path_expr.clone()
                            }
                        } else {
                            path_expr
                        };
                        // GET_PATH(PARSE_JSON(json), path) extracts the array value...
                        let parse_json = Expression::Function(Box::new(Function::new(
                            "PARSE_JSON".to_string(),
                            vec![json_expr],
                        )));
                        let get_path = Expression::Function(Box::new(Function::new(
                            "GET_PATH".to_string(),
                            vec![parse_json, sf_path],
                        )));
                        // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
                        // ...then each element is cast to VARCHAR to mirror
                        // BigQuery's ARRAY<STRING> result.
                        let cast_expr = Expression::Cast(Box::new(Cast {
                            this: Expression::Identifier(Identifier::new("x")),
                            to: DataType::VarChar {
                                length: None,
                                parenthesized_length: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                            parameters: vec![Identifier::new("x")],
                            body: cast_expr,
                            colon: false,
                            parameter_types: vec![],
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "TRANSFORM".to_string(),
                            vec![get_path, lambda],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "JSON_VALUE_ARRAY".to_string(),
                        args,
                    )))),
                }
            }
34263
            // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
            // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
            // This is different from Hive/Spark where 3rd arg is "group_index"
            "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
                match target {
                    DialectType::DuckDB
                    | DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        if args.len() == 2 {
                            // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
                            // (explicit group 1 matches BigQuery's capture-group default)
                            args.push(Expression::number(1));
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT".to_string(),
                                args,
                            ))))
                        } else if args.len() == 3 {
                            let val = args.remove(0);
                            let regex = args.remove(0);
                            let position = args.remove(0);
                            // A literal position of 1 is a no-op and can be dropped.
                            let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                            if is_pos_1 {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![val, regex, Expression::number(1)],
                                ))))
                            } else {
                                // Non-trivial position: emulate via
                                // REGEXP_EXTRACT(NULLIF(SUBSTRING(val, position), ''), regex, 1)
                                // — NULLIF keeps an out-of-range substring from matching ''.
                                let substring_expr = Expression::Function(Box::new(Function::new(
                                    "SUBSTRING".to_string(),
                                    vec![val, position],
                                )));
                                let nullif_expr = Expression::Function(Box::new(Function::new(
                                    "NULLIF".to_string(),
                                    vec![
                                        substring_expr,
                                        Expression::Literal(Box::new(Literal::String(
                                            String::new(),
                                        ))),
                                    ],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![nullif_expr, regex, Expression::number(1)],
                                ))))
                            }
                        } else if args.len() == 4 {
                            let val = args.remove(0);
                            let regex = args.remove(0);
                            let position = args.remove(0);
                            let occurrence = args.remove(0);
                            let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                            let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                            if is_pos_1 && is_occ_1 {
                                // Both defaults: plain 3-arg REGEXP_EXTRACT suffices.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![val, regex, Expression::number(1)],
                                ))))
                            } else {
                                // Apply the position offset first (as in the 3-arg case)...
                                let subject = if is_pos_1 {
                                    val
                                } else {
                                    let substring_expr = Expression::Function(Box::new(
                                        Function::new("SUBSTRING".to_string(), vec![val, position]),
                                    ));
                                    Expression::Function(Box::new(Function::new(
                                        "NULLIF".to_string(),
                                        vec![
                                            substring_expr,
                                            Expression::Literal(Box::new(Literal::String(
                                                String::new(),
                                            ))),
                                        ],
                                    )))
                                };
                                // ...then select the Nth match:
                                // ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(subject, regex, 1), occurrence).
                                // NOTE(review): assumes the occurrence index maps directly
                                // onto the target's (1-based) array indexing — confirm the
                                // generator emits the right accessor for Presto/Trino.
                                let extract_all = Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![subject, regex, Expression::number(1)],
                                )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "ARRAY_EXTRACT".to_string(),
                                    vec![extract_all, occurrence],
                                ))))
                            }
                        } else {
                            // Unexpected arity: rebuild the original function untouched,
                            // preserving all of its flags.
                            Ok(Expression::Function(Box::new(Function {
                                name: f.name,
                                args,
                                distinct: f.distinct,
                                trailing_comments: f.trailing_comments,
                                use_bracket_syntax: f.use_bracket_syntax,
                                no_parens: f.no_parens,
                                quoted: f.quoted,
                                span: None,
                                inferred_type: None,
                            })))
                        }
                    }
                    DialectType::Snowflake => {
                        // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
                        // (same subject/pattern/position/occurrence argument order).
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_SUBSTR".to_string(),
                            args,
                        ))))
                    }
                    _ => {
                        // For other targets (Hive/Spark/BigQuery): pass through as-is
                        // BigQuery's default group behavior matches Hive/Spark for 2-arg case
                        Ok(Expression::Function(Box::new(Function {
                            name: f.name,
                            args,
                            distinct: f.distinct,
                            trailing_comments: f.trailing_comments,
                            use_bracket_syntax: f.use_bracket_syntax,
                            no_parens: f.no_parens,
                            quoted: f.quoted,
                            span: None,
                            inferred_type: None,
                        })))
                    }
                }
            }
34385
            // BigQuery STRUCT(args) -> target-specific struct expression
            "STRUCT" => {
                // Convert Function args to Struct fields
                let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
                for (i, arg) in args.into_iter().enumerate() {
                    match arg {
                        Expression::Alias(a) => {
                            // Named field: expr AS name
                            fields.push((Some(a.alias.name.clone()), a.this));
                        }
                        other => {
                            // Unnamed field: for Spark/Hive, keep as None
                            // For Snowflake, auto-name as _N
                            // For DuckDB, use column name for column refs, _N for others
                            // (i is the 0-based argument index.)
                            if matches!(target, DialectType::Snowflake) {
                                fields.push((Some(format!("_{}", i)), other));
                            } else if matches!(target, DialectType::DuckDB) {
                                let auto_name = match &other {
                                    Expression::Column(col) => col.name.name.clone(),
                                    _ => format!("_{}", i),
                                };
                                fields.push((Some(auto_name), other));
                            } else {
                                fields.push((None, other));
                            }
                        }
                    }
                }

                match target {
                    DialectType::Snowflake => {
                        // OBJECT_CONSTRUCT('name', value, ...)
                        // Alternating key/value argument list; nameless fields
                        // contribute only their value.
                        let mut oc_args = Vec::new();
                        for (name, val) in &fields {
                            if let Some(n) = name {
                                oc_args.push(Expression::Literal(Box::new(Literal::String(
                                    n.clone(),
                                ))));
                                oc_args.push(val.clone());
                            } else {
                                oc_args.push(val.clone());
                            }
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "OBJECT_CONSTRUCT".to_string(),
                            oc_args,
                        ))))
                    }
                    DialectType::DuckDB => {
                        // {'name': value, ...}
                        Ok(Expression::Struct(Box::new(crate::expressions::Struct {
                            fields,
                        })))
                    }
                    DialectType::Hive => {
                        // STRUCT(val1, val2, ...) - strip aliases
                        let hive_fields: Vec<(Option<String>, Expression)> =
                            fields.into_iter().map(|(_, v)| (None, v)).collect();
                        Ok(Expression::Struct(Box::new(crate::expressions::Struct {
                            fields: hive_fields,
                        })))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Use Expression::Struct to bypass Spark target transform auto-naming
                        Ok(Expression::Struct(Box::new(crate::expressions::Struct {
                            fields,
                        })))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
                        let all_named =
                            !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
                        let all_types_inferable = all_named
                            && fields
                                .iter()
                                .all(|(_, val)| Self::can_infer_presto_type(val));
                        let row_args: Vec<Expression> =
                            fields.iter().map(|(_, v)| v.clone()).collect();
                        let row_expr = Expression::Function(Box::new(Function::new(
                            "ROW".to_string(),
                            row_args,
                        )));
                        if all_named && all_types_inferable {
                            // Build ROW type with inferred types
                            let mut row_type_fields = Vec::new();
                            for (name, val) in &fields {
                                if let Some(n) = name {
                                    let type_str = Self::infer_sql_type_for_presto(val);
                                    row_type_fields.push(crate::expressions::StructField::new(
                                        n.clone(),
                                        crate::expressions::DataType::Custom { name: type_str },
                                    ));
                                }
                            }
                            let row_type = crate::expressions::DataType::Struct {
                                fields: row_type_fields,
                                nested: true,
                            };
                            Ok(Expression::Cast(Box::new(Cast {
                                this: row_expr,
                                to: row_type,
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        } else {
                            // Anonymous ROW: field names/types can't be expressed.
                            Ok(row_expr)
                        }
                    }
                    _ => {
                        // Default: keep as STRUCT function with original args
                        // (re-wrapping named fields back into AS aliases).
                        let mut new_args = Vec::new();
                        for (name, val) in fields {
                            if let Some(n) = name {
                                new_args.push(Expression::Alias(Box::new(
                                    crate::expressions::Alias::new(val, Identifier::new(n)),
                                )));
                            } else {
                                new_args.push(val);
                            }
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRUCT".to_string(),
                            new_args,
                        ))))
                    }
                }
            }
34516
            // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
            "ROUND" if args.len() == 3 => {
                let x = args.remove(0);
                let n = args.remove(0);
                let mode = args.remove(0);
                // Check if mode is 'ROUND_HALF_EVEN' (case-insensitive string literal).
                let is_half_even = matches!(&mode, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN")));
                if is_half_even && matches!(target, DialectType::DuckDB) {
                    // DuckDB has a dedicated banker's-rounding function.
                    Ok(Expression::Function(Box::new(Function::new(
                        "ROUND_EVEN".to_string(),
                        vec![x, n],
                    ))))
                } else {
                    // Pass through with all args
                    Ok(Expression::Function(Box::new(Function::new(
                        "ROUND".to_string(),
                        vec![x, n, mode],
                    ))))
                }
            }
34537
34538 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
34539 "MAKE_INTERVAL" => {
34540 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
34541 // The positional args are: year, month
34542 // Named args are: day =>, minute =>, etc.
34543 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
34544 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
34545 // For BigQuery->BigQuery: reorder named args (day before minute)
34546 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
34547 let mut parts: Vec<(String, String)> = Vec::new();
34548 let mut pos_idx = 0;
34549 let pos_units = ["year", "month"];
34550 for arg in &args {
34551 if let Expression::NamedArgument(na) = arg {
34552 // Named arg like minute => 5
34553 let unit = na.name.name.clone();
34554 if let Expression::Literal(lit) = &na.value {
34555 if let Literal::Number(n) = lit.as_ref() {
34556 parts.push((unit, n.clone()));
34557 }
34558 }
34559 } else if pos_idx < pos_units.len() {
34560 if let Expression::Literal(lit) = arg {
34561 if let Literal::Number(n) = lit.as_ref() {
34562 parts.push((pos_units[pos_idx].to_string(), n.clone()));
34563 }
34564 }
34565 pos_idx += 1;
34566 }
34567 }
34568 // Don't sort - preserve original argument order
34569 let separator = if matches!(target, DialectType::Snowflake) {
34570 ", "
34571 } else {
34572 " "
34573 };
34574 let interval_str = parts
34575 .iter()
34576 .map(|(u, v)| format!("{} {}", v, u))
34577 .collect::<Vec<_>>()
34578 .join(separator);
34579 Ok(Expression::Interval(Box::new(
34580 crate::expressions::Interval {
34581 this: Some(Expression::Literal(Box::new(Literal::String(
34582 interval_str,
34583 )))),
34584 unit: None,
34585 },
34586 )))
34587 } else if matches!(target, DialectType::BigQuery) {
34588 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
34589 let mut positional = Vec::new();
34590 let mut named: Vec<(
34591 String,
34592 Expression,
34593 crate::expressions::NamedArgSeparator,
34594 )> = Vec::new();
34595 let _pos_units = ["year", "month"];
34596 let mut _pos_idx = 0;
34597 for arg in args {
34598 if let Expression::NamedArgument(na) = arg {
34599 named.push((na.name.name.clone(), na.value, na.separator));
34600 } else {
34601 positional.push(arg);
34602 _pos_idx += 1;
34603 }
34604 }
34605 // Sort named args by: day, hour, minute, second
34606 let unit_order = |u: &str| -> usize {
34607 match u.to_ascii_lowercase().as_str() {
34608 "day" => 0,
34609 "hour" => 1,
34610 "minute" => 2,
34611 "second" => 3,
34612 _ => 4,
34613 }
34614 };
34615 named.sort_by_key(|(u, _, _)| unit_order(u));
34616 let mut result_args = positional;
34617 for (name, value, sep) in named {
34618 result_args.push(Expression::NamedArgument(Box::new(
34619 crate::expressions::NamedArgument {
34620 name: Identifier::new(&name),
34621 value,
34622 separator: sep,
34623 },
34624 )));
34625 }
34626 Ok(Expression::Function(Box::new(Function::new(
34627 "MAKE_INTERVAL".to_string(),
34628 result_args,
34629 ))))
34630 } else {
34631 Ok(Expression::Function(Box::new(Function::new(
34632 "MAKE_INTERVAL".to_string(),
34633 args,
34634 ))))
34635 }
34636 }
34637
34638 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
34639 "ARRAY_TO_STRING" if args.len() == 3 => {
34640 let arr = args.remove(0);
34641 let sep = args.remove(0);
34642 let null_text = args.remove(0);
34643 match target {
34644 DialectType::DuckDB => {
34645 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
34646 let _lambda_param =
34647 Expression::Identifier(crate::expressions::Identifier::new("x"));
34648 let coalesce =
34649 Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
34650 original_name: None,
34651 expressions: vec![
34652 Expression::Identifier(crate::expressions::Identifier::new(
34653 "x",
34654 )),
34655 null_text,
34656 ],
34657 inferred_type: None,
34658 }));
34659 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
34660 parameters: vec![crate::expressions::Identifier::new("x")],
34661 body: coalesce,
34662 colon: false,
34663 parameter_types: vec![],
34664 }));
34665 let list_transform = Expression::Function(Box::new(Function::new(
34666 "LIST_TRANSFORM".to_string(),
34667 vec![arr, lambda],
34668 )));
34669 Ok(Expression::Function(Box::new(Function::new(
34670 "ARRAY_TO_STRING".to_string(),
34671 vec![list_transform, sep],
34672 ))))
34673 }
34674 _ => Ok(Expression::Function(Box::new(Function::new(
34675 "ARRAY_TO_STRING".to_string(),
34676 vec![arr, sep, null_text],
34677 )))),
34678 }
34679 }
34680
34681 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
34682 "LENGTH" if args.len() == 1 => {
34683 let arg = args.remove(0);
34684 match target {
34685 DialectType::DuckDB => {
34686 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
34687 let typeof_func = Expression::Function(Box::new(Function::new(
34688 "TYPEOF".to_string(),
34689 vec![arg.clone()],
34690 )));
34691 let blob_cast = Expression::Cast(Box::new(Cast {
34692 this: arg.clone(),
34693 to: DataType::VarBinary { length: None },
34694 trailing_comments: vec![],
34695 double_colon_syntax: false,
34696 format: None,
34697 default: None,
34698 inferred_type: None,
34699 }));
34700 let octet_length = Expression::Function(Box::new(Function::new(
34701 "OCTET_LENGTH".to_string(),
34702 vec![blob_cast],
34703 )));
34704 let text_cast = Expression::Cast(Box::new(Cast {
34705 this: arg,
34706 to: DataType::Text,
34707 trailing_comments: vec![],
34708 double_colon_syntax: false,
34709 format: None,
34710 default: None,
34711 inferred_type: None,
34712 }));
34713 let length_text = Expression::Function(Box::new(Function::new(
34714 "LENGTH".to_string(),
34715 vec![text_cast],
34716 )));
34717 Ok(Expression::Case(Box::new(crate::expressions::Case {
34718 operand: Some(typeof_func),
34719 whens: vec![(
34720 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
34721 octet_length,
34722 )],
34723 else_: Some(length_text),
34724 comments: Vec::new(),
34725 inferred_type: None,
34726 })))
34727 }
34728 _ => Ok(Expression::Function(Box::new(Function::new(
34729 "LENGTH".to_string(),
34730 vec![arg],
34731 )))),
34732 }
34733 }
34734
34735 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
34736 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
34737 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
34738 // The args should be [x, fraction] with the null handling stripped
34739 // For DuckDB: QUANTILE_CONT(x, fraction)
34740 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
34741 match target {
34742 DialectType::DuckDB => {
34743 // Strip down to just 2 args, rename to QUANTILE_CONT
34744 let x = args[0].clone();
34745 let frac = args[1].clone();
34746 Ok(Expression::Function(Box::new(Function::new(
34747 "QUANTILE_CONT".to_string(),
34748 vec![x, frac],
34749 ))))
34750 }
34751 _ => Ok(Expression::Function(Box::new(Function::new(
34752 "PERCENTILE_CONT".to_string(),
34753 args,
34754 )))),
34755 }
34756 }
34757
34758 // All others: pass through
34759 _ => Ok(Expression::Function(Box::new(Function {
34760 name: f.name,
34761 args,
34762 distinct: f.distinct,
34763 trailing_comments: f.trailing_comments,
34764 use_bracket_syntax: f.use_bracket_syntax,
34765 no_parens: f.no_parens,
34766 quoted: f.quoted,
34767 span: None,
34768 inferred_type: None,
34769 }))),
34770 }
34771 }
34772
34773 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
34774 /// Returns false for column references and other non-literal expressions where the type is unknown.
34775 fn can_infer_presto_type(expr: &Expression) -> bool {
34776 match expr {
34777 Expression::Literal(_) => true,
34778 Expression::Boolean(_) => true,
34779 Expression::Array(_) | Expression::ArrayFunc(_) => true,
34780 Expression::Struct(_) | Expression::StructFunc(_) => true,
34781 Expression::Function(f) => {
34782 f.name.eq_ignore_ascii_case("STRUCT")
34783 || f.name.eq_ignore_ascii_case("ROW")
34784 || f.name.eq_ignore_ascii_case("CURRENT_DATE")
34785 || f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
34786 || f.name.eq_ignore_ascii_case("NOW")
34787 }
34788 Expression::Cast(_) => true,
34789 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
34790 _ => false,
34791 }
34792 }
34793
34794 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
34795 fn infer_sql_type_for_presto(expr: &Expression) -> String {
34796 use crate::expressions::Literal;
34797 match expr {
34798 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
34799 "VARCHAR".to_string()
34800 }
34801 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
34802 let Literal::Number(n) = lit.as_ref() else {
34803 unreachable!()
34804 };
34805 if n.contains('.') {
34806 "DOUBLE".to_string()
34807 } else {
34808 "INTEGER".to_string()
34809 }
34810 }
34811 Expression::Boolean(_) => "BOOLEAN".to_string(),
34812 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
34813 "DATE".to_string()
34814 }
34815 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
34816 "TIMESTAMP".to_string()
34817 }
34818 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
34819 "TIMESTAMP".to_string()
34820 }
34821 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
34822 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
34823 Expression::Function(f) => {
34824 if f.name.eq_ignore_ascii_case("STRUCT") || f.name.eq_ignore_ascii_case("ROW") {
34825 "ROW".to_string()
34826 } else if f.name.eq_ignore_ascii_case("CURRENT_DATE") {
34827 "DATE".to_string()
34828 } else if f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
34829 || f.name.eq_ignore_ascii_case("NOW")
34830 {
34831 "TIMESTAMP".to_string()
34832 } else {
34833 "VARCHAR".to_string()
34834 }
34835 }
34836 Expression::Cast(c) => {
34837 // If already cast, use the target type
34838 Self::data_type_to_presto_string(&c.to)
34839 }
34840 _ => "VARCHAR".to_string(),
34841 }
34842 }
34843
34844 /// Convert a DataType to its Presto/Trino string representation for ROW type
34845 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
34846 use crate::expressions::DataType;
34847 match dt {
34848 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
34849 "VARCHAR".to_string()
34850 }
34851 DataType::Int { .. }
34852 | DataType::BigInt { .. }
34853 | DataType::SmallInt { .. }
34854 | DataType::TinyInt { .. } => "INTEGER".to_string(),
34855 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
34856 DataType::Boolean => "BOOLEAN".to_string(),
34857 DataType::Date => "DATE".to_string(),
34858 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
34859 DataType::Struct { fields, .. } => {
34860 let field_strs: Vec<String> = fields
34861 .iter()
34862 .map(|f| {
34863 format!(
34864 "{} {}",
34865 f.name,
34866 Self::data_type_to_presto_string(&f.data_type)
34867 )
34868 })
34869 .collect();
34870 format!("ROW({})", field_strs.join(", "))
34871 }
34872 DataType::Array { element_type, .. } => {
34873 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
34874 }
34875 DataType::Custom { name } => {
34876 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
34877 name.clone()
34878 }
34879 _ => "VARCHAR".to_string(),
34880 }
34881 }
34882
34883 /// Convert IntervalUnit to string
34884 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> &'static str {
34885 match unit {
34886 crate::expressions::IntervalUnit::Year => "YEAR",
34887 crate::expressions::IntervalUnit::Quarter => "QUARTER",
34888 crate::expressions::IntervalUnit::Month => "MONTH",
34889 crate::expressions::IntervalUnit::Week => "WEEK",
34890 crate::expressions::IntervalUnit::Day => "DAY",
34891 crate::expressions::IntervalUnit::Hour => "HOUR",
34892 crate::expressions::IntervalUnit::Minute => "MINUTE",
34893 crate::expressions::IntervalUnit::Second => "SECOND",
34894 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
34895 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
34896 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND",
34897 }
34898 }
34899
34900 /// Extract unit string from an expression (uppercased)
34901 fn get_unit_str_static(expr: &Expression) -> String {
34902 use crate::expressions::Literal;
34903 match expr {
34904 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
34905 Expression::Var(v) => v.this.to_ascii_uppercase(),
34906 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
34907 let Literal::String(s) = lit.as_ref() else {
34908 unreachable!()
34909 };
34910 s.to_ascii_uppercase()
34911 }
34912 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
34913 Expression::Function(f) => {
34914 let base = f.name.to_ascii_uppercase();
34915 if !f.args.is_empty() {
34916 let inner = Self::get_unit_str_static(&f.args[0]);
34917 format!("{}({})", base, inner)
34918 } else {
34919 base
34920 }
34921 }
34922 _ => "DAY".to_string(),
34923 }
34924 }
34925
34926 /// Parse unit string to IntervalUnit
34927 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
34928 match s {
34929 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
34930 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
34931 "MONTH" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
34932 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
34933 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
34934 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
34935 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
34936 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
34937 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
34938 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
34939 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
34940 _ => crate::expressions::IntervalUnit::Day,
34941 }
34942 }
34943
34944 /// Convert expression to simple string for interval building
34945 fn expr_to_string_static(expr: &Expression) -> String {
34946 use crate::expressions::Literal;
34947 match expr {
34948 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
34949 let Literal::Number(s) = lit.as_ref() else {
34950 unreachable!()
34951 };
34952 s.clone()
34953 }
34954 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
34955 let Literal::String(s) = lit.as_ref() else {
34956 unreachable!()
34957 };
34958 s.clone()
34959 }
34960 Expression::Identifier(id) => id.name.clone(),
34961 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
34962 _ => "1".to_string(),
34963 }
34964 }
34965
34966 /// Extract a simple string representation from a literal expression
34967 fn expr_to_string(expr: &Expression) -> String {
34968 use crate::expressions::Literal;
34969 match expr {
34970 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
34971 let Literal::Number(s) = lit.as_ref() else {
34972 unreachable!()
34973 };
34974 s.clone()
34975 }
34976 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
34977 let Literal::String(s) = lit.as_ref() else {
34978 unreachable!()
34979 };
34980 s.clone()
34981 }
34982 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
34983 Expression::Identifier(id) => id.name.clone(),
34984 _ => "1".to_string(),
34985 }
34986 }
34987
34988 /// Quote an interval value expression as a string literal if it's a number (or negated number)
34989 fn quote_interval_val(expr: &Expression) -> Expression {
34990 use crate::expressions::Literal;
34991 match expr {
34992 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
34993 let Literal::Number(n) = lit.as_ref() else {
34994 unreachable!()
34995 };
34996 Expression::Literal(Box::new(Literal::String(n.clone())))
34997 }
34998 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => expr.clone(),
34999 Expression::Neg(inner) => {
35000 if let Expression::Literal(lit) = &inner.this {
35001 if let Literal::Number(n) = lit.as_ref() {
35002 Expression::Literal(Box::new(Literal::String(format!("-{}", n))))
35003 } else {
35004 inner.this.clone()
35005 }
35006 } else {
35007 expr.clone()
35008 }
35009 }
35010 _ => expr.clone(),
35011 }
35012 }
35013
35014 /// Check if a timestamp string contains timezone info (offset like +02:00, or named timezone)
35015 fn timestamp_string_has_timezone(ts: &str) -> bool {
35016 let trimmed = ts.trim();
35017 // Check for numeric timezone offsets: +N, -N, +NN:NN, -NN:NN at end
35018 if let Some(last_space) = trimmed.rfind(' ') {
35019 let suffix = &trimmed[last_space + 1..];
35020 if (suffix.starts_with('+') || suffix.starts_with('-')) && suffix.len() > 1 {
35021 let rest = &suffix[1..];
35022 if rest.chars().all(|c| c.is_ascii_digit() || c == ':') {
35023 return true;
35024 }
35025 }
35026 }
35027 // Check for named timezone abbreviations
35028 let ts_lower = trimmed.to_ascii_lowercase();
35029 let tz_abbrevs = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];
35030 for abbrev in &tz_abbrevs {
35031 if ts_lower.ends_with(abbrev) {
35032 return true;
35033 }
35034 }
35035 false
35036 }
35037
35038 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
35039 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
35040 use crate::expressions::{Cast, DataType, Literal};
35041 match expr {
35042 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
35043 let Literal::Timestamp(s) = lit.as_ref() else {
35044 unreachable!()
35045 };
35046 let tz = func_name.starts_with("TIMESTAMP");
35047 Expression::Cast(Box::new(Cast {
35048 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35049 to: if tz {
35050 DataType::Timestamp {
35051 timezone: true,
35052 precision: None,
35053 }
35054 } else {
35055 DataType::Timestamp {
35056 timezone: false,
35057 precision: None,
35058 }
35059 },
35060 trailing_comments: vec![],
35061 double_colon_syntax: false,
35062 format: None,
35063 default: None,
35064 inferred_type: None,
35065 }))
35066 }
35067 other => other,
35068 }
35069 }
35070
35071 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
35072 fn maybe_cast_ts(expr: Expression) -> Expression {
35073 use crate::expressions::{Cast, DataType, Literal};
35074 match expr {
35075 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
35076 let Literal::Timestamp(s) = lit.as_ref() else {
35077 unreachable!()
35078 };
35079 Expression::Cast(Box::new(Cast {
35080 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35081 to: DataType::Timestamp {
35082 timezone: false,
35083 precision: None,
35084 },
35085 trailing_comments: vec![],
35086 double_colon_syntax: false,
35087 format: None,
35088 default: None,
35089 inferred_type: None,
35090 }))
35091 }
35092 other => other,
35093 }
35094 }
35095
35096 /// Convert DATE 'x' literal to CAST('x' AS DATE)
35097 fn date_literal_to_cast(expr: Expression) -> Expression {
35098 use crate::expressions::{Cast, DataType, Literal};
35099 match expr {
35100 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
35101 let Literal::Date(s) = lit.as_ref() else {
35102 unreachable!()
35103 };
35104 Expression::Cast(Box::new(Cast {
35105 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35106 to: DataType::Date,
35107 trailing_comments: vec![],
35108 double_colon_syntax: false,
35109 format: None,
35110 default: None,
35111 inferred_type: None,
35112 }))
35113 }
35114 other => other,
35115 }
35116 }
35117
35118 /// Ensure an expression that should be a date is CAST(... AS DATE).
35119 /// Handles both DATE literals and string literals that look like dates.
35120 fn ensure_cast_date(expr: Expression) -> Expression {
35121 use crate::expressions::{Cast, DataType, Literal};
35122 match expr {
35123 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
35124 let Literal::Date(s) = lit.as_ref() else {
35125 unreachable!()
35126 };
35127 Expression::Cast(Box::new(Cast {
35128 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35129 to: DataType::Date,
35130 trailing_comments: vec![],
35131 double_colon_syntax: false,
35132 format: None,
35133 default: None,
35134 inferred_type: None,
35135 }))
35136 }
35137 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
35138 // String literal that should be a date -> CAST('s' AS DATE)
35139 Expression::Cast(Box::new(Cast {
35140 this: expr,
35141 to: DataType::Date,
35142 trailing_comments: vec![],
35143 double_colon_syntax: false,
35144 format: None,
35145 default: None,
35146 inferred_type: None,
35147 }))
35148 }
35149 // Already a CAST or other expression -> leave as-is
35150 other => other,
35151 }
35152 }
35153
35154 /// Force CAST(expr AS DATE) for any expression (not just literals)
35155 /// Skips if the expression is already a CAST to DATE
35156 fn force_cast_date(expr: Expression) -> Expression {
35157 use crate::expressions::{Cast, DataType};
35158 // If it's already a CAST to DATE, don't double-wrap
35159 if let Expression::Cast(ref c) = expr {
35160 if matches!(c.to, DataType::Date) {
35161 return expr;
35162 }
35163 }
35164 Expression::Cast(Box::new(Cast {
35165 this: expr,
35166 to: DataType::Date,
35167 trailing_comments: vec![],
35168 double_colon_syntax: false,
35169 format: None,
35170 default: None,
35171 inferred_type: None,
35172 }))
35173 }
35174
35175 /// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
35176 /// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic.
35177 /// The Snowflake DATEDIFF handler converts these back to TO_DATE.
35178 const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
35179
35180 fn ensure_to_date_preserved(expr: Expression) -> Expression {
35181 use crate::expressions::{Function, Literal};
35182 if matches!(expr, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)))
35183 {
35184 Expression::Function(Box::new(Function::new(
35185 Self::PRESERVED_TO_DATE.to_string(),
35186 vec![expr],
35187 )))
35188 } else {
35189 expr
35190 }
35191 }
35192
35193 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
35194 fn try_cast_date(expr: Expression) -> Expression {
35195 use crate::expressions::{Cast, DataType};
35196 Expression::TryCast(Box::new(Cast {
35197 this: expr,
35198 to: DataType::Date,
35199 trailing_comments: vec![],
35200 double_colon_syntax: false,
35201 format: None,
35202 default: None,
35203 inferred_type: None,
35204 }))
35205 }
35206
35207 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
35208 fn double_cast_timestamp_date(expr: Expression) -> Expression {
35209 use crate::expressions::{Cast, DataType};
35210 let inner = Expression::Cast(Box::new(Cast {
35211 this: expr,
35212 to: DataType::Timestamp {
35213 timezone: false,
35214 precision: None,
35215 },
35216 trailing_comments: vec![],
35217 double_colon_syntax: false,
35218 format: None,
35219 default: None,
35220 inferred_type: None,
35221 }));
35222 Expression::Cast(Box::new(Cast {
35223 this: inner,
35224 to: DataType::Date,
35225 trailing_comments: vec![],
35226 double_colon_syntax: false,
35227 format: None,
35228 default: None,
35229 inferred_type: None,
35230 }))
35231 }
35232
35233 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
35234 fn double_cast_datetime_date(expr: Expression) -> Expression {
35235 use crate::expressions::{Cast, DataType};
35236 let inner = Expression::Cast(Box::new(Cast {
35237 this: expr,
35238 to: DataType::Custom {
35239 name: "DATETIME".to_string(),
35240 },
35241 trailing_comments: vec![],
35242 double_colon_syntax: false,
35243 format: None,
35244 default: None,
35245 inferred_type: None,
35246 }));
35247 Expression::Cast(Box::new(Cast {
35248 this: inner,
35249 to: DataType::Date,
35250 trailing_comments: vec![],
35251 double_colon_syntax: false,
35252 format: None,
35253 default: None,
35254 inferred_type: None,
35255 }))
35256 }
35257
35258 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
35259 fn double_cast_datetime2_date(expr: Expression) -> Expression {
35260 use crate::expressions::{Cast, DataType};
35261 let inner = Expression::Cast(Box::new(Cast {
35262 this: expr,
35263 to: DataType::Custom {
35264 name: "DATETIME2".to_string(),
35265 },
35266 trailing_comments: vec![],
35267 double_colon_syntax: false,
35268 format: None,
35269 default: None,
35270 inferred_type: None,
35271 }));
35272 Expression::Cast(Box::new(Cast {
35273 this: inner,
35274 to: DataType::Date,
35275 trailing_comments: vec![],
35276 double_colon_syntax: false,
35277 format: None,
35278 default: None,
35279 inferred_type: None,
35280 }))
35281 }
35282
35283 /// Convert Hive/Java-style date format strings to C-style (strftime) format
35284 /// e.g., "yyyy-MM-dd'T'HH" -> "%Y-%m-%d'T'%H"
35285 fn hive_format_to_c_format(fmt: &str) -> String {
35286 let mut result = String::new();
35287 let chars: Vec<char> = fmt.chars().collect();
35288 let mut i = 0;
35289 while i < chars.len() {
35290 match chars[i] {
35291 'y' => {
35292 let mut count = 0;
35293 while i < chars.len() && chars[i] == 'y' {
35294 count += 1;
35295 i += 1;
35296 }
35297 if count >= 4 {
35298 result.push_str("%Y");
35299 } else if count == 2 {
35300 result.push_str("%y");
35301 } else {
35302 result.push_str("%Y");
35303 }
35304 }
35305 'M' => {
35306 let mut count = 0;
35307 while i < chars.len() && chars[i] == 'M' {
35308 count += 1;
35309 i += 1;
35310 }
35311 if count >= 3 {
35312 result.push_str("%b");
35313 } else if count == 2 {
35314 result.push_str("%m");
35315 } else {
35316 result.push_str("%m");
35317 }
35318 }
35319 'd' => {
35320 let mut _count = 0;
35321 while i < chars.len() && chars[i] == 'd' {
35322 _count += 1;
35323 i += 1;
35324 }
35325 result.push_str("%d");
35326 }
35327 'H' => {
35328 let mut _count = 0;
35329 while i < chars.len() && chars[i] == 'H' {
35330 _count += 1;
35331 i += 1;
35332 }
35333 result.push_str("%H");
35334 }
35335 'h' => {
35336 let mut _count = 0;
35337 while i < chars.len() && chars[i] == 'h' {
35338 _count += 1;
35339 i += 1;
35340 }
35341 result.push_str("%I");
35342 }
35343 'm' => {
35344 let mut _count = 0;
35345 while i < chars.len() && chars[i] == 'm' {
35346 _count += 1;
35347 i += 1;
35348 }
35349 result.push_str("%M");
35350 }
35351 's' => {
35352 let mut _count = 0;
35353 while i < chars.len() && chars[i] == 's' {
35354 _count += 1;
35355 i += 1;
35356 }
35357 result.push_str("%S");
35358 }
35359 'S' => {
35360 // Fractional seconds - skip
35361 while i < chars.len() && chars[i] == 'S' {
35362 i += 1;
35363 }
35364 result.push_str("%f");
35365 }
35366 'a' => {
35367 // AM/PM
35368 while i < chars.len() && chars[i] == 'a' {
35369 i += 1;
35370 }
35371 result.push_str("%p");
35372 }
35373 'E' => {
35374 let mut count = 0;
35375 while i < chars.len() && chars[i] == 'E' {
35376 count += 1;
35377 i += 1;
35378 }
35379 if count >= 4 {
35380 result.push_str("%A");
35381 } else {
35382 result.push_str("%a");
35383 }
35384 }
35385 '\'' => {
35386 // Quoted literal text - pass through the quotes and content
35387 result.push('\'');
35388 i += 1;
35389 while i < chars.len() && chars[i] != '\'' {
35390 result.push(chars[i]);
35391 i += 1;
35392 }
35393 if i < chars.len() {
35394 result.push('\'');
35395 i += 1;
35396 }
35397 }
35398 c => {
35399 result.push(c);
35400 i += 1;
35401 }
35402 }
35403 }
35404 result
35405 }
35406
35407 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
35408 fn hive_format_to_presto_format(fmt: &str) -> String {
35409 let c_fmt = Self::hive_format_to_c_format(fmt);
35410 // Presto uses %T for HH:MM:SS
35411 c_fmt.replace("%H:%M:%S", "%T")
35412 }
35413
35414 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
35415 fn ensure_cast_timestamp(expr: Expression) -> Expression {
35416 use crate::expressions::{Cast, DataType, Literal};
35417 match expr {
35418 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
35419 let Literal::Timestamp(s) = lit.as_ref() else {
35420 unreachable!()
35421 };
35422 Expression::Cast(Box::new(Cast {
35423 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35424 to: DataType::Timestamp {
35425 timezone: false,
35426 precision: None,
35427 },
35428 trailing_comments: vec![],
35429 double_colon_syntax: false,
35430 format: None,
35431 default: None,
35432 inferred_type: None,
35433 }))
35434 }
35435 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
35436 Expression::Cast(Box::new(Cast {
35437 this: expr,
35438 to: DataType::Timestamp {
35439 timezone: false,
35440 precision: None,
35441 },
35442 trailing_comments: vec![],
35443 double_colon_syntax: false,
35444 format: None,
35445 default: None,
35446 inferred_type: None,
35447 }))
35448 }
35449 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
35450 let Literal::Datetime(s) = lit.as_ref() else {
35451 unreachable!()
35452 };
35453 Expression::Cast(Box::new(Cast {
35454 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35455 to: DataType::Timestamp {
35456 timezone: false,
35457 precision: None,
35458 },
35459 trailing_comments: vec![],
35460 double_colon_syntax: false,
35461 format: None,
35462 default: None,
35463 inferred_type: None,
35464 }))
35465 }
35466 other => other,
35467 }
35468 }
35469
35470 /// Force CAST to TIMESTAMP for any expression (not just literals)
35471 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
35472 fn force_cast_timestamp(expr: Expression) -> Expression {
35473 use crate::expressions::{Cast, DataType};
35474 // Don't double-wrap if already a CAST to TIMESTAMP
35475 if let Expression::Cast(ref c) = expr {
35476 if matches!(c.to, DataType::Timestamp { .. }) {
35477 return expr;
35478 }
35479 }
35480 Expression::Cast(Box::new(Cast {
35481 this: expr,
35482 to: DataType::Timestamp {
35483 timezone: false,
35484 precision: None,
35485 },
35486 trailing_comments: vec![],
35487 double_colon_syntax: false,
35488 format: None,
35489 default: None,
35490 inferred_type: None,
35491 }))
35492 }
35493
35494 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
35495 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
35496 use crate::expressions::{Cast, DataType, Literal};
35497 match expr {
35498 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
35499 let Literal::Timestamp(s) = lit.as_ref() else {
35500 unreachable!()
35501 };
35502 Expression::Cast(Box::new(Cast {
35503 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35504 to: DataType::Timestamp {
35505 timezone: true,
35506 precision: None,
35507 },
35508 trailing_comments: vec![],
35509 double_colon_syntax: false,
35510 format: None,
35511 default: None,
35512 inferred_type: None,
35513 }))
35514 }
35515 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
35516 Expression::Cast(Box::new(Cast {
35517 this: expr,
35518 to: DataType::Timestamp {
35519 timezone: true,
35520 precision: None,
35521 },
35522 trailing_comments: vec![],
35523 double_colon_syntax: false,
35524 format: None,
35525 default: None,
35526 inferred_type: None,
35527 }))
35528 }
35529 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
35530 let Literal::Datetime(s) = lit.as_ref() else {
35531 unreachable!()
35532 };
35533 Expression::Cast(Box::new(Cast {
35534 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35535 to: DataType::Timestamp {
35536 timezone: true,
35537 precision: None,
35538 },
35539 trailing_comments: vec![],
35540 double_colon_syntax: false,
35541 format: None,
35542 default: None,
35543 inferred_type: None,
35544 }))
35545 }
35546 other => other,
35547 }
35548 }
35549
35550 /// Ensure expression is CAST to DATETIME (for BigQuery)
35551 fn ensure_cast_datetime(expr: Expression) -> Expression {
35552 use crate::expressions::{Cast, DataType, Literal};
35553 match expr {
35554 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
35555 Expression::Cast(Box::new(Cast {
35556 this: expr,
35557 to: DataType::Custom {
35558 name: "DATETIME".to_string(),
35559 },
35560 trailing_comments: vec![],
35561 double_colon_syntax: false,
35562 format: None,
35563 default: None,
35564 inferred_type: None,
35565 }))
35566 }
35567 other => other,
35568 }
35569 }
35570
35571 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
35572 fn force_cast_datetime(expr: Expression) -> Expression {
35573 use crate::expressions::{Cast, DataType};
35574 if let Expression::Cast(ref c) = expr {
35575 if let DataType::Custom { ref name } = c.to {
35576 if name.eq_ignore_ascii_case("DATETIME") {
35577 return expr;
35578 }
35579 }
35580 }
35581 Expression::Cast(Box::new(Cast {
35582 this: expr,
35583 to: DataType::Custom {
35584 name: "DATETIME".to_string(),
35585 },
35586 trailing_comments: vec![],
35587 double_colon_syntax: false,
35588 format: None,
35589 default: None,
35590 inferred_type: None,
35591 }))
35592 }
35593
35594 /// Ensure expression is CAST to DATETIME2 (for TSQL)
35595 fn ensure_cast_datetime2(expr: Expression) -> Expression {
35596 use crate::expressions::{Cast, DataType, Literal};
35597 match expr {
35598 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
35599 Expression::Cast(Box::new(Cast {
35600 this: expr,
35601 to: DataType::Custom {
35602 name: "DATETIME2".to_string(),
35603 },
35604 trailing_comments: vec![],
35605 double_colon_syntax: false,
35606 format: None,
35607 default: None,
35608 inferred_type: None,
35609 }))
35610 }
35611 other => other,
35612 }
35613 }
35614
35615 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
35616 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
35617 use crate::expressions::{Cast, DataType, Literal};
35618 match expr {
35619 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
35620 let Literal::Timestamp(s) = lit.as_ref() else {
35621 unreachable!()
35622 };
35623 Expression::Cast(Box::new(Cast {
35624 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35625 to: DataType::Timestamp {
35626 timezone: true,
35627 precision: None,
35628 },
35629 trailing_comments: vec![],
35630 double_colon_syntax: false,
35631 format: None,
35632 default: None,
35633 inferred_type: None,
35634 }))
35635 }
35636 other => other,
35637 }
35638 }
35639
35640 /// Convert BigQuery format string to Snowflake format string
35641 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
35642 use crate::expressions::Literal;
35643 if let Expression::Literal(lit) = format_expr {
35644 if let Literal::String(s) = lit.as_ref() {
35645 let sf = s
35646 .replace("%Y", "yyyy")
35647 .replace("%m", "mm")
35648 .replace("%d", "DD")
35649 .replace("%H", "HH24")
35650 .replace("%M", "MI")
35651 .replace("%S", "SS")
35652 .replace("%b", "mon")
35653 .replace("%B", "Month")
35654 .replace("%e", "FMDD");
35655 Expression::Literal(Box::new(Literal::String(sf)))
35656 } else {
35657 format_expr.clone()
35658 }
35659 } else {
35660 format_expr.clone()
35661 }
35662 }
35663
35664 /// Convert BigQuery format string to DuckDB format string
35665 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
35666 use crate::expressions::Literal;
35667 if let Expression::Literal(lit) = format_expr {
35668 if let Literal::String(s) = lit.as_ref() {
35669 let duck = s
35670 .replace("%T", "%H:%M:%S")
35671 .replace("%F", "%Y-%m-%d")
35672 .replace("%D", "%m/%d/%y")
35673 .replace("%x", "%m/%d/%y")
35674 .replace("%c", "%a %b %-d %H:%M:%S %Y")
35675 .replace("%e", "%-d")
35676 .replace("%E6S", "%S.%f");
35677 Expression::Literal(Box::new(Literal::String(duck)))
35678 } else {
35679 format_expr.clone()
35680 }
35681 } else {
35682 format_expr.clone()
35683 }
35684 }
35685
35686 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
35687 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
35688 use crate::expressions::Literal;
35689 if let Expression::Literal(lit) = format_expr {
35690 if let Literal::String(s) = lit.as_ref() {
35691 // Replace format elements from longest to shortest to avoid partial matches
35692 let result = s
35693 .replace("YYYYMMDD", "%Y%m%d")
35694 .replace("YYYY", "%Y")
35695 .replace("YY", "%y")
35696 .replace("MONTH", "%B")
35697 .replace("MON", "%b")
35698 .replace("MM", "%m")
35699 .replace("DD", "%d")
35700 .replace("HH24", "%H")
35701 .replace("HH12", "%I")
35702 .replace("HH", "%I")
35703 .replace("MI", "%M")
35704 .replace("SSTZH", "%S%z")
35705 .replace("SS", "%S")
35706 .replace("TZH", "%z");
35707 Expression::Literal(Box::new(Literal::String(result)))
35708 } else {
35709 format_expr.clone()
35710 }
35711 } else {
35712 format_expr.clone()
35713 }
35714 }
35715
35716 /// Normalize BigQuery format strings for BQ->BQ output
35717 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
35718 use crate::expressions::Literal;
35719 if let Expression::Literal(lit) = format_expr {
35720 if let Literal::String(s) = lit.as_ref() {
35721 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
35722 Expression::Literal(Box::new(Literal::String(norm)))
35723 } else {
35724 format_expr.clone()
35725 }
35726 } else {
35727 format_expr.clone()
35728 }
35729 }
35730}
35731
35732#[cfg(test)]
35733mod tests {
35734 use super::*;
35735
35736 #[test]
35737 fn test_dialect_type_from_str() {
35738 assert_eq!(
35739 "postgres".parse::<DialectType>().unwrap(),
35740 DialectType::PostgreSQL
35741 );
35742 assert_eq!(
35743 "postgresql".parse::<DialectType>().unwrap(),
35744 DialectType::PostgreSQL
35745 );
35746 assert_eq!("mysql".parse::<DialectType>().unwrap(), DialectType::MySQL);
35747 assert_eq!(
35748 "bigquery".parse::<DialectType>().unwrap(),
35749 DialectType::BigQuery
35750 );
35751 }
35752
35753 #[test]
35754 fn test_basic_transpile() {
35755 let dialect = Dialect::get(DialectType::Generic);
35756 let result = dialect
35757 .transpile("SELECT 1", DialectType::PostgreSQL)
35758 .unwrap();
35759 assert_eq!(result.len(), 1);
35760 assert_eq!(result[0], "SELECT 1");
35761 }
35762
35763 #[test]
35764 fn test_function_transformation_mysql() {
35765 // NVL should be transformed to IFNULL in MySQL
35766 let dialect = Dialect::get(DialectType::Generic);
35767 let result = dialect
35768 .transpile("SELECT NVL(a, b)", DialectType::MySQL)
35769 .unwrap();
35770 assert_eq!(result[0], "SELECT IFNULL(a, b)");
35771 }
35772
35773 #[test]
35774 fn test_get_path_duckdb() {
35775 // Test: step by step
35776 let snowflake = Dialect::get(DialectType::Snowflake);
35777
35778 // Step 1: Parse and check what Snowflake produces as intermediate
35779 let result_sf_sf = snowflake
35780 .transpile(
35781 "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
35782 DialectType::Snowflake,
35783 )
35784 .unwrap();
35785 eprintln!("Snowflake->Snowflake colon: {}", result_sf_sf[0]);
35786
35787 // Step 2: DuckDB target
35788 let result_sf_dk = snowflake
35789 .transpile(
35790 "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
35791 DialectType::DuckDB,
35792 )
35793 .unwrap();
35794 eprintln!("Snowflake->DuckDB colon: {}", result_sf_dk[0]);
35795
35796 // Step 3: GET_PATH directly
35797 let result_gp = snowflake
35798 .transpile(
35799 "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')",
35800 DialectType::DuckDB,
35801 )
35802 .unwrap();
35803 eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", result_gp[0]);
35804 }
35805
35806 #[test]
35807 fn test_function_transformation_postgres() {
35808 // IFNULL should be transformed to COALESCE in PostgreSQL
35809 let dialect = Dialect::get(DialectType::Generic);
35810 let result = dialect
35811 .transpile("SELECT IFNULL(a, b)", DialectType::PostgreSQL)
35812 .unwrap();
35813 assert_eq!(result[0], "SELECT COALESCE(a, b)");
35814
35815 // NVL should also be transformed to COALESCE
35816 let result = dialect
35817 .transpile("SELECT NVL(a, b)", DialectType::PostgreSQL)
35818 .unwrap();
35819 assert_eq!(result[0], "SELECT COALESCE(a, b)");
35820 }
35821
35822 #[test]
35823 fn test_hive_cast_to_trycast() {
35824 // Hive CAST should become TRY_CAST for targets that support it
35825 let hive = Dialect::get(DialectType::Hive);
35826 let result = hive
35827 .transpile("CAST(1 AS INT)", DialectType::DuckDB)
35828 .unwrap();
35829 assert_eq!(result[0], "TRY_CAST(1 AS INT)");
35830
35831 let result = hive
35832 .transpile("CAST(1 AS INT)", DialectType::Presto)
35833 .unwrap();
35834 assert_eq!(result[0], "TRY_CAST(1 AS INTEGER)");
35835 }
35836
35837 #[test]
35838 fn test_hive_array_identity() {
35839 // Hive ARRAY<DATE> should preserve angle bracket syntax
35840 let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
35841 let hive = Dialect::get(DialectType::Hive);
35842
35843 // Test via transpile (this works)
35844 let result = hive.transpile(sql, DialectType::Hive).unwrap();
35845 eprintln!("Hive ARRAY via transpile: {}", result[0]);
35846 assert!(
35847 result[0].contains("ARRAY<DATE>"),
35848 "transpile: Expected ARRAY<DATE>, got: {}",
35849 result[0]
35850 );
35851
35852 // Test via parse -> transform -> generate (identity test path)
35853 let ast = hive.parse(sql).unwrap();
35854 let transformed = hive.transform(ast[0].clone()).unwrap();
35855 let output = hive.generate(&transformed).unwrap();
35856 eprintln!("Hive ARRAY via identity path: {}", output);
35857 assert!(
35858 output.contains("ARRAY<DATE>"),
35859 "identity path: Expected ARRAY<DATE>, got: {}",
35860 output
35861 );
35862 }
35863
35864 #[test]
35865 fn test_starrocks_delete_between_expansion() {
35866 // StarRocks doesn't support BETWEEN in DELETE statements
35867 let dialect = Dialect::get(DialectType::Generic);
35868
35869 // BETWEEN should be expanded to >= AND <= in DELETE
35870 let result = dialect
35871 .transpile(
35872 "DELETE FROM t WHERE a BETWEEN b AND c",
35873 DialectType::StarRocks,
35874 )
35875 .unwrap();
35876 assert_eq!(result[0], "DELETE FROM t WHERE a >= b AND a <= c");
35877
35878 // NOT BETWEEN should be expanded to < OR > in DELETE
35879 let result = dialect
35880 .transpile(
35881 "DELETE FROM t WHERE a NOT BETWEEN b AND c",
35882 DialectType::StarRocks,
35883 )
35884 .unwrap();
35885 assert_eq!(result[0], "DELETE FROM t WHERE a < b OR a > c");
35886
35887 // BETWEEN in SELECT should NOT be expanded (StarRocks supports it there)
35888 let result = dialect
35889 .transpile(
35890 "SELECT * FROM t WHERE a BETWEEN b AND c",
35891 DialectType::StarRocks,
35892 )
35893 .unwrap();
35894 assert!(
35895 result[0].contains("BETWEEN"),
35896 "BETWEEN should be preserved in SELECT"
35897 );
35898 }
35899
35900 #[test]
35901 fn test_snowflake_ltrim_rtrim_parse() {
35902 let sf = Dialect::get(DialectType::Snowflake);
35903 let sql = "SELECT LTRIM(RTRIM(col)) FROM t1";
35904 let result = sf.transpile(sql, DialectType::DuckDB);
35905 match &result {
35906 Ok(r) => eprintln!("LTRIM/RTRIM result: {}", r[0]),
35907 Err(e) => eprintln!("LTRIM/RTRIM error: {}", e),
35908 }
35909 assert!(
35910 result.is_ok(),
35911 "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
35912 result.err()
35913 );
35914 }
35915
35916 #[test]
35917 fn test_duckdb_count_if_parse() {
35918 let duck = Dialect::get(DialectType::DuckDB);
35919 let sql = "COUNT_IF(x)";
35920 let result = duck.transpile(sql, DialectType::DuckDB);
35921 match &result {
35922 Ok(r) => eprintln!("COUNT_IF result: {}", r[0]),
35923 Err(e) => eprintln!("COUNT_IF error: {}", e),
35924 }
35925 assert!(
35926 result.is_ok(),
35927 "Expected successful parse of COUNT_IF(x), got error: {:?}",
35928 result.err()
35929 );
35930 }
35931
35932 #[test]
35933 fn test_tsql_cast_tinyint_parse() {
35934 let tsql = Dialect::get(DialectType::TSQL);
35935 let sql = "CAST(X AS TINYINT)";
35936 let result = tsql.transpile(sql, DialectType::DuckDB);
35937 match &result {
35938 Ok(r) => eprintln!("TSQL CAST TINYINT result: {}", r[0]),
35939 Err(e) => eprintln!("TSQL CAST TINYINT error: {}", e),
35940 }
35941 assert!(
35942 result.is_ok(),
35943 "Expected successful transpile, got error: {:?}",
35944 result.err()
35945 );
35946 }
35947
35948 #[test]
35949 fn test_pg_hash_bitwise_xor() {
35950 let dialect = Dialect::get(DialectType::PostgreSQL);
35951 let result = dialect
35952 .transpile("x # y", DialectType::PostgreSQL)
35953 .unwrap();
35954 assert_eq!(result[0], "x # y");
35955 }
35956
35957 #[test]
35958 fn test_pg_array_to_duckdb() {
35959 let dialect = Dialect::get(DialectType::PostgreSQL);
35960 let result = dialect
35961 .transpile("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]", DialectType::DuckDB)
35962 .unwrap();
35963 assert_eq!(result[0], "SELECT [1, 2, 3] @> [1, 2]");
35964 }
35965
35966 #[test]
35967 fn test_array_remove_bigquery() {
35968 let dialect = Dialect::get(DialectType::Generic);
35969 let result = dialect
35970 .transpile("ARRAY_REMOVE(the_array, target)", DialectType::BigQuery)
35971 .unwrap();
35972 assert_eq!(
35973 result[0],
35974 "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)"
35975 );
35976 }
35977
35978 #[test]
35979 fn test_map_clickhouse_case() {
35980 let dialect = Dialect::get(DialectType::Generic);
35981 let parsed = dialect
35982 .parse("CAST(MAP('a', '1') AS MAP(TEXT, TEXT))")
35983 .unwrap();
35984 eprintln!("MAP parsed: {:?}", parsed);
35985 let result = dialect
35986 .transpile(
35987 "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))",
35988 DialectType::ClickHouse,
35989 )
35990 .unwrap();
35991 eprintln!("MAP result: {}", result[0]);
35992 }
35993
35994 #[test]
35995 fn test_generate_date_array_presto() {
35996 let dialect = Dialect::get(DialectType::Generic);
35997 let result = dialect.transpile(
35998 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
35999 DialectType::Presto,
36000 ).unwrap();
36001 eprintln!("GDA -> Presto: {}", result[0]);
36002 assert_eq!(result[0], "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))");
36003 }
36004
36005 #[test]
36006 fn test_generate_date_array_postgres() {
36007 let dialect = Dialect::get(DialectType::Generic);
36008 let result = dialect.transpile(
36009 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
36010 DialectType::PostgreSQL,
36011 ).unwrap();
36012 eprintln!("GDA -> PostgreSQL: {}", result[0]);
36013 }
36014
36015 #[test]
36016 fn test_generate_date_array_snowflake() {
36017 std::thread::Builder::new()
36018 .stack_size(16 * 1024 * 1024)
36019 .spawn(|| {
36020 let dialect = Dialect::get(DialectType::Generic);
36021 let result = dialect.transpile(
36022 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
36023 DialectType::Snowflake,
36024 ).unwrap();
36025 eprintln!("GDA -> Snowflake: {}", result[0]);
36026 })
36027 .unwrap()
36028 .join()
36029 .unwrap();
36030 }
36031
36032 #[test]
36033 fn test_array_length_generate_date_array_snowflake() {
36034 let dialect = Dialect::get(DialectType::Generic);
36035 let result = dialect.transpile(
36036 "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
36037 DialectType::Snowflake,
36038 ).unwrap();
36039 eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", result[0]);
36040 }
36041
36042 #[test]
36043 fn test_generate_date_array_mysql() {
36044 let dialect = Dialect::get(DialectType::Generic);
36045 let result = dialect.transpile(
36046 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
36047 DialectType::MySQL,
36048 ).unwrap();
36049 eprintln!("GDA -> MySQL: {}", result[0]);
36050 }
36051
36052 #[test]
36053 fn test_generate_date_array_redshift() {
36054 let dialect = Dialect::get(DialectType::Generic);
36055 let result = dialect.transpile(
36056 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
36057 DialectType::Redshift,
36058 ).unwrap();
36059 eprintln!("GDA -> Redshift: {}", result[0]);
36060 }
36061
36062 #[test]
36063 fn test_generate_date_array_tsql() {
36064 let dialect = Dialect::get(DialectType::Generic);
36065 let result = dialect.transpile(
36066 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
36067 DialectType::TSQL,
36068 ).unwrap();
36069 eprintln!("GDA -> TSQL: {}", result[0]);
36070 }
36071
36072 #[test]
36073 fn test_struct_colon_syntax() {
36074 let dialect = Dialect::get(DialectType::Generic);
36075 // Test without colon first
36076 let result = dialect.transpile(
36077 "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)",
36078 DialectType::ClickHouse,
36079 );
36080 match result {
36081 Ok(r) => eprintln!("STRUCT no colon -> ClickHouse: {}", r[0]),
36082 Err(e) => eprintln!("STRUCT no colon error: {}", e),
36083 }
36084 // Now test with colon
36085 let result = dialect.transpile(
36086 "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
36087 DialectType::ClickHouse,
36088 );
36089 match result {
36090 Ok(r) => eprintln!("STRUCT colon -> ClickHouse: {}", r[0]),
36091 Err(e) => eprintln!("STRUCT colon error: {}", e),
36092 }
36093 }
36094
36095 #[test]
36096 fn test_generate_date_array_cte_wrapped_mysql() {
36097 let dialect = Dialect::get(DialectType::Generic);
36098 let result = dialect.transpile(
36099 "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
36100 DialectType::MySQL,
36101 ).unwrap();
36102 eprintln!("GDA CTE -> MySQL: {}", result[0]);
36103 }
36104
36105 #[test]
36106 fn test_generate_date_array_cte_wrapped_tsql() {
36107 let dialect = Dialect::get(DialectType::Generic);
36108 let result = dialect.transpile(
36109 "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
36110 DialectType::TSQL,
36111 ).unwrap();
36112 eprintln!("GDA CTE -> TSQL: {}", result[0]);
36113 }
36114
36115 #[test]
36116 fn test_decode_literal_no_null_check() {
36117 // Oracle DECODE with all literals should produce simple equality, no IS NULL
36118 let dialect = Dialect::get(DialectType::Oracle);
36119 let result = dialect
36120 .transpile("SELECT decode(1,2,3,4)", DialectType::DuckDB)
36121 .unwrap();
36122 assert_eq!(
36123 result[0], "SELECT CASE WHEN 1 = 2 THEN 3 ELSE 4 END",
36124 "Literal DECODE should not have IS NULL checks"
36125 );
36126 }
36127
36128 #[test]
36129 fn test_decode_column_vs_literal_no_null_check() {
36130 // Oracle DECODE with column vs literal should use simple equality (like sqlglot)
36131 let dialect = Dialect::get(DialectType::Oracle);
36132 let result = dialect
36133 .transpile("SELECT decode(col, 2, 3, 4) FROM t", DialectType::DuckDB)
36134 .unwrap();
36135 assert_eq!(
36136 result[0], "SELECT CASE WHEN col = 2 THEN 3 ELSE 4 END FROM t",
36137 "Column vs literal DECODE should not have IS NULL checks"
36138 );
36139 }
36140
36141 #[test]
36142 fn test_decode_column_vs_column_keeps_null_check() {
36143 // Oracle DECODE with column vs column should keep null-safe comparison
36144 let dialect = Dialect::get(DialectType::Oracle);
36145 let result = dialect
36146 .transpile("SELECT decode(col, col2, 3, 4) FROM t", DialectType::DuckDB)
36147 .unwrap();
36148 assert!(
36149 result[0].contains("IS NULL"),
36150 "Column vs column DECODE should have IS NULL checks, got: {}",
36151 result[0]
36152 );
36153 }
36154
36155 #[test]
36156 fn test_decode_null_search() {
36157 // Oracle DECODE with NULL search should use IS NULL
36158 let dialect = Dialect::get(DialectType::Oracle);
36159 let result = dialect
36160 .transpile("SELECT decode(col, NULL, 3, 4) FROM t", DialectType::DuckDB)
36161 .unwrap();
36162 assert_eq!(
36163 result[0],
36164 "SELECT CASE WHEN col IS NULL THEN 3 ELSE 4 END FROM t",
36165 );
36166 }
36167
36168 // =========================================================================
36169 // REGEXP function transpilation tests
36170 // =========================================================================
36171
36172 #[test]
36173 fn test_regexp_substr_snowflake_to_duckdb_2arg() {
36174 let dialect = Dialect::get(DialectType::Snowflake);
36175 let result = dialect
36176 .transpile("SELECT REGEXP_SUBSTR(s, 'pattern')", DialectType::DuckDB)
36177 .unwrap();
36178 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
36179 }
36180
36181 #[test]
36182 fn test_regexp_substr_snowflake_to_duckdb_3arg_pos1() {
36183 let dialect = Dialect::get(DialectType::Snowflake);
36184 let result = dialect
36185 .transpile("SELECT REGEXP_SUBSTR(s, 'pattern', 1)", DialectType::DuckDB)
36186 .unwrap();
36187 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
36188 }
36189
36190 #[test]
36191 fn test_regexp_substr_snowflake_to_duckdb_3arg_pos_gt1() {
36192 let dialect = Dialect::get(DialectType::Snowflake);
36193 let result = dialect
36194 .transpile("SELECT REGEXP_SUBSTR(s, 'pattern', 3)", DialectType::DuckDB)
36195 .unwrap();
36196 assert_eq!(
36197 result[0],
36198 "SELECT REGEXP_EXTRACT(NULLIF(SUBSTRING(s, 3), ''), 'pattern')"
36199 );
36200 }
36201
36202 #[test]
36203 fn test_regexp_substr_snowflake_to_duckdb_4arg_occ_gt1() {
36204 let dialect = Dialect::get(DialectType::Snowflake);
36205 let result = dialect
36206 .transpile(
36207 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 3)",
36208 DialectType::DuckDB,
36209 )
36210 .unwrap();
36211 assert_eq!(
36212 result[0],
36213 "SELECT ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, 'pattern'), 3)"
36214 );
36215 }
36216
36217 #[test]
36218 fn test_regexp_substr_snowflake_to_duckdb_5arg_e_flag() {
36219 let dialect = Dialect::get(DialectType::Snowflake);
36220 let result = dialect
36221 .transpile(
36222 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')",
36223 DialectType::DuckDB,
36224 )
36225 .unwrap();
36226 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
36227 }
36228
36229 #[test]
36230 fn test_regexp_substr_snowflake_to_duckdb_6arg_group0() {
36231 let dialect = Dialect::get(DialectType::Snowflake);
36232 let result = dialect
36233 .transpile(
36234 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)",
36235 DialectType::DuckDB,
36236 )
36237 .unwrap();
36238 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
36239 }
36240
36241 #[test]
36242 fn test_regexp_substr_snowflake_identity_strip_group0() {
36243 let dialect = Dialect::get(DialectType::Snowflake);
36244 let result = dialect
36245 .transpile(
36246 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)",
36247 DialectType::Snowflake,
36248 )
36249 .unwrap();
36250 assert_eq!(result[0], "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')");
36251 }
36252
36253 #[test]
36254 fn test_regexp_substr_all_snowflake_to_duckdb_2arg() {
36255 let dialect = Dialect::get(DialectType::Snowflake);
36256 let result = dialect
36257 .transpile(
36258 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')",
36259 DialectType::DuckDB,
36260 )
36261 .unwrap();
36262 assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
36263 }
36264
36265 #[test]
36266 fn test_regexp_substr_all_snowflake_to_duckdb_3arg_pos_gt1() {
36267 let dialect = Dialect::get(DialectType::Snowflake);
36268 let result = dialect
36269 .transpile(
36270 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 3)",
36271 DialectType::DuckDB,
36272 )
36273 .unwrap();
36274 assert_eq!(
36275 result[0],
36276 "SELECT REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')"
36277 );
36278 }
36279
36280 #[test]
36281 fn test_regexp_substr_all_snowflake_to_duckdb_5arg_e_flag() {
36282 let dialect = Dialect::get(DialectType::Snowflake);
36283 let result = dialect
36284 .transpile(
36285 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')",
36286 DialectType::DuckDB,
36287 )
36288 .unwrap();
36289 assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
36290 }
36291
36292 #[test]
36293 fn test_regexp_substr_all_snowflake_to_duckdb_6arg_group0() {
36294 let dialect = Dialect::get(DialectType::Snowflake);
36295 let result = dialect
36296 .transpile(
36297 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)",
36298 DialectType::DuckDB,
36299 )
36300 .unwrap();
36301 assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
36302 }
36303
36304 #[test]
36305 fn test_regexp_substr_all_snowflake_identity_strip_group0() {
36306 let dialect = Dialect::get(DialectType::Snowflake);
36307 let result = dialect
36308 .transpile(
36309 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)",
36310 DialectType::Snowflake,
36311 )
36312 .unwrap();
36313 assert_eq!(
36314 result[0],
36315 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')"
36316 );
36317 }
36318
36319 #[test]
36320 fn test_regexp_count_snowflake_to_duckdb_2arg() {
36321 let dialect = Dialect::get(DialectType::Snowflake);
36322 let result = dialect
36323 .transpile("SELECT REGEXP_COUNT(s, 'pattern')", DialectType::DuckDB)
36324 .unwrap();
36325 assert_eq!(
36326 result[0],
36327 "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, 'pattern')) END"
36328 );
36329 }
36330
36331 #[test]
36332 fn test_regexp_count_snowflake_to_duckdb_3arg() {
36333 let dialect = Dialect::get(DialectType::Snowflake);
36334 let result = dialect
36335 .transpile("SELECT REGEXP_COUNT(s, 'pattern', 3)", DialectType::DuckDB)
36336 .unwrap();
36337 assert_eq!(
36338 result[0],
36339 "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')) END"
36340 );
36341 }
36342
36343 #[test]
36344 fn test_regexp_count_snowflake_to_duckdb_4arg_flags() {
36345 let dialect = Dialect::get(DialectType::Snowflake);
36346 let result = dialect
36347 .transpile(
36348 "SELECT REGEXP_COUNT(s, 'pattern', 1, 'i')",
36349 DialectType::DuckDB,
36350 )
36351 .unwrap();
36352 assert_eq!(
36353 result[0],
36354 "SELECT CASE WHEN '(?i)' || 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 1), '(?i)' || 'pattern')) END"
36355 );
36356 }
36357
36358 #[test]
36359 fn test_regexp_count_snowflake_to_duckdb_4arg_flags_literal_string() {
36360 let dialect = Dialect::get(DialectType::Snowflake);
36361 let result = dialect
36362 .transpile(
36363 "SELECT REGEXP_COUNT('Hello World', 'L', 1, 'im')",
36364 DialectType::DuckDB,
36365 )
36366 .unwrap();
36367 assert_eq!(
36368 result[0],
36369 "SELECT CASE WHEN '(?im)' || 'L' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING('Hello World', 1), '(?im)' || 'L')) END"
36370 );
36371 }
36372
36373 #[test]
36374 fn test_regexp_replace_snowflake_to_duckdb_5arg_pos1_occ1() {
36375 let dialect = Dialect::get(DialectType::Snowflake);
36376 let result = dialect
36377 .transpile(
36378 "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 1, 1)",
36379 DialectType::DuckDB,
36380 )
36381 .unwrap();
36382 assert_eq!(result[0], "SELECT REGEXP_REPLACE(s, 'pattern', 'repl')");
36383 }
36384
36385 #[test]
36386 fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ0() {
36387 let dialect = Dialect::get(DialectType::Snowflake);
36388 let result = dialect
36389 .transpile(
36390 "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 0)",
36391 DialectType::DuckDB,
36392 )
36393 .unwrap();
36394 assert_eq!(
36395 result[0],
36396 "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl', 'g')"
36397 );
36398 }
36399
36400 #[test]
36401 fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ1() {
36402 let dialect = Dialect::get(DialectType::Snowflake);
36403 let result = dialect
36404 .transpile(
36405 "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 1)",
36406 DialectType::DuckDB,
36407 )
36408 .unwrap();
36409 assert_eq!(
36410 result[0],
36411 "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl')"
36412 );
36413 }
36414
36415 #[test]
36416 fn test_rlike_snowflake_to_duckdb_2arg() {
36417 let dialect = Dialect::get(DialectType::Snowflake);
36418 let result = dialect
36419 .transpile("SELECT RLIKE(a, b)", DialectType::DuckDB)
36420 .unwrap();
36421 assert_eq!(result[0], "SELECT REGEXP_FULL_MATCH(a, b)");
36422 }
36423
36424 #[test]
36425 fn test_rlike_snowflake_to_duckdb_3arg_flags() {
36426 let dialect = Dialect::get(DialectType::Snowflake);
36427 let result = dialect
36428 .transpile("SELECT RLIKE(a, b, 'i')", DialectType::DuckDB)
36429 .unwrap();
36430 assert_eq!(result[0], "SELECT REGEXP_FULL_MATCH(a, b, 'i')");
36431 }
36432
36433 #[test]
36434 fn test_regexp_extract_all_bigquery_to_snowflake_no_capture() {
36435 let dialect = Dialect::get(DialectType::BigQuery);
36436 let result = dialect
36437 .transpile(
36438 "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')",
36439 DialectType::Snowflake,
36440 )
36441 .unwrap();
36442 assert_eq!(result[0], "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')");
36443 }
36444
36445 #[test]
36446 fn test_regexp_extract_all_bigquery_to_snowflake_with_capture() {
36447 let dialect = Dialect::get(DialectType::BigQuery);
36448 let result = dialect
36449 .transpile(
36450 "SELECT REGEXP_EXTRACT_ALL(s, '(a)[0-9]')",
36451 DialectType::Snowflake,
36452 )
36453 .unwrap();
36454 assert_eq!(
36455 result[0],
36456 "SELECT REGEXP_SUBSTR_ALL(s, '(a)[0-9]', 1, 1, 'c', 1)"
36457 );
36458 }
36459
36460 #[test]
36461 fn test_regexp_instr_snowflake_to_duckdb_2arg() {
36462 let handle = std::thread::Builder::new()
36463 .stack_size(16 * 1024 * 1024)
36464 .spawn(|| {
36465 let dialect = Dialect::get(DialectType::Snowflake);
36466 let result = dialect
36467 .transpile("SELECT REGEXP_INSTR(s, 'pattern')", DialectType::DuckDB)
36468 .unwrap();
36469 // Should produce a CASE WHEN expression
36470 assert!(
36471 result[0].contains("CASE WHEN"),
36472 "Expected CASE WHEN in result: {}",
36473 result[0]
36474 );
36475 assert!(
36476 result[0].contains("LIST_SUM"),
36477 "Expected LIST_SUM in result: {}",
36478 result[0]
36479 );
36480 })
36481 .unwrap();
36482 handle.join().unwrap();
36483 }
36484
36485 #[test]
36486 fn test_array_except_generic_to_duckdb() {
36487 // Use larger stack to avoid overflow from deeply nested expression Drop
36488 let handle = std::thread::Builder::new()
36489 .stack_size(16 * 1024 * 1024)
36490 .spawn(|| {
36491 let dialect = Dialect::get(DialectType::Generic);
36492 let result = dialect
36493 .transpile(
36494 "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))",
36495 DialectType::DuckDB,
36496 )
36497 .unwrap();
36498 eprintln!("ARRAY_EXCEPT Generic->DuckDB: {}", result[0]);
36499 assert!(
36500 result[0].contains("CASE WHEN"),
36501 "Expected CASE WHEN: {}",
36502 result[0]
36503 );
36504 assert!(
36505 result[0].contains("LIST_FILTER"),
36506 "Expected LIST_FILTER: {}",
36507 result[0]
36508 );
36509 assert!(
36510 result[0].contains("LIST_DISTINCT"),
36511 "Expected LIST_DISTINCT: {}",
36512 result[0]
36513 );
36514 assert!(
36515 result[0].contains("IS NOT DISTINCT FROM"),
36516 "Expected IS NOT DISTINCT FROM: {}",
36517 result[0]
36518 );
36519 assert!(
36520 result[0].contains("= 0"),
36521 "Expected = 0 filter: {}",
36522 result[0]
36523 );
36524 })
36525 .unwrap();
36526 handle.join().unwrap();
36527 }
36528
36529 #[test]
36530 fn test_array_except_generic_to_snowflake() {
36531 let dialect = Dialect::get(DialectType::Generic);
36532 let result = dialect
36533 .transpile(
36534 "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))",
36535 DialectType::Snowflake,
36536 )
36537 .unwrap();
36538 eprintln!("ARRAY_EXCEPT Generic->Snowflake: {}", result[0]);
36539 assert_eq!(result[0], "SELECT ARRAY_EXCEPT([1, 2, 3], [2])");
36540 }
36541
#[test]
fn test_array_except_generic_to_presto() {
    // Presto keeps ARRAY_EXCEPT as-is; only the array literal syntax changes.
    let output = Dialect::get(DialectType::Generic)
        .transpile(
            "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))",
            DialectType::Presto,
        )
        .unwrap();
    eprintln!("ARRAY_EXCEPT Generic->Presto: {}", output[0]);
    assert_eq!(output[0], "SELECT ARRAY_EXCEPT(ARRAY[1, 2, 3], ARRAY[2])");
}
36554
#[test]
fn test_array_except_snowflake_to_duckdb() {
    // Run on a thread with a 16 MiB stack: dropping the deeply nested
    // expression tree produced by this rewrite can overflow the default stack.
    let worker = std::thread::Builder::new()
        .stack_size(16 * 1024 * 1024)
        .spawn(|| {
            let sql = Dialect::get(DialectType::Snowflake)
                .transpile("SELECT ARRAY_EXCEPT([1, 2, 3], [2])", DialectType::DuckDB)
                .unwrap();
            eprintln!("ARRAY_EXCEPT Snowflake->DuckDB: {}", sql[0]);
            for needle in ["CASE WHEN", "LIST_TRANSFORM"] {
                assert!(sql[0].contains(needle), "Expected {}: {}", needle, sql[0]);
            }
        })
        .unwrap();
    worker.join().unwrap();
}
36579
#[test]
fn test_array_contains_snowflake_to_snowflake() {
    // Round-trip within the same dialect must be a no-op.
    let input = "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])";
    let output = Dialect::get(DialectType::Snowflake)
        .transpile(input, DialectType::Snowflake)
        .unwrap();
    eprintln!("ARRAY_CONTAINS Snowflake->Snowflake: {}", output[0]);
    assert_eq!(output[0], "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])");
}
36592
#[test]
fn test_array_contains_snowflake_to_duckdb() {
    // Consistency fix: the other Snowflake->DuckDB rewrite tests in this file
    // run on a thread with a 16 MiB stack because dropping the deeply nested
    // expression tree produced by these rewrites can overflow the default
    // test-thread stack. Apply the same guard here; assertions are unchanged.
    let handle = std::thread::Builder::new()
        .stack_size(16 * 1024 * 1024)
        .spawn(|| {
            let dialect = Dialect::get(DialectType::Snowflake);
            let result = dialect
                .transpile(
                    "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])",
                    DialectType::DuckDB,
                )
                .unwrap();
            eprintln!("ARRAY_CONTAINS Snowflake->DuckDB: {}", result[0]);
            // The DuckDB rewrite must NULL-guard the haystack and keep the
            // ARRAY_CONTAINS call form.
            assert!(
                result[0].contains("CASE WHEN"),
                "Expected CASE WHEN: {}",
                result[0]
            );
            assert!(
                result[0].contains("NULLIF"),
                "Expected NULLIF: {}",
                result[0]
            );
            assert!(
                result[0].contains("ARRAY_CONTAINS"),
                "Expected ARRAY_CONTAINS: {}",
                result[0]
            );
        })
        .unwrap();
    handle.join().unwrap();
}
36619
#[test]
fn test_array_distinct_snowflake_to_duckdb() {
    // Consistency fix: the other Snowflake->DuckDB rewrite tests in this file
    // run on a thread with a 16 MiB stack because dropping the deeply nested
    // expression tree produced by these rewrites can overflow the default
    // test-thread stack. Apply the same guard here; assertions are unchanged.
    let handle = std::thread::Builder::new()
        .stack_size(16 * 1024 * 1024)
        .spawn(|| {
            let dialect = Dialect::get(DialectType::Snowflake);
            let result = dialect
                .transpile(
                    "SELECT ARRAY_DISTINCT([1, 2, 2, 3, 1])",
                    DialectType::DuckDB,
                )
                .unwrap();
            eprintln!("ARRAY_DISTINCT Snowflake->DuckDB: {}", result[0]);
            // The DuckDB rewrite dedupes via LIST_DISTINCT and re-appends a
            // NULL element when one was present, hence the extra helpers.
            assert!(
                result[0].contains("CASE WHEN"),
                "Expected CASE WHEN: {}",
                result[0]
            );
            assert!(
                result[0].contains("LIST_DISTINCT"),
                "Expected LIST_DISTINCT: {}",
                result[0]
            );
            assert!(
                result[0].contains("LIST_APPEND"),
                "Expected LIST_APPEND: {}",
                result[0]
            );
            assert!(
                result[0].contains("LIST_FILTER"),
                "Expected LIST_FILTER: {}",
                result[0]
            );
        })
        .unwrap();
    handle.join().unwrap();
}
36651}