// polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
6//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery uses backtick quoting,
7//! MySQL uses backtick for identifiers, TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile_to`](Dialect::transpile_to) another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic; // Always compiled
23
24#[cfg(feature = "dialect-athena")]
25mod athena;
26#[cfg(feature = "dialect-bigquery")]
27mod bigquery;
28#[cfg(feature = "dialect-clickhouse")]
29mod clickhouse;
30#[cfg(feature = "dialect-cockroachdb")]
31mod cockroachdb;
32#[cfg(feature = "dialect-databricks")]
33mod databricks;
34#[cfg(feature = "dialect-datafusion")]
35mod datafusion;
36#[cfg(feature = "dialect-doris")]
37mod doris;
38#[cfg(feature = "dialect-dremio")]
39mod dremio;
40#[cfg(feature = "dialect-drill")]
41mod drill;
42#[cfg(feature = "dialect-druid")]
43mod druid;
44#[cfg(feature = "dialect-duckdb")]
45mod duckdb;
46#[cfg(feature = "dialect-dune")]
47mod dune;
48#[cfg(feature = "dialect-exasol")]
49mod exasol;
50#[cfg(feature = "dialect-fabric")]
51mod fabric;
52#[cfg(feature = "dialect-hive")]
53mod hive;
54#[cfg(feature = "dialect-materialize")]
55mod materialize;
56#[cfg(feature = "dialect-mysql")]
57mod mysql;
58#[cfg(feature = "dialect-oracle")]
59mod oracle;
60#[cfg(feature = "dialect-postgresql")]
61mod postgres;
62#[cfg(feature = "dialect-presto")]
63mod presto;
64#[cfg(feature = "dialect-redshift")]
65mod redshift;
66#[cfg(feature = "dialect-risingwave")]
67mod risingwave;
68#[cfg(feature = "dialect-singlestore")]
69mod singlestore;
70#[cfg(feature = "dialect-snowflake")]
71mod snowflake;
72#[cfg(feature = "dialect-solr")]
73mod solr;
74#[cfg(feature = "dialect-spark")]
75mod spark;
76#[cfg(feature = "dialect-sqlite")]
77mod sqlite;
78#[cfg(feature = "dialect-starrocks")]
79mod starrocks;
80#[cfg(feature = "dialect-tableau")]
81mod tableau;
82#[cfg(feature = "dialect-teradata")]
83mod teradata;
84#[cfg(feature = "dialect-tidb")]
85mod tidb;
86#[cfg(feature = "dialect-trino")]
87mod trino;
88#[cfg(feature = "dialect-tsql")]
89mod tsql;
90
91pub use generic::GenericDialect; // Always available
92
93#[cfg(feature = "dialect-athena")]
94pub use athena::AthenaDialect;
95#[cfg(feature = "dialect-bigquery")]
96pub use bigquery::BigQueryDialect;
97#[cfg(feature = "dialect-clickhouse")]
98pub use clickhouse::ClickHouseDialect;
99#[cfg(feature = "dialect-cockroachdb")]
100pub use cockroachdb::CockroachDBDialect;
101#[cfg(feature = "dialect-databricks")]
102pub use databricks::DatabricksDialect;
103#[cfg(feature = "dialect-datafusion")]
104pub use datafusion::DataFusionDialect;
105#[cfg(feature = "dialect-doris")]
106pub use doris::DorisDialect;
107#[cfg(feature = "dialect-dremio")]
108pub use dremio::DremioDialect;
109#[cfg(feature = "dialect-drill")]
110pub use drill::DrillDialect;
111#[cfg(feature = "dialect-druid")]
112pub use druid::DruidDialect;
113#[cfg(feature = "dialect-duckdb")]
114pub use duckdb::DuckDBDialect;
115#[cfg(feature = "dialect-dune")]
116pub use dune::DuneDialect;
117#[cfg(feature = "dialect-exasol")]
118pub use exasol::ExasolDialect;
119#[cfg(feature = "dialect-fabric")]
120pub use fabric::FabricDialect;
121#[cfg(feature = "dialect-hive")]
122pub use hive::HiveDialect;
123#[cfg(feature = "dialect-materialize")]
124pub use materialize::MaterializeDialect;
125#[cfg(feature = "dialect-mysql")]
126pub use mysql::MySQLDialect;
127#[cfg(feature = "dialect-oracle")]
128pub use oracle::OracleDialect;
129#[cfg(feature = "dialect-postgresql")]
130pub use postgres::PostgresDialect;
131#[cfg(feature = "dialect-presto")]
132pub use presto::PrestoDialect;
133#[cfg(feature = "dialect-redshift")]
134pub use redshift::RedshiftDialect;
135#[cfg(feature = "dialect-risingwave")]
136pub use risingwave::RisingWaveDialect;
137#[cfg(feature = "dialect-singlestore")]
138pub use singlestore::SingleStoreDialect;
139#[cfg(feature = "dialect-snowflake")]
140pub use snowflake::SnowflakeDialect;
141#[cfg(feature = "dialect-solr")]
142pub use solr::SolrDialect;
143#[cfg(feature = "dialect-spark")]
144pub use spark::SparkDialect;
145#[cfg(feature = "dialect-sqlite")]
146pub use sqlite::SQLiteDialect;
147#[cfg(feature = "dialect-starrocks")]
148pub use starrocks::StarRocksDialect;
149#[cfg(feature = "dialect-tableau")]
150pub use tableau::TableauDialect;
151#[cfg(feature = "dialect-teradata")]
152pub use teradata::TeradataDialect;
153#[cfg(feature = "dialect-tidb")]
154pub use tidb::TiDBDialect;
155#[cfg(feature = "dialect-trino")]
156pub use trino::TrinoDialect;
157#[cfg(feature = "dialect-tsql")]
158pub use tsql::TSQLDialect;
159
160use crate::error::Result;
161use crate::expressions::{Expression, FunctionBody};
162use crate::generator::{Generator, GeneratorConfig};
163use crate::parser::Parser;
164use crate::tokens::{Token, Tokenizer, TokenizerConfig};
165use serde::{Deserialize, Serialize};
166use std::collections::HashMap;
167use std::sync::{Arc, LazyLock, RwLock};
168
/// Enumeration of all supported SQL dialects.
///
/// Each variant corresponds to a specific SQL database engine or query language.
/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
/// and is used as the default when no dialect is specified.
///
/// Dialect names are case-insensitive when parsed from strings via [`FromStr`],
/// and serde serialization uses the same lowercase names (via `rename_all`).
/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DialectType {
    /// Standard SQL with no dialect-specific behavior (default).
    Generic,
    /// PostgreSQL -- advanced open-source relational database (also accepts "postgres").
    PostgreSQL,
    /// MySQL -- widely-used open-source relational database.
    MySQL,
    /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
    BigQuery,
    /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
    Snowflake,
    /// DuckDB -- in-process analytical database with modern SQL extensions.
    DuckDB,
    /// SQLite -- lightweight embedded relational database.
    SQLite,
    /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
    Hive,
    /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
    Spark,
    /// Trino -- distributed SQL query engine (formerly PrestoSQL).
    Trino,
    /// PrestoDB -- distributed SQL query engine for big data.
    Presto,
    /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
    Redshift,
    /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
    TSQL,
    /// Oracle Database -- commercial relational database with PL/SQL extensions.
    Oracle,
    /// ClickHouse -- column-oriented OLAP database for real-time analytics.
    ClickHouse,
    /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
    Databricks,
    /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
    Athena,
    /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
    Teradata,
    /// Apache Doris -- real-time analytical database (MySQL-compatible).
    Doris,
    /// StarRocks -- sub-second OLAP database (MySQL-compatible).
    StarRocks,
    /// Materialize -- streaming SQL database built on differential dataflow.
    Materialize,
    /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
    RisingWave,
    /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
    SingleStore,
    /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
    CockroachDB,
    /// TiDB -- distributed HTAP database with MySQL compatibility.
    TiDB,
    /// Apache Druid -- real-time analytics database.
    Druid,
    /// Apache Solr -- search platform with SQL interface.
    Solr,
    /// Tableau -- data visualization platform with its own SQL dialect.
    Tableau,
    /// Dune Analytics -- blockchain analytics SQL engine.
    Dune,
    /// Microsoft Fabric -- unified analytics platform (T-SQL based).
    Fabric,
    /// Apache Drill -- schema-free SQL query engine for big data.
    Drill,
    /// Dremio -- data lakehouse platform with Arrow-based query engine.
    Dremio,
    /// Exasol -- in-memory analytic database.
    Exasol,
    /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions (also accepts "arrow-datafusion", "arrow_datafusion").
    DataFusion,
}
249
250impl Default for DialectType {
251 fn default() -> Self {
252 DialectType::Generic
253 }
254}
255
256impl std::fmt::Display for DialectType {
257 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258 match self {
259 DialectType::Generic => write!(f, "generic"),
260 DialectType::PostgreSQL => write!(f, "postgresql"),
261 DialectType::MySQL => write!(f, "mysql"),
262 DialectType::BigQuery => write!(f, "bigquery"),
263 DialectType::Snowflake => write!(f, "snowflake"),
264 DialectType::DuckDB => write!(f, "duckdb"),
265 DialectType::SQLite => write!(f, "sqlite"),
266 DialectType::Hive => write!(f, "hive"),
267 DialectType::Spark => write!(f, "spark"),
268 DialectType::Trino => write!(f, "trino"),
269 DialectType::Presto => write!(f, "presto"),
270 DialectType::Redshift => write!(f, "redshift"),
271 DialectType::TSQL => write!(f, "tsql"),
272 DialectType::Oracle => write!(f, "oracle"),
273 DialectType::ClickHouse => write!(f, "clickhouse"),
274 DialectType::Databricks => write!(f, "databricks"),
275 DialectType::Athena => write!(f, "athena"),
276 DialectType::Teradata => write!(f, "teradata"),
277 DialectType::Doris => write!(f, "doris"),
278 DialectType::StarRocks => write!(f, "starrocks"),
279 DialectType::Materialize => write!(f, "materialize"),
280 DialectType::RisingWave => write!(f, "risingwave"),
281 DialectType::SingleStore => write!(f, "singlestore"),
282 DialectType::CockroachDB => write!(f, "cockroachdb"),
283 DialectType::TiDB => write!(f, "tidb"),
284 DialectType::Druid => write!(f, "druid"),
285 DialectType::Solr => write!(f, "solr"),
286 DialectType::Tableau => write!(f, "tableau"),
287 DialectType::Dune => write!(f, "dune"),
288 DialectType::Fabric => write!(f, "fabric"),
289 DialectType::Drill => write!(f, "drill"),
290 DialectType::Dremio => write!(f, "dremio"),
291 DialectType::Exasol => write!(f, "exasol"),
292 DialectType::DataFusion => write!(f, "datafusion"),
293 }
294 }
295}
296
297impl std::str::FromStr for DialectType {
298 type Err = crate::error::Error;
299
300 fn from_str(s: &str) -> Result<Self> {
301 match s.to_ascii_lowercase().as_str() {
302 "generic" | "" => Ok(DialectType::Generic),
303 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
304 "mysql" => Ok(DialectType::MySQL),
305 "bigquery" => Ok(DialectType::BigQuery),
306 "snowflake" => Ok(DialectType::Snowflake),
307 "duckdb" => Ok(DialectType::DuckDB),
308 "sqlite" => Ok(DialectType::SQLite),
309 "hive" => Ok(DialectType::Hive),
310 "spark" | "spark2" => Ok(DialectType::Spark),
311 "trino" => Ok(DialectType::Trino),
312 "presto" => Ok(DialectType::Presto),
313 "redshift" => Ok(DialectType::Redshift),
314 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
315 "oracle" => Ok(DialectType::Oracle),
316 "clickhouse" => Ok(DialectType::ClickHouse),
317 "databricks" => Ok(DialectType::Databricks),
318 "athena" => Ok(DialectType::Athena),
319 "teradata" => Ok(DialectType::Teradata),
320 "doris" => Ok(DialectType::Doris),
321 "starrocks" => Ok(DialectType::StarRocks),
322 "materialize" => Ok(DialectType::Materialize),
323 "risingwave" => Ok(DialectType::RisingWave),
324 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
325 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
326 "tidb" => Ok(DialectType::TiDB),
327 "druid" => Ok(DialectType::Druid),
328 "solr" => Ok(DialectType::Solr),
329 "tableau" => Ok(DialectType::Tableau),
330 "dune" => Ok(DialectType::Dune),
331 "fabric" => Ok(DialectType::Fabric),
332 "drill" => Ok(DialectType::Drill),
333 "dremio" => Ok(DialectType::Dremio),
334 "exasol" => Ok(DialectType::Exasol),
335 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
336 _ => Err(crate::error::Error::parse(
337 format!("Unknown dialect: {}", s),
338 0,
339 0,
340 0,
341 0,
342 )),
343 }
344 }
345}
346
347/// Trait that each concrete SQL dialect must implement.
348///
349/// `DialectImpl` provides the configuration hooks and per-expression transform logic
350/// that distinguish one dialect from another. Implementors supply:
351///
352/// - A [`DialectType`] identifier.
353/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
354/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
355/// that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
356/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
357/// rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
358///
359/// The default implementations are no-ops, so a minimal dialect only needs to provide
360/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
361/// standard SQL.
362pub trait DialectImpl {
363 /// Returns the [`DialectType`] that identifies this dialect.
364 fn dialect_type(&self) -> DialectType;
365
366 /// Returns the tokenizer configuration for this dialect.
367 ///
368 /// Override to customize identifier quoting characters, string escape rules,
369 /// comment styles, and other lexing behavior.
370 fn tokenizer_config(&self) -> TokenizerConfig {
371 TokenizerConfig::default()
372 }
373
374 /// Returns the generator configuration for this dialect.
375 ///
376 /// Override to customize identifier quoting style, function name casing,
377 /// keyword casing, and other SQL generation behavior.
378 fn generator_config(&self) -> GeneratorConfig {
379 GeneratorConfig::default()
380 }
381
382 /// Returns a generator configuration tailored to a specific expression.
383 ///
384 /// Override this for hybrid dialects like Athena that route to different SQL engines
385 /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
386 /// The default delegates to [`generator_config`](DialectImpl::generator_config).
387 fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
388 self.generator_config()
389 }
390
391 /// Transforms a single expression node for this dialect, without recursing into children.
392 ///
393 /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
394 /// expression unchanged if no dialect-specific rewrite is needed. Transformations
395 /// typically include function renaming, operator substitution, and type mapping.
396 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
397 Ok(expr)
398 }
399
400 /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
401 ///
402 /// Override this to apply structural rewrites that must see the entire tree at once,
403 /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
404 /// `explode_projection_to_unnest`. The default is a no-op pass-through.
405 fn preprocess(&self, expr: Expression) -> Result<Expression> {
406 Ok(expr)
407 }
408}
409
410/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
411/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
412///
413/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
414/// and then nested element/field types are recursed into. This ensures that dialect-level
415/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
416fn transform_data_type_recursive<F>(
417 dt: crate::expressions::DataType,
418 transform_fn: &F,
419) -> Result<crate::expressions::DataType>
420where
421 F: Fn(Expression) -> Result<Expression>,
422{
423 use crate::expressions::DataType;
424 // First, transform the outermost type through the expression system
425 let dt_expr = transform_fn(Expression::DataType(dt))?;
426 let dt = match dt_expr {
427 Expression::DataType(d) => d,
428 _ => {
429 return Ok(match dt_expr {
430 _ => DataType::Custom {
431 name: "UNKNOWN".to_string(),
432 },
433 })
434 }
435 };
436 // Then recurse into nested types
437 match dt {
438 DataType::Array {
439 element_type,
440 dimension,
441 } => {
442 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
443 Ok(DataType::Array {
444 element_type: Box::new(inner),
445 dimension,
446 })
447 }
448 DataType::List { element_type } => {
449 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
450 Ok(DataType::List {
451 element_type: Box::new(inner),
452 })
453 }
454 DataType::Struct { fields, nested } => {
455 let mut new_fields = Vec::new();
456 for mut field in fields {
457 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
458 new_fields.push(field);
459 }
460 Ok(DataType::Struct {
461 fields: new_fields,
462 nested,
463 })
464 }
465 DataType::Map {
466 key_type,
467 value_type,
468 } => {
469 let k = transform_data_type_recursive(*key_type, transform_fn)?;
470 let v = transform_data_type_recursive(*value_type, transform_fn)?;
471 Ok(DataType::Map {
472 key_type: Box::new(k),
473 value_type: Box::new(v),
474 })
475 }
476 other => Ok(other),
477 }
478}
479
480/// Convert DuckDB C-style format strings to Presto C-style format strings.
481/// DuckDB and Presto both use C-style % directives but with different specifiers for some cases.
482#[cfg(feature = "transpile")]
fn duckdb_to_presto_format(fmt: &str) -> String {
    // Two-pass rewrite: multi-character directives are first swapped for
    // private-use sentinel markers (\x01...\x01, bytes that cannot appear in a
    // format string) so that the later single-character passes (%M -> %i,
    // %S -> %s) cannot corrupt them; the markers are then expanded to their
    // Presto spellings.
    //
    // Note: the original code also masked and unmasked "%Y-%m-%d" back to
    // itself. That round-trip was a no-op (no later pass touches %Y, %m, or
    // %d) and has been removed.
    let mut result = fmt.to_string();
    // Protect the no-padding directives and the composite time pattern.
    result = result.replace("%-m", "\x01NOPADM\x01");
    result = result.replace("%-d", "\x01NOPADD\x01");
    result = result.replace("%-I", "\x01NOPADI\x01");
    result = result.replace("%-H", "\x01NOPADH\x01");
    result = result.replace("%H:%M:%S", "\x01HMS\x01");
    // Convert the single-character specifiers that differ between dialects.
    result = result.replace("%M", "%i");
    result = result.replace("%S", "%s");
    // Expand the sentinels into their Presto equivalents.
    result = result.replace("\x01NOPADM\x01", "%c");
    result = result.replace("\x01NOPADD\x01", "%e");
    result = result.replace("\x01NOPADI\x01", "%l");
    result = result.replace("\x01NOPADH\x01", "%k");
    result = result.replace("\x01HMS\x01", "%T");
    result
}
505
506/// Convert DuckDB C-style format strings to BigQuery format strings.
507/// BigQuery uses a mix of strftime-like directives.
508#[cfg(feature = "transpile")]
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    // Rewrite composite patterns before their shorter components so a partial
    // replacement can never split a longer directive sequence. The rewrite
    // order matches the directive table: %-d first, then the full
    // datetime pattern, then date-only, then time-only.
    let with_day = fmt.replace("%-d", "%e");
    let with_datetime = with_day.replace("%Y-%m-%d %H:%M:%S", "%F %T");
    let with_date = with_datetime.replace("%Y-%m-%d", "%F");
    with_date.replace("%H:%M:%S", "%T")
}
518
519/// Applies a transform function bottom-up through an entire expression tree.
520///
521/// This is the core tree-rewriting engine used by the dialect system. It performs
522/// a post-order (children-first) traversal: for each node, all children are recursively
523/// transformed before the node itself is passed to `transform_fn`. This bottom-up
524/// strategy means that when `transform_fn` sees a node, its children have already
525/// been rewritten, which simplifies pattern matching on sub-expressions.
526///
527/// The function handles all expression variants including SELECT clauses (FROM, WHERE,
528/// GROUP BY, HAVING, ORDER BY, QUALIFY, WITH/CTEs, WINDOW), binary operators,
529/// function calls, CASE expressions, date/time functions, and more.
530///
531/// # Arguments
532///
533/// * `expr` - The root expression to transform (consumed).
534/// * `transform_fn` - A closure that receives each expression node (after its children
535/// have been transformed) and returns a possibly-rewritten expression.
536///
537/// # Errors
538///
539/// Returns an error if `transform_fn` returns an error for any node.
540pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
541where
542 F: Fn(Expression) -> Result<Expression>,
543{
544 use crate::expressions::BinaryOp;
545
546 // Helper macro to transform binary ops with Box<BinaryOp>
547 macro_rules! transform_binary {
548 ($variant:ident, $op:expr) => {{
549 let left = transform_recursive($op.left, transform_fn)?;
550 let right = transform_recursive($op.right, transform_fn)?;
551 Expression::$variant(Box::new(BinaryOp {
552 left,
553 right,
554 left_comments: $op.left_comments,
555 operator_comments: $op.operator_comments,
556 trailing_comments: $op.trailing_comments,
557 inferred_type: $op.inferred_type,
558 }))
559 }};
560 }
561
562 // Fast path: leaf nodes never need child traversal, apply transform directly
563 if matches!(
564 &expr,
565 Expression::Literal(_)
566 | Expression::Boolean(_)
567 | Expression::Null(_)
568 | Expression::Identifier(_)
569 | Expression::Star(_)
570 | Expression::Parameter(_)
571 | Expression::Placeholder(_)
572 | Expression::SessionParameter(_)
573 ) {
574 return transform_fn(expr);
575 }
576
577 // First recursively transform children, then apply the transform function
578 let expr = match expr {
579 Expression::Select(mut select) => {
580 select.expressions = select
581 .expressions
582 .into_iter()
583 .map(|e| transform_recursive(e, transform_fn))
584 .collect::<Result<Vec<_>>>()?;
585
586 // Transform FROM clause
587 if let Some(mut from) = select.from.take() {
588 from.expressions = from
589 .expressions
590 .into_iter()
591 .map(|e| transform_recursive(e, transform_fn))
592 .collect::<Result<Vec<_>>>()?;
593 select.from = Some(from);
594 }
595
596 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
597 select.joins = select
598 .joins
599 .into_iter()
600 .map(|mut join| {
601 join.this = transform_recursive(join.this, transform_fn)?;
602 if let Some(on) = join.on.take() {
603 join.on = Some(transform_recursive(on, transform_fn)?);
604 }
605 // Wrap join in Expression::Join to allow transform_fn to transform it
606 match transform_fn(Expression::Join(Box::new(join)))? {
607 Expression::Join(j) => Ok(*j),
608 _ => Err(crate::error::Error::parse(
609 "Join transformation returned non-join expression",
610 0,
611 0,
612 0,
613 0,
614 )),
615 }
616 })
617 .collect::<Result<Vec<_>>>()?;
618
619 // Transform LATERAL VIEW expressions (Hive/Spark)
620 select.lateral_views = select
621 .lateral_views
622 .into_iter()
623 .map(|mut lv| {
624 lv.this = transform_recursive(lv.this, transform_fn)?;
625 Ok(lv)
626 })
627 .collect::<Result<Vec<_>>>()?;
628
629 // Transform WHERE clause
630 if let Some(mut where_clause) = select.where_clause.take() {
631 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
632 select.where_clause = Some(where_clause);
633 }
634
635 // Transform GROUP BY
636 if let Some(mut group_by) = select.group_by.take() {
637 group_by.expressions = group_by
638 .expressions
639 .into_iter()
640 .map(|e| transform_recursive(e, transform_fn))
641 .collect::<Result<Vec<_>>>()?;
642 select.group_by = Some(group_by);
643 }
644
645 // Transform HAVING
646 if let Some(mut having) = select.having.take() {
647 having.this = transform_recursive(having.this, transform_fn)?;
648 select.having = Some(having);
649 }
650
651 // Transform WITH (CTEs)
652 if let Some(mut with) = select.with.take() {
653 with.ctes = with
654 .ctes
655 .into_iter()
656 .map(|mut cte| {
657 let original = cte.this.clone();
658 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
659 cte
660 })
661 .collect();
662 select.with = Some(with);
663 }
664
665 // Transform ORDER BY
666 if let Some(mut order) = select.order_by.take() {
667 order.expressions = order
668 .expressions
669 .into_iter()
670 .map(|o| {
671 let mut o = o;
672 let original = o.this.clone();
673 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
674 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
675 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
676 Ok(Expression::Ordered(transformed)) => *transformed,
677 Ok(_) | Err(_) => o,
678 }
679 })
680 .collect();
681 select.order_by = Some(order);
682 }
683
684 // Transform WINDOW clause order_by
685 if let Some(ref mut windows) = select.windows {
686 for nw in windows.iter_mut() {
687 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
688 .into_iter()
689 .map(|o| {
690 let mut o = o;
691 let original = o.this.clone();
692 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
693 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
694 Ok(Expression::Ordered(transformed)) => *transformed,
695 Ok(_) | Err(_) => o,
696 }
697 })
698 .collect();
699 }
700 }
701
702 // Transform QUALIFY
703 if let Some(mut qual) = select.qualify.take() {
704 qual.this = transform_recursive(qual.this, transform_fn)?;
705 select.qualify = Some(qual);
706 }
707
708 Expression::Select(select)
709 }
710 Expression::Function(mut f) => {
711 f.args = f
712 .args
713 .into_iter()
714 .map(|e| transform_recursive(e, transform_fn))
715 .collect::<Result<Vec<_>>>()?;
716 Expression::Function(f)
717 }
718 Expression::AggregateFunction(mut f) => {
719 f.args = f
720 .args
721 .into_iter()
722 .map(|e| transform_recursive(e, transform_fn))
723 .collect::<Result<Vec<_>>>()?;
724 if let Some(filter) = f.filter {
725 f.filter = Some(transform_recursive(filter, transform_fn)?);
726 }
727 Expression::AggregateFunction(f)
728 }
729 Expression::WindowFunction(mut wf) => {
730 wf.this = transform_recursive(wf.this, transform_fn)?;
731 wf.over.partition_by = wf
732 .over
733 .partition_by
734 .into_iter()
735 .map(|e| transform_recursive(e, transform_fn))
736 .collect::<Result<Vec<_>>>()?;
737 // Transform order_by items through Expression::Ordered wrapper
738 wf.over.order_by = wf
739 .over
740 .order_by
741 .into_iter()
742 .map(|o| {
743 let mut o = o;
744 o.this = transform_recursive(o.this, transform_fn)?;
745 match transform_fn(Expression::Ordered(Box::new(o)))? {
746 Expression::Ordered(transformed) => Ok(*transformed),
747 _ => Err(crate::error::Error::parse(
748 "Ordered transformation returned non-Ordered expression",
749 0,
750 0,
751 0,
752 0,
753 )),
754 }
755 })
756 .collect::<Result<Vec<_>>>()?;
757 Expression::WindowFunction(wf)
758 }
759 Expression::Alias(mut a) => {
760 a.this = transform_recursive(a.this, transform_fn)?;
761 Expression::Alias(a)
762 }
763 Expression::Cast(mut c) => {
764 c.this = transform_recursive(c.this, transform_fn)?;
765 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
766 c.to = transform_data_type_recursive(c.to, transform_fn)?;
767 Expression::Cast(c)
768 }
769 Expression::And(op) => transform_binary!(And, *op),
770 Expression::Or(op) => transform_binary!(Or, *op),
771 Expression::Add(op) => transform_binary!(Add, *op),
772 Expression::Sub(op) => transform_binary!(Sub, *op),
773 Expression::Mul(op) => transform_binary!(Mul, *op),
774 Expression::Div(op) => transform_binary!(Div, *op),
775 Expression::Eq(op) => transform_binary!(Eq, *op),
776 Expression::Lt(op) => transform_binary!(Lt, *op),
777 Expression::Gt(op) => transform_binary!(Gt, *op),
778 Expression::Paren(mut p) => {
779 p.this = transform_recursive(p.this, transform_fn)?;
780 Expression::Paren(p)
781 }
782 Expression::Coalesce(mut f) => {
783 f.expressions = f
784 .expressions
785 .into_iter()
786 .map(|e| transform_recursive(e, transform_fn))
787 .collect::<Result<Vec<_>>>()?;
788 Expression::Coalesce(f)
789 }
790 Expression::IfNull(mut f) => {
791 f.this = transform_recursive(f.this, transform_fn)?;
792 f.expression = transform_recursive(f.expression, transform_fn)?;
793 Expression::IfNull(f)
794 }
795 Expression::Nvl(mut f) => {
796 f.this = transform_recursive(f.this, transform_fn)?;
797 f.expression = transform_recursive(f.expression, transform_fn)?;
798 Expression::Nvl(f)
799 }
800 Expression::In(mut i) => {
801 i.this = transform_recursive(i.this, transform_fn)?;
802 i.expressions = i
803 .expressions
804 .into_iter()
805 .map(|e| transform_recursive(e, transform_fn))
806 .collect::<Result<Vec<_>>>()?;
807 if let Some(query) = i.query {
808 i.query = Some(transform_recursive(query, transform_fn)?);
809 }
810 Expression::In(i)
811 }
812 Expression::Not(mut n) => {
813 n.this = transform_recursive(n.this, transform_fn)?;
814 Expression::Not(n)
815 }
816 Expression::ArraySlice(mut s) => {
817 s.this = transform_recursive(s.this, transform_fn)?;
818 if let Some(start) = s.start {
819 s.start = Some(transform_recursive(start, transform_fn)?);
820 }
821 if let Some(end) = s.end {
822 s.end = Some(transform_recursive(end, transform_fn)?);
823 }
824 Expression::ArraySlice(s)
825 }
826 Expression::Subscript(mut s) => {
827 s.this = transform_recursive(s.this, transform_fn)?;
828 s.index = transform_recursive(s.index, transform_fn)?;
829 Expression::Subscript(s)
830 }
831 Expression::Array(mut a) => {
832 a.expressions = a
833 .expressions
834 .into_iter()
835 .map(|e| transform_recursive(e, transform_fn))
836 .collect::<Result<Vec<_>>>()?;
837 Expression::Array(a)
838 }
839 Expression::Struct(mut s) => {
840 let mut new_fields = Vec::new();
841 for (name, expr) in s.fields {
842 let transformed = transform_recursive(expr, transform_fn)?;
843 new_fields.push((name, transformed));
844 }
845 s.fields = new_fields;
846 Expression::Struct(s)
847 }
848 Expression::NamedArgument(mut na) => {
849 na.value = transform_recursive(na.value, transform_fn)?;
850 Expression::NamedArgument(na)
851 }
852 Expression::MapFunc(mut m) => {
853 m.keys = m
854 .keys
855 .into_iter()
856 .map(|e| transform_recursive(e, transform_fn))
857 .collect::<Result<Vec<_>>>()?;
858 m.values = m
859 .values
860 .into_iter()
861 .map(|e| transform_recursive(e, transform_fn))
862 .collect::<Result<Vec<_>>>()?;
863 Expression::MapFunc(m)
864 }
865 Expression::ArrayFunc(mut a) => {
866 a.expressions = a
867 .expressions
868 .into_iter()
869 .map(|e| transform_recursive(e, transform_fn))
870 .collect::<Result<Vec<_>>>()?;
871 Expression::ArrayFunc(a)
872 }
873 Expression::Lambda(mut l) => {
874 l.body = transform_recursive(l.body, transform_fn)?;
875 Expression::Lambda(l)
876 }
877 Expression::JsonExtract(mut f) => {
878 f.this = transform_recursive(f.this, transform_fn)?;
879 f.path = transform_recursive(f.path, transform_fn)?;
880 Expression::JsonExtract(f)
881 }
882 Expression::JsonExtractScalar(mut f) => {
883 f.this = transform_recursive(f.this, transform_fn)?;
884 f.path = transform_recursive(f.path, transform_fn)?;
885 Expression::JsonExtractScalar(f)
886 }
887
888 // ===== UnaryFunc-based expressions =====
889 // These all have a single `this: Expression` child
890 Expression::Length(mut f) => {
891 f.this = transform_recursive(f.this, transform_fn)?;
892 Expression::Length(f)
893 }
894 Expression::Upper(mut f) => {
895 f.this = transform_recursive(f.this, transform_fn)?;
896 Expression::Upper(f)
897 }
898 Expression::Lower(mut f) => {
899 f.this = transform_recursive(f.this, transform_fn)?;
900 Expression::Lower(f)
901 }
902 Expression::LTrim(mut f) => {
903 f.this = transform_recursive(f.this, transform_fn)?;
904 Expression::LTrim(f)
905 }
906 Expression::RTrim(mut f) => {
907 f.this = transform_recursive(f.this, transform_fn)?;
908 Expression::RTrim(f)
909 }
910 Expression::Reverse(mut f) => {
911 f.this = transform_recursive(f.this, transform_fn)?;
912 Expression::Reverse(f)
913 }
914 Expression::Abs(mut f) => {
915 f.this = transform_recursive(f.this, transform_fn)?;
916 Expression::Abs(f)
917 }
918 Expression::Ceil(mut f) => {
919 f.this = transform_recursive(f.this, transform_fn)?;
920 Expression::Ceil(f)
921 }
922 Expression::Floor(mut f) => {
923 f.this = transform_recursive(f.this, transform_fn)?;
924 Expression::Floor(f)
925 }
926 Expression::Sign(mut f) => {
927 f.this = transform_recursive(f.this, transform_fn)?;
928 Expression::Sign(f)
929 }
930 Expression::Sqrt(mut f) => {
931 f.this = transform_recursive(f.this, transform_fn)?;
932 Expression::Sqrt(f)
933 }
934 Expression::Cbrt(mut f) => {
935 f.this = transform_recursive(f.this, transform_fn)?;
936 Expression::Cbrt(f)
937 }
938 Expression::Ln(mut f) => {
939 f.this = transform_recursive(f.this, transform_fn)?;
940 Expression::Ln(f)
941 }
942 Expression::Log(mut f) => {
943 f.this = transform_recursive(f.this, transform_fn)?;
944 if let Some(base) = f.base {
945 f.base = Some(transform_recursive(base, transform_fn)?);
946 }
947 Expression::Log(f)
948 }
949 Expression::Exp(mut f) => {
950 f.this = transform_recursive(f.this, transform_fn)?;
951 Expression::Exp(f)
952 }
953 Expression::Date(mut f) => {
954 f.this = transform_recursive(f.this, transform_fn)?;
955 Expression::Date(f)
956 }
957 Expression::Stddev(mut f) => {
958 f.this = transform_recursive(f.this, transform_fn)?;
959 Expression::Stddev(f)
960 }
961 Expression::Variance(mut f) => {
962 f.this = transform_recursive(f.this, transform_fn)?;
963 Expression::Variance(f)
964 }
965
966 // ===== BinaryFunc-based expressions =====
967 Expression::ModFunc(mut f) => {
968 f.this = transform_recursive(f.this, transform_fn)?;
969 f.expression = transform_recursive(f.expression, transform_fn)?;
970 Expression::ModFunc(f)
971 }
972 Expression::Power(mut f) => {
973 f.this = transform_recursive(f.this, transform_fn)?;
974 f.expression = transform_recursive(f.expression, transform_fn)?;
975 Expression::Power(f)
976 }
977 Expression::MapFromArrays(mut f) => {
978 f.this = transform_recursive(f.this, transform_fn)?;
979 f.expression = transform_recursive(f.expression, transform_fn)?;
980 Expression::MapFromArrays(f)
981 }
982 Expression::ElementAt(mut f) => {
983 f.this = transform_recursive(f.this, transform_fn)?;
984 f.expression = transform_recursive(f.expression, transform_fn)?;
985 Expression::ElementAt(f)
986 }
987 Expression::MapContainsKey(mut f) => {
988 f.this = transform_recursive(f.this, transform_fn)?;
989 f.expression = transform_recursive(f.expression, transform_fn)?;
990 Expression::MapContainsKey(f)
991 }
992 Expression::Left(mut f) => {
993 f.this = transform_recursive(f.this, transform_fn)?;
994 f.length = transform_recursive(f.length, transform_fn)?;
995 Expression::Left(f)
996 }
997 Expression::Right(mut f) => {
998 f.this = transform_recursive(f.this, transform_fn)?;
999 f.length = transform_recursive(f.length, transform_fn)?;
1000 Expression::Right(f)
1001 }
1002 Expression::Repeat(mut f) => {
1003 f.this = transform_recursive(f.this, transform_fn)?;
1004 f.times = transform_recursive(f.times, transform_fn)?;
1005 Expression::Repeat(f)
1006 }
1007
1008 // ===== Complex function expressions =====
1009 Expression::Substring(mut f) => {
1010 f.this = transform_recursive(f.this, transform_fn)?;
1011 f.start = transform_recursive(f.start, transform_fn)?;
1012 if let Some(len) = f.length {
1013 f.length = Some(transform_recursive(len, transform_fn)?);
1014 }
1015 Expression::Substring(f)
1016 }
1017 Expression::Replace(mut f) => {
1018 f.this = transform_recursive(f.this, transform_fn)?;
1019 f.old = transform_recursive(f.old, transform_fn)?;
1020 f.new = transform_recursive(f.new, transform_fn)?;
1021 Expression::Replace(f)
1022 }
1023 Expression::ConcatWs(mut f) => {
1024 f.separator = transform_recursive(f.separator, transform_fn)?;
1025 f.expressions = f
1026 .expressions
1027 .into_iter()
1028 .map(|e| transform_recursive(e, transform_fn))
1029 .collect::<Result<Vec<_>>>()?;
1030 Expression::ConcatWs(f)
1031 }
1032 Expression::Trim(mut f) => {
1033 f.this = transform_recursive(f.this, transform_fn)?;
1034 if let Some(chars) = f.characters {
1035 f.characters = Some(transform_recursive(chars, transform_fn)?);
1036 }
1037 Expression::Trim(f)
1038 }
1039 Expression::Split(mut f) => {
1040 f.this = transform_recursive(f.this, transform_fn)?;
1041 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
1042 Expression::Split(f)
1043 }
1044 Expression::Lpad(mut f) => {
1045 f.this = transform_recursive(f.this, transform_fn)?;
1046 f.length = transform_recursive(f.length, transform_fn)?;
1047 if let Some(fill) = f.fill {
1048 f.fill = Some(transform_recursive(fill, transform_fn)?);
1049 }
1050 Expression::Lpad(f)
1051 }
1052 Expression::Rpad(mut f) => {
1053 f.this = transform_recursive(f.this, transform_fn)?;
1054 f.length = transform_recursive(f.length, transform_fn)?;
1055 if let Some(fill) = f.fill {
1056 f.fill = Some(transform_recursive(fill, transform_fn)?);
1057 }
1058 Expression::Rpad(f)
1059 }
1060
1061 // ===== Conditional expressions =====
1062 Expression::Case(mut c) => {
1063 if let Some(operand) = c.operand {
1064 c.operand = Some(transform_recursive(operand, transform_fn)?);
1065 }
1066 c.whens = c
1067 .whens
1068 .into_iter()
1069 .map(|(cond, then)| {
1070 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
1071 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
1072 (new_cond, new_then)
1073 })
1074 .collect();
1075 if let Some(else_expr) = c.else_ {
1076 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
1077 }
1078 Expression::Case(c)
1079 }
1080 Expression::IfFunc(mut f) => {
1081 f.condition = transform_recursive(f.condition, transform_fn)?;
1082 f.true_value = transform_recursive(f.true_value, transform_fn)?;
1083 if let Some(false_val) = f.false_value {
1084 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
1085 }
1086 Expression::IfFunc(f)
1087 }
1088
1089 // ===== Date/Time expressions =====
1090 Expression::DateAdd(mut f) => {
1091 f.this = transform_recursive(f.this, transform_fn)?;
1092 f.interval = transform_recursive(f.interval, transform_fn)?;
1093 Expression::DateAdd(f)
1094 }
1095 Expression::DateSub(mut f) => {
1096 f.this = transform_recursive(f.this, transform_fn)?;
1097 f.interval = transform_recursive(f.interval, transform_fn)?;
1098 Expression::DateSub(f)
1099 }
1100 Expression::DateDiff(mut f) => {
1101 f.this = transform_recursive(f.this, transform_fn)?;
1102 f.expression = transform_recursive(f.expression, transform_fn)?;
1103 Expression::DateDiff(f)
1104 }
1105 Expression::DateTrunc(mut f) => {
1106 f.this = transform_recursive(f.this, transform_fn)?;
1107 Expression::DateTrunc(f)
1108 }
1109 Expression::Extract(mut f) => {
1110 f.this = transform_recursive(f.this, transform_fn)?;
1111 Expression::Extract(f)
1112 }
1113
1114 // ===== JSON expressions =====
1115 Expression::JsonObject(mut f) => {
1116 f.pairs = f
1117 .pairs
1118 .into_iter()
1119 .map(|(k, v)| {
1120 let new_k = transform_recursive(k, transform_fn)?;
1121 let new_v = transform_recursive(v, transform_fn)?;
1122 Ok((new_k, new_v))
1123 })
1124 .collect::<Result<Vec<_>>>()?;
1125 Expression::JsonObject(f)
1126 }
1127
1128 // ===== Subquery expressions =====
1129 Expression::Subquery(mut s) => {
1130 s.this = transform_recursive(s.this, transform_fn)?;
1131 Expression::Subquery(s)
1132 }
1133 Expression::Exists(mut e) => {
1134 e.this = transform_recursive(e.this, transform_fn)?;
1135 Expression::Exists(e)
1136 }
1137
1138 // ===== Set operations =====
1139 Expression::Union(mut u) => {
1140 u.left = transform_recursive(u.left, transform_fn)?;
1141 u.right = transform_recursive(u.right, transform_fn)?;
1142 Expression::Union(u)
1143 }
1144 Expression::Intersect(mut i) => {
1145 i.left = transform_recursive(i.left, transform_fn)?;
1146 i.right = transform_recursive(i.right, transform_fn)?;
1147 Expression::Intersect(i)
1148 }
1149 Expression::Except(mut e) => {
1150 e.left = transform_recursive(e.left, transform_fn)?;
1151 e.right = transform_recursive(e.right, transform_fn)?;
1152 Expression::Except(e)
1153 }
1154
1155 // ===== DML expressions =====
1156 Expression::Insert(mut ins) => {
1157 // Transform VALUES clause expressions
1158 let mut new_values = Vec::new();
1159 for row in ins.values {
1160 let mut new_row = Vec::new();
1161 for e in row {
1162 new_row.push(transform_recursive(e, transform_fn)?);
1163 }
1164 new_values.push(new_row);
1165 }
1166 ins.values = new_values;
1167
1168 // Transform query (for INSERT ... SELECT)
1169 if let Some(query) = ins.query {
1170 ins.query = Some(transform_recursive(query, transform_fn)?);
1171 }
1172
1173 // Transform RETURNING clause
1174 let mut new_returning = Vec::new();
1175 for e in ins.returning {
1176 new_returning.push(transform_recursive(e, transform_fn)?);
1177 }
1178 ins.returning = new_returning;
1179
1180 // Transform ON CONFLICT clause
1181 if let Some(on_conflict) = ins.on_conflict {
1182 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
1183 }
1184
1185 Expression::Insert(ins)
1186 }
1187 Expression::Update(mut upd) => {
1188 upd.set = upd
1189 .set
1190 .into_iter()
1191 .map(|(id, val)| {
1192 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
1193 (id, new_val)
1194 })
1195 .collect();
1196 if let Some(mut where_clause) = upd.where_clause.take() {
1197 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1198 upd.where_clause = Some(where_clause);
1199 }
1200 Expression::Update(upd)
1201 }
1202 Expression::Delete(mut del) => {
1203 if let Some(mut where_clause) = del.where_clause.take() {
1204 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1205 del.where_clause = Some(where_clause);
1206 }
1207 Expression::Delete(del)
1208 }
1209
1210 // ===== CTE expressions =====
1211 Expression::With(mut w) => {
1212 w.ctes = w
1213 .ctes
1214 .into_iter()
1215 .map(|mut cte| {
1216 let original = cte.this.clone();
1217 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1218 cte
1219 })
1220 .collect();
1221 Expression::With(w)
1222 }
1223 Expression::Cte(mut c) => {
1224 c.this = transform_recursive(c.this, transform_fn)?;
1225 Expression::Cte(c)
1226 }
1227
1228 // ===== Order expressions =====
1229 Expression::Ordered(mut o) => {
1230 o.this = transform_recursive(o.this, transform_fn)?;
1231 Expression::Ordered(o)
1232 }
1233
1234 // ===== Negation =====
1235 Expression::Neg(mut n) => {
1236 n.this = transform_recursive(n.this, transform_fn)?;
1237 Expression::Neg(n)
1238 }
1239
1240 // ===== Between =====
1241 Expression::Between(mut b) => {
1242 b.this = transform_recursive(b.this, transform_fn)?;
1243 b.low = transform_recursive(b.low, transform_fn)?;
1244 b.high = transform_recursive(b.high, transform_fn)?;
1245 Expression::Between(b)
1246 }
1247 Expression::IsNull(mut i) => {
1248 i.this = transform_recursive(i.this, transform_fn)?;
1249 Expression::IsNull(i)
1250 }
1251 Expression::IsTrue(mut i) => {
1252 i.this = transform_recursive(i.this, transform_fn)?;
1253 Expression::IsTrue(i)
1254 }
1255 Expression::IsFalse(mut i) => {
1256 i.this = transform_recursive(i.this, transform_fn)?;
1257 Expression::IsFalse(i)
1258 }
1259
1260 // ===== Like expressions =====
1261 Expression::Like(mut l) => {
1262 l.left = transform_recursive(l.left, transform_fn)?;
1263 l.right = transform_recursive(l.right, transform_fn)?;
1264 Expression::Like(l)
1265 }
1266 Expression::ILike(mut l) => {
1267 l.left = transform_recursive(l.left, transform_fn)?;
1268 l.right = transform_recursive(l.right, transform_fn)?;
1269 Expression::ILike(l)
1270 }
1271
1272 // ===== Additional binary ops not covered by macro =====
1273 Expression::Neq(op) => transform_binary!(Neq, *op),
1274 Expression::Lte(op) => transform_binary!(Lte, *op),
1275 Expression::Gte(op) => transform_binary!(Gte, *op),
1276 Expression::Mod(op) => transform_binary!(Mod, *op),
1277 Expression::Concat(op) => transform_binary!(Concat, *op),
1278 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
1279 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
1280 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
1281 Expression::Is(op) => transform_binary!(Is, *op),
1282
1283 // ===== TryCast / SafeCast =====
1284 Expression::TryCast(mut c) => {
1285 c.this = transform_recursive(c.this, transform_fn)?;
1286 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1287 Expression::TryCast(c)
1288 }
1289 Expression::SafeCast(mut c) => {
1290 c.this = transform_recursive(c.this, transform_fn)?;
1291 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1292 Expression::SafeCast(c)
1293 }
1294
1295 // ===== Misc =====
1296 Expression::Unnest(mut f) => {
1297 f.this = transform_recursive(f.this, transform_fn)?;
1298 f.expressions = f
1299 .expressions
1300 .into_iter()
1301 .map(|e| transform_recursive(e, transform_fn))
1302 .collect::<Result<Vec<_>>>()?;
1303 Expression::Unnest(f)
1304 }
1305 Expression::Explode(mut f) => {
1306 f.this = transform_recursive(f.this, transform_fn)?;
1307 Expression::Explode(f)
1308 }
1309 Expression::GroupConcat(mut f) => {
1310 f.this = transform_recursive(f.this, transform_fn)?;
1311 Expression::GroupConcat(f)
1312 }
1313 Expression::StringAgg(mut f) => {
1314 f.this = transform_recursive(f.this, transform_fn)?;
1315 Expression::StringAgg(f)
1316 }
1317 Expression::ListAgg(mut f) => {
1318 f.this = transform_recursive(f.this, transform_fn)?;
1319 Expression::ListAgg(f)
1320 }
1321 Expression::ArrayAgg(mut f) => {
1322 f.this = transform_recursive(f.this, transform_fn)?;
1323 Expression::ArrayAgg(f)
1324 }
1325 Expression::ParseJson(mut f) => {
1326 f.this = transform_recursive(f.this, transform_fn)?;
1327 Expression::ParseJson(f)
1328 }
1329 Expression::ToJson(mut f) => {
1330 f.this = transform_recursive(f.this, transform_fn)?;
1331 Expression::ToJson(f)
1332 }
1333 Expression::JSONExtract(mut e) => {
1334 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1335 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1336 Expression::JSONExtract(e)
1337 }
1338 Expression::JSONExtractScalar(mut e) => {
1339 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1340 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1341 Expression::JSONExtractScalar(e)
1342 }
1343
1344 // StrToTime: recurse into this
1345 Expression::StrToTime(mut e) => {
1346 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1347 Expression::StrToTime(e)
1348 }
1349
1350 // UnixToTime: recurse into this
1351 Expression::UnixToTime(mut e) => {
1352 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1353 Expression::UnixToTime(e)
1354 }
1355
1356 // CreateTable: recurse into column defaults, on_update expressions, and data types
1357 Expression::CreateTable(mut ct) => {
1358 for col in &mut ct.columns {
1359 if let Some(default_expr) = col.default.take() {
1360 col.default = Some(transform_recursive(default_expr, transform_fn)?);
1361 }
1362 if let Some(on_update_expr) = col.on_update.take() {
1363 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
1364 }
1365 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
1366 // are NOT applied here because per-dialect transforms are designed for CAST/expression
1367 // contexts and may not produce correct results for DDL column definitions.
1368 // The DDL type mappings would need dedicated handling per source/target pair.
1369 }
1370 if let Some(as_select) = ct.as_select.take() {
1371 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
1372 }
1373 Expression::CreateTable(ct)
1374 }
1375
1376 // CreateProcedure: recurse into body expressions
1377 Expression::CreateProcedure(mut cp) => {
1378 if let Some(body) = cp.body.take() {
1379 cp.body = Some(match body {
1380 FunctionBody::Expression(expr) => {
1381 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1382 }
1383 FunctionBody::Return(expr) => {
1384 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1385 }
1386 FunctionBody::Statements(stmts) => {
1387 let transformed_stmts = stmts
1388 .into_iter()
1389 .map(|s| transform_recursive(s, transform_fn))
1390 .collect::<Result<Vec<_>>>()?;
1391 FunctionBody::Statements(transformed_stmts)
1392 }
1393 other => other,
1394 });
1395 }
1396 Expression::CreateProcedure(cp)
1397 }
1398
1399 // CreateFunction: recurse into body expressions
1400 Expression::CreateFunction(mut cf) => {
1401 if let Some(body) = cf.body.take() {
1402 cf.body = Some(match body {
1403 FunctionBody::Expression(expr) => {
1404 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1405 }
1406 FunctionBody::Return(expr) => {
1407 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1408 }
1409 FunctionBody::Statements(stmts) => {
1410 let transformed_stmts = stmts
1411 .into_iter()
1412 .map(|s| transform_recursive(s, transform_fn))
1413 .collect::<Result<Vec<_>>>()?;
1414 FunctionBody::Statements(transformed_stmts)
1415 }
1416 other => other,
1417 });
1418 }
1419 Expression::CreateFunction(cf)
1420 }
1421
1422 // MemberOf: recurse into left and right operands
1423 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
1424 // ArrayContainsAll (@>): recurse into left and right operands
1425 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
1426 // ArrayContainedBy (<@): recurse into left and right operands
1427 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
1428 // ArrayOverlaps (&&): recurse into left and right operands
1429 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
1430 // TsMatch (@@): recurse into left and right operands
1431 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
1432 // Adjacent (-|-): recurse into left and right operands
1433 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
1434
1435 // Table: recurse into when (HistoricalData) and changes fields
1436 Expression::Table(mut t) => {
1437 if let Some(when) = t.when.take() {
1438 let transformed =
1439 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
1440 if let Expression::HistoricalData(hd) = transformed {
1441 t.when = Some(hd);
1442 }
1443 }
1444 if let Some(changes) = t.changes.take() {
1445 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
1446 if let Expression::Changes(c) = transformed {
1447 t.changes = Some(c);
1448 }
1449 }
1450 Expression::Table(t)
1451 }
1452
1453 // HistoricalData (Snowflake time travel): recurse into expression
1454 Expression::HistoricalData(mut hd) => {
1455 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
1456 Expression::HistoricalData(hd)
1457 }
1458
1459 // Changes (Snowflake CHANGES clause): recurse into at_before and end
1460 Expression::Changes(mut c) => {
1461 if let Some(at_before) = c.at_before.take() {
1462 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
1463 }
1464 if let Some(end) = c.end.take() {
1465 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
1466 }
1467 Expression::Changes(c)
1468 }
1469
1470 // TableArgument: TABLE(expr) or MODEL(expr)
1471 Expression::TableArgument(mut ta) => {
1472 ta.this = transform_recursive(ta.this, transform_fn)?;
1473 Expression::TableArgument(ta)
1474 }
1475
1476 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
1477 Expression::JoinedTable(mut jt) => {
1478 jt.left = transform_recursive(jt.left, transform_fn)?;
1479 for join in &mut jt.joins {
1480 join.this = transform_recursive(
1481 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
1482 transform_fn,
1483 )?;
1484 if let Some(on) = join.on.take() {
1485 join.on = Some(transform_recursive(on, transform_fn)?);
1486 }
1487 }
1488 jt.lateral_views = jt
1489 .lateral_views
1490 .into_iter()
1491 .map(|mut lv| {
1492 lv.this = transform_recursive(lv.this, transform_fn)?;
1493 Ok(lv)
1494 })
1495 .collect::<Result<Vec<_>>>()?;
1496 Expression::JoinedTable(jt)
1497 }
1498
1499 // Lateral: LATERAL func() - recurse into the function expression
1500 Expression::Lateral(mut lat) => {
1501 *lat.this = transform_recursive(*lat.this, transform_fn)?;
1502 Expression::Lateral(lat)
1503 }
1504
1505 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
1506 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
1507 // as a unit together with the WithinGroup wrapper
1508 Expression::WithinGroup(mut wg) => {
1509 wg.order_by = wg
1510 .order_by
1511 .into_iter()
1512 .map(|mut o| {
1513 let original = o.this.clone();
1514 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1515 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1516 Ok(Expression::Ordered(transformed)) => *transformed,
1517 Ok(_) | Err(_) => o,
1518 }
1519 })
1520 .collect();
1521 Expression::WithinGroup(wg)
1522 }
1523
1524 // Filter: recurse into both the aggregate and the filter condition
1525 Expression::Filter(mut f) => {
1526 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
1527 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
1528 Expression::Filter(f)
1529 }
1530
1531 // BitwiseOrAgg/BitwiseAndAgg/BitwiseXorAgg: recurse into the aggregate argument
1532 Expression::BitwiseOrAgg(mut f) => {
1533 f.this = transform_recursive(f.this, transform_fn)?;
1534 Expression::BitwiseOrAgg(f)
1535 }
1536 Expression::BitwiseAndAgg(mut f) => {
1537 f.this = transform_recursive(f.this, transform_fn)?;
1538 Expression::BitwiseAndAgg(f)
1539 }
1540 Expression::BitwiseXorAgg(mut f) => {
1541 f.this = transform_recursive(f.this, transform_fn)?;
1542 Expression::BitwiseXorAgg(f)
1543 }
1544 Expression::PipeOperator(mut pipe) => {
1545 pipe.this = transform_recursive(pipe.this, transform_fn)?;
1546 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
1547 Expression::PipeOperator(pipe)
1548 }
1549
1550 // ArrayExcept/ArrayContains/ArrayDistinct: recurse into children
1551 Expression::ArrayExcept(mut f) => {
1552 f.this = transform_recursive(f.this, transform_fn)?;
1553 f.expression = transform_recursive(f.expression, transform_fn)?;
1554 Expression::ArrayExcept(f)
1555 }
1556 Expression::ArrayContains(mut f) => {
1557 f.this = transform_recursive(f.this, transform_fn)?;
1558 f.expression = transform_recursive(f.expression, transform_fn)?;
1559 Expression::ArrayContains(f)
1560 }
1561 Expression::ArrayDistinct(mut f) => {
1562 f.this = transform_recursive(f.this, transform_fn)?;
1563 Expression::ArrayDistinct(f)
1564 }
1565
1566 // Pass through leaf nodes unchanged
1567 other => other,
1568 };
1569
1570 // Then apply the transform function
1571 transform_fn(expr)
1572}
1573
// NOTE: the shared implementation used by both `Dialect::get()` and custom
// dialect construction is `configs_for_dialect_type` below, which returns the
// tokenizer config, generator config, and expression transform closure for a
// built-in dialect type. (Kept as line comments here: `///` doc comments in
// this position would attach to `CachedDialectConfig` rather than the fn.)
// ---------------------------------------------------------------------------
// Cached dialect configurations
// ---------------------------------------------------------------------------
1580
/// Pre-computed tokenizer + generator configs for a dialect, cached via `LazyLock`.
/// Transform closures are cheap (unit-struct method calls) and created fresh each time.
struct CachedDialectConfig {
    // Dialect-specific lexing rules; cloned per lookup in `configs_for_dialect_type`.
    tokenizer_config: TokenizerConfig,
    // Dialect-specific SQL rendering settings; cloned per lookup in `configs_for_dialect_type`.
    generator_config: GeneratorConfig,
}
1587
/// Declare a per-dialect `LazyLock<CachedDialectConfig>` static.
///
/// Expands to a `static` named `$static_name` that lazily builds the dialect's
/// tokenizer and generator configs on first access. The whole static is gated
/// on `#[cfg(feature = $feature)]`, so disabled dialects contribute no code or
/// data to the build.
macro_rules! cached_dialect {
    ($static_name:ident, $dialect_struct:expr, $feature:literal) => {
        #[cfg(feature = $feature)]
        static $static_name: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
            let d = $dialect_struct;
            // Build both configs once; later accesses reuse the cached value.
            CachedDialectConfig {
                tokenizer_config: d.tokenizer_config(),
                generator_config: d.generator_config(),
            }
        });
    };
}
1601
1602static CACHED_GENERIC: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
1603 let d = GenericDialect;
1604 CachedDialectConfig {
1605 tokenizer_config: d.tokenizer_config(),
1606 generator_config: d.generator_config(),
1607 }
1608});
1609
// One cached-config static per built-in dialect, each compiled only when the
// corresponding Cargo feature is enabled. Keep this list in sync with the
// `DialectType` match arms in `configs_for_dialect_type` below — a dialect
// listed here but missing there (or vice versa) silently falls back to the
// generic dialect's catch-all arm.
cached_dialect!(CACHED_POSTGRESQL, PostgresDialect, "dialect-postgresql");
cached_dialect!(CACHED_MYSQL, MySQLDialect, "dialect-mysql");
cached_dialect!(CACHED_BIGQUERY, BigQueryDialect, "dialect-bigquery");
cached_dialect!(CACHED_SNOWFLAKE, SnowflakeDialect, "dialect-snowflake");
cached_dialect!(CACHED_DUCKDB, DuckDBDialect, "dialect-duckdb");
cached_dialect!(CACHED_TSQL, TSQLDialect, "dialect-tsql");
cached_dialect!(CACHED_ORACLE, OracleDialect, "dialect-oracle");
cached_dialect!(CACHED_HIVE, HiveDialect, "dialect-hive");
cached_dialect!(CACHED_SPARK, SparkDialect, "dialect-spark");
cached_dialect!(CACHED_SQLITE, SQLiteDialect, "dialect-sqlite");
cached_dialect!(CACHED_PRESTO, PrestoDialect, "dialect-presto");
cached_dialect!(CACHED_TRINO, TrinoDialect, "dialect-trino");
cached_dialect!(CACHED_REDSHIFT, RedshiftDialect, "dialect-redshift");
cached_dialect!(CACHED_CLICKHOUSE, ClickHouseDialect, "dialect-clickhouse");
cached_dialect!(CACHED_DATABRICKS, DatabricksDialect, "dialect-databricks");
cached_dialect!(CACHED_ATHENA, AthenaDialect, "dialect-athena");
cached_dialect!(CACHED_TERADATA, TeradataDialect, "dialect-teradata");
cached_dialect!(CACHED_DORIS, DorisDialect, "dialect-doris");
cached_dialect!(CACHED_STARROCKS, StarRocksDialect, "dialect-starrocks");
cached_dialect!(CACHED_MATERIALIZE, MaterializeDialect, "dialect-materialize");
cached_dialect!(CACHED_RISINGWAVE, RisingWaveDialect, "dialect-risingwave");
cached_dialect!(CACHED_SINGLESTORE, SingleStoreDialect, "dialect-singlestore");
cached_dialect!(CACHED_COCKROACHDB, CockroachDBDialect, "dialect-cockroachdb");
cached_dialect!(CACHED_TIDB, TiDBDialect, "dialect-tidb");
cached_dialect!(CACHED_DRUID, DruidDialect, "dialect-druid");
cached_dialect!(CACHED_SOLR, SolrDialect, "dialect-solr");
cached_dialect!(CACHED_TABLEAU, TableauDialect, "dialect-tableau");
cached_dialect!(CACHED_DUNE, DuneDialect, "dialect-dune");
cached_dialect!(CACHED_FABRIC, FabricDialect, "dialect-fabric");
cached_dialect!(CACHED_DRILL, DrillDialect, "dialect-drill");
cached_dialect!(CACHED_DREMIO, DremioDialect, "dialect-dremio");
cached_dialect!(CACHED_EXASOL, ExasolDialect, "dialect-exasol");
cached_dialect!(CACHED_DATAFUSION, DataFusionDialect, "dialect-datafusion");
1643
1644fn configs_for_dialect_type(
1645 dt: DialectType,
1646) -> (
1647 TokenizerConfig,
1648 GeneratorConfig,
1649 Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
1650) {
1651 /// Clone configs from a cached static and pair with a fresh transform closure.
1652 macro_rules! from_cache {
1653 ($cache:expr, $dialect_struct:expr) => {{
1654 let c = &*$cache;
1655 (
1656 c.tokenizer_config.clone(),
1657 c.generator_config.clone(),
1658 Box::new(move |e| $dialect_struct.transform_expr(e)),
1659 )
1660 }};
1661 }
1662 match dt {
1663 #[cfg(feature = "dialect-postgresql")]
1664 DialectType::PostgreSQL => from_cache!(CACHED_POSTGRESQL, PostgresDialect),
1665 #[cfg(feature = "dialect-mysql")]
1666 DialectType::MySQL => from_cache!(CACHED_MYSQL, MySQLDialect),
1667 #[cfg(feature = "dialect-bigquery")]
1668 DialectType::BigQuery => from_cache!(CACHED_BIGQUERY, BigQueryDialect),
1669 #[cfg(feature = "dialect-snowflake")]
1670 DialectType::Snowflake => from_cache!(CACHED_SNOWFLAKE, SnowflakeDialect),
1671 #[cfg(feature = "dialect-duckdb")]
1672 DialectType::DuckDB => from_cache!(CACHED_DUCKDB, DuckDBDialect),
1673 #[cfg(feature = "dialect-tsql")]
1674 DialectType::TSQL => from_cache!(CACHED_TSQL, TSQLDialect),
1675 #[cfg(feature = "dialect-oracle")]
1676 DialectType::Oracle => from_cache!(CACHED_ORACLE, OracleDialect),
1677 #[cfg(feature = "dialect-hive")]
1678 DialectType::Hive => from_cache!(CACHED_HIVE, HiveDialect),
1679 #[cfg(feature = "dialect-spark")]
1680 DialectType::Spark => from_cache!(CACHED_SPARK, SparkDialect),
1681 #[cfg(feature = "dialect-sqlite")]
1682 DialectType::SQLite => from_cache!(CACHED_SQLITE, SQLiteDialect),
1683 #[cfg(feature = "dialect-presto")]
1684 DialectType::Presto => from_cache!(CACHED_PRESTO, PrestoDialect),
1685 #[cfg(feature = "dialect-trino")]
1686 DialectType::Trino => from_cache!(CACHED_TRINO, TrinoDialect),
1687 #[cfg(feature = "dialect-redshift")]
1688 DialectType::Redshift => from_cache!(CACHED_REDSHIFT, RedshiftDialect),
1689 #[cfg(feature = "dialect-clickhouse")]
1690 DialectType::ClickHouse => from_cache!(CACHED_CLICKHOUSE, ClickHouseDialect),
1691 #[cfg(feature = "dialect-databricks")]
1692 DialectType::Databricks => from_cache!(CACHED_DATABRICKS, DatabricksDialect),
1693 #[cfg(feature = "dialect-athena")]
1694 DialectType::Athena => from_cache!(CACHED_ATHENA, AthenaDialect),
1695 #[cfg(feature = "dialect-teradata")]
1696 DialectType::Teradata => from_cache!(CACHED_TERADATA, TeradataDialect),
1697 #[cfg(feature = "dialect-doris")]
1698 DialectType::Doris => from_cache!(CACHED_DORIS, DorisDialect),
1699 #[cfg(feature = "dialect-starrocks")]
1700 DialectType::StarRocks => from_cache!(CACHED_STARROCKS, StarRocksDialect),
1701 #[cfg(feature = "dialect-materialize")]
1702 DialectType::Materialize => from_cache!(CACHED_MATERIALIZE, MaterializeDialect),
1703 #[cfg(feature = "dialect-risingwave")]
1704 DialectType::RisingWave => from_cache!(CACHED_RISINGWAVE, RisingWaveDialect),
1705 #[cfg(feature = "dialect-singlestore")]
1706 DialectType::SingleStore => from_cache!(CACHED_SINGLESTORE, SingleStoreDialect),
1707 #[cfg(feature = "dialect-cockroachdb")]
1708 DialectType::CockroachDB => from_cache!(CACHED_COCKROACHDB, CockroachDBDialect),
1709 #[cfg(feature = "dialect-tidb")]
1710 DialectType::TiDB => from_cache!(CACHED_TIDB, TiDBDialect),
1711 #[cfg(feature = "dialect-druid")]
1712 DialectType::Druid => from_cache!(CACHED_DRUID, DruidDialect),
1713 #[cfg(feature = "dialect-solr")]
1714 DialectType::Solr => from_cache!(CACHED_SOLR, SolrDialect),
1715 #[cfg(feature = "dialect-tableau")]
1716 DialectType::Tableau => from_cache!(CACHED_TABLEAU, TableauDialect),
1717 #[cfg(feature = "dialect-dune")]
1718 DialectType::Dune => from_cache!(CACHED_DUNE, DuneDialect),
1719 #[cfg(feature = "dialect-fabric")]
1720 DialectType::Fabric => from_cache!(CACHED_FABRIC, FabricDialect),
1721 #[cfg(feature = "dialect-drill")]
1722 DialectType::Drill => from_cache!(CACHED_DRILL, DrillDialect),
1723 #[cfg(feature = "dialect-dremio")]
1724 DialectType::Dremio => from_cache!(CACHED_DREMIO, DremioDialect),
1725 #[cfg(feature = "dialect-exasol")]
1726 DialectType::Exasol => from_cache!(CACHED_EXASOL, ExasolDialect),
1727 #[cfg(feature = "dialect-datafusion")]
1728 DialectType::DataFusion => from_cache!(CACHED_DATAFUSION, DataFusionDialect),
1729 _ => from_cache!(CACHED_GENERIC, GenericDialect),
1730 }
1731}
1732
1733// ---------------------------------------------------------------------------
1734// Custom dialect registry
1735// ---------------------------------------------------------------------------
1736
/// Global registry of user-registered custom dialects, keyed by dialect name.
///
/// Wrapped in an `RwLock` so concurrent lookups (reads) do not block each
/// other; registration/unregistration take the write lock.
static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));
1739
/// Registry-stored configuration for a user-registered custom dialect.
///
/// Built by [`CustomDialectBuilder::register`] and consumed by
/// `Dialect::from_custom_config` when the dialect is looked up by name.
struct CustomDialectConfig {
    // Unique registry key for this dialect.
    name: String,
    // Built-in dialect whose configuration this dialect inherits.
    base_dialect: DialectType,
    // Tokenizer configuration: base dialect's config plus user modifications.
    tokenizer_config: TokenizerConfig,
    // Generator configuration: base dialect's config plus user modifications.
    generator_config: GeneratorConfig,
    // Optional user-supplied per-node expression transform (replaces the base's).
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    // Optional user-supplied whole-tree preprocess (replaces the built-in one).
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1748
1749/// Fluent builder for creating and registering custom SQL dialects.
1750///
1751/// A custom dialect is based on an existing built-in dialect and allows selective
1752/// overrides of tokenizer configuration, generator configuration, and expression
1753/// transforms.
1754///
1755/// # Example
1756///
1757/// ```rust,ignore
1758/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
1759/// use polyglot_sql::generator::NormalizeFunctions;
1760///
1761/// CustomDialectBuilder::new("my_postgres")
1762/// .based_on(DialectType::PostgreSQL)
1763/// .generator_config_modifier(|gc| {
1764/// gc.normalize_functions = NormalizeFunctions::Lower;
1765/// })
1766/// .register()
1767/// .unwrap();
1768///
1769/// let d = Dialect::get_by_name("my_postgres").unwrap();
1770/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
1771/// let sql = d.generate(&exprs[0]).unwrap();
1772/// assert_eq!(sql, "select count(*)");
1773///
1774/// polyglot_sql::unregister_custom_dialect("my_postgres");
1775/// ```
pub struct CustomDialectBuilder {
    // Name under which the dialect will be registered.
    name: String,
    // Built-in dialect to inherit tokenizer/generator configuration from.
    base_dialect: DialectType,
    // Deferred tokenizer-config modification; applied once at register time.
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    // Deferred generator-config modification; applied once at register time.
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    // Optional per-node expression transform (replaces the base dialect's).
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    // Optional whole-tree preprocess (replaces the base dialect's built-in one).
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1784
1785impl CustomDialectBuilder {
1786 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
1787 pub fn new(name: impl Into<String>) -> Self {
1788 Self {
1789 name: name.into(),
1790 base_dialect: DialectType::Generic,
1791 tokenizer_modifier: None,
1792 generator_modifier: None,
1793 transform: None,
1794 preprocess: None,
1795 }
1796 }
1797
1798 /// Set the base built-in dialect to inherit configuration from.
1799 pub fn based_on(mut self, dialect: DialectType) -> Self {
1800 self.base_dialect = dialect;
1801 self
1802 }
1803
1804 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
1805 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
1806 where
1807 F: FnOnce(&mut TokenizerConfig) + 'static,
1808 {
1809 self.tokenizer_modifier = Some(Box::new(f));
1810 self
1811 }
1812
1813 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
1814 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
1815 where
1816 F: FnOnce(&mut GeneratorConfig) + 'static,
1817 {
1818 self.generator_modifier = Some(Box::new(f));
1819 self
1820 }
1821
1822 /// Set a custom per-node expression transform function.
1823 ///
1824 /// This replaces the base dialect's transform. It is called on every expression
1825 /// node during the recursive transform pass.
1826 pub fn transform_fn<F>(mut self, f: F) -> Self
1827 where
1828 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1829 {
1830 self.transform = Some(Arc::new(f));
1831 self
1832 }
1833
1834 /// Set a custom whole-tree preprocessing function.
1835 ///
1836 /// This replaces the base dialect's built-in preprocessing. It is called once
1837 /// on the entire expression tree before the recursive per-node transform.
1838 pub fn preprocess_fn<F>(mut self, f: F) -> Self
1839 where
1840 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1841 {
1842 self.preprocess = Some(Arc::new(f));
1843 self
1844 }
1845
1846 /// Build the custom dialect configuration and register it in the global registry.
1847 ///
1848 /// Returns an error if:
1849 /// - The name collides with a built-in dialect name
1850 /// - A custom dialect with the same name is already registered
1851 pub fn register(self) -> Result<()> {
1852 // Reject names that collide with built-in dialects
1853 if DialectType::from_str(&self.name).is_ok() {
1854 return Err(crate::error::Error::parse(
1855 format!(
1856 "Cannot register custom dialect '{}': name collides with built-in dialect",
1857 self.name
1858 ),
1859 0,
1860 0,
1861 0,
1862 0,
1863 ));
1864 }
1865
1866 // Get base configs
1867 let (mut tok_config, mut gen_config, _base_transform) =
1868 configs_for_dialect_type(self.base_dialect);
1869
1870 // Apply modifiers
1871 if let Some(tok_mod) = self.tokenizer_modifier {
1872 tok_mod(&mut tok_config);
1873 }
1874 if let Some(gen_mod) = self.generator_modifier {
1875 gen_mod(&mut gen_config);
1876 }
1877
1878 let config = CustomDialectConfig {
1879 name: self.name.clone(),
1880 base_dialect: self.base_dialect,
1881 tokenizer_config: tok_config,
1882 generator_config: gen_config,
1883 transform: self.transform,
1884 preprocess: self.preprocess,
1885 };
1886
1887 register_custom_dialect(config)
1888 }
1889}
1890
1891use std::str::FromStr;
1892
1893fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
1894 let mut registry = CUSTOM_DIALECT_REGISTRY.write().map_err(|e| {
1895 crate::error::Error::parse(format!("Registry lock poisoned: {}", e), 0, 0, 0, 0)
1896 })?;
1897
1898 if registry.contains_key(&config.name) {
1899 return Err(crate::error::Error::parse(
1900 format!("Custom dialect '{}' is already registered", config.name),
1901 0,
1902 0,
1903 0,
1904 0,
1905 ));
1906 }
1907
1908 registry.insert(config.name.clone(), Arc::new(config));
1909 Ok(())
1910}
1911
1912/// Remove a custom dialect from the global registry.
1913///
1914/// Returns `true` if a dialect with that name was found and removed,
1915/// `false` if no such custom dialect existed.
1916pub fn unregister_custom_dialect(name: &str) -> bool {
1917 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
1918 registry.remove(name).is_some()
1919 } else {
1920 false
1921 }
1922}
1923
1924fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
1925 CUSTOM_DIALECT_REGISTRY
1926 .read()
1927 .ok()
1928 .and_then(|registry| registry.get(name).cloned())
1929}
1930
1931/// Main entry point for dialect-specific SQL operations.
1932///
1933/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
1934/// transformer for a specific SQL database engine. It is the high-level API through
1935/// which callers parse, generate, transform, and transpile SQL.
1936///
1937/// # Usage
1938///
1939/// ```rust,ignore
1940/// use polyglot_sql::dialects::{Dialect, DialectType};
1941///
1942/// // Parse PostgreSQL SQL into an AST
1943/// let pg = Dialect::get(DialectType::PostgreSQL);
1944/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
1945///
1946/// // Transpile from PostgreSQL to BigQuery
1947/// let results = pg.transpile_to("SELECT NOW()", DialectType::BigQuery)?;
1948/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
1949/// ```
1950///
1951/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
1952/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    // Which built-in dialect this instance is configured as. Custom dialects
    // carry their base dialect's type here (see `from_custom_config`).
    dialect_type: DialectType,
    // Tokenizer built from this dialect's tokenizer configuration.
    tokenizer: Tokenizer,
    // Generator configuration used when rendering SQL output.
    generator_config: GeneratorConfig,
    // Per-node expression transform applied during `transform`.
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1963
1964impl Dialect {
1965 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
1966 ///
1967 /// This is the primary constructor. It initializes the tokenizer, generator config,
1968 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
1969 /// For hybrid dialects like Athena, it also sets up expression-specific generator
1970 /// config routing.
1971 pub fn get(dialect_type: DialectType) -> Self {
1972 let (tokenizer_config, generator_config, transformer) =
1973 configs_for_dialect_type(dialect_type);
1974
1975 // Set up expression-specific generator config for hybrid dialects
1976 let generator_config_for_expr: Option<
1977 Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
1978 > = match dialect_type {
1979 #[cfg(feature = "dialect-athena")]
1980 DialectType::Athena => Some(Box::new(|expr| {
1981 AthenaDialect.generator_config_for_expr(expr)
1982 })),
1983 _ => None,
1984 };
1985
1986 Self {
1987 dialect_type,
1988 tokenizer: Tokenizer::new(tokenizer_config),
1989 generator_config,
1990 transformer,
1991 generator_config_for_expr,
1992 custom_preprocess: None,
1993 }
1994 }
1995
1996 /// Look up a dialect by string name.
1997 ///
1998 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
1999 /// falls back to the custom dialect registry. Returns `None` if no dialect
2000 /// with the given name exists.
2001 pub fn get_by_name(name: &str) -> Option<Self> {
2002 // Try built-in first
2003 if let Ok(dt) = DialectType::from_str(name) {
2004 return Some(Self::get(dt));
2005 }
2006
2007 // Try custom registry
2008 let config = get_custom_dialect_config(name)?;
2009 Some(Self::from_custom_config(&config))
2010 }
2011
2012 /// Construct a `Dialect` from a custom dialect configuration.
2013 fn from_custom_config(config: &CustomDialectConfig) -> Self {
2014 // Build the transformer: use custom if provided, else use base dialect's
2015 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
2016 if let Some(ref custom_transform) = config.transform {
2017 let t = Arc::clone(custom_transform);
2018 Box::new(move |e| t(e))
2019 } else {
2020 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
2021 base_transform
2022 };
2023
2024 // Build the custom preprocess: use custom if provided
2025 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
2026 config.preprocess.as_ref().map(|p| {
2027 let p = Arc::clone(p);
2028 Box::new(move |e: Expression| p(e))
2029 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
2030 });
2031
2032 Self {
2033 dialect_type: config.base_dialect,
2034 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
2035 generator_config: config.generator_config.clone(),
2036 transformer,
2037 generator_config_for_expr: None,
2038 custom_preprocess,
2039 }
2040 }
2041
    /// Get the dialect type
    ///
    /// For dialects built from a custom config this is the base dialect type.
    pub fn dialect_type(&self) -> DialectType {
        self.dialect_type
    }

    /// Get the generator configuration
    pub fn generator_config(&self) -> &GeneratorConfig {
        &self.generator_config
    }
2051
2052 /// Parses a SQL string into a list of [`Expression`] AST nodes.
2053 ///
2054 /// The input may contain multiple semicolon-separated statements; each one
2055 /// produces a separate element in the returned vector. Tokenization uses
2056 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
2057 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
2058 let tokens = self.tokenizer.tokenize(sql)?;
2059 let config = crate::parser::ParserConfig {
2060 dialect: Some(self.dialect_type),
2061 ..Default::default()
2062 };
2063 let mut parser = Parser::with_source(tokens, config, sql.to_string());
2064 parser.parse()
2065 }
2066
    /// Tokenize SQL using this dialect's tokenizer configuration.
    ///
    /// Unlike [`parse`](Self::parse), this stops after lexing and returns the
    /// raw token stream.
    pub fn tokenize(&self, sql: &str) -> Result<Vec<Token>> {
        self.tokenizer.tokenize(sql)
    }
2071
2072 /// Get the generator config for a specific expression (supports hybrid dialects)
2073 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
2074 if let Some(ref config_fn) = self.generator_config_for_expr {
2075 config_fn(expr)
2076 } else {
2077 self.generator_config.clone()
2078 }
2079 }
2080
2081 /// Generates a SQL string from an [`Expression`] AST node.
2082 ///
2083 /// The output uses this dialect's generator configuration for identifier quoting,
2084 /// keyword casing, function name normalization, and syntax style. The result is
2085 /// a single-line (non-pretty) SQL string.
2086 pub fn generate(&self, expr: &Expression) -> Result<String> {
2087 let config = self.get_config_for_expr(expr);
2088 let mut generator = Generator::with_config(config);
2089 generator.generate(expr)
2090 }
2091
2092 /// Generate SQL from an expression with pretty printing enabled
2093 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
2094 let mut config = self.get_config_for_expr(expr);
2095 config.pretty = true;
2096 let mut generator = Generator::with_config(config);
2097 generator.generate(expr)
2098 }
2099
2100 /// Generate SQL from an expression with source dialect info (for transpilation)
2101 pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
2102 let mut config = self.get_config_for_expr(expr);
2103 config.source_dialect = Some(source);
2104 let mut generator = Generator::with_config(config);
2105 generator.generate(expr)
2106 }
2107
2108 /// Generate SQL from an expression with pretty printing and source dialect info
2109 pub fn generate_pretty_with_source(
2110 &self,
2111 expr: &Expression,
2112 source: DialectType,
2113 ) -> Result<String> {
2114 let mut config = self.get_config_for_expr(expr);
2115 config.pretty = true;
2116 config.source_dialect = Some(source);
2117 let mut generator = Generator::with_config(config);
2118 generator.generate(expr)
2119 }
2120
2121 /// Generate SQL from an expression with forced identifier quoting (identify=True)
2122 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
2123 let mut config = self.get_config_for_expr(expr);
2124 config.always_quote_identifiers = true;
2125 let mut generator = Generator::with_config(config);
2126 generator.generate(expr)
2127 }
2128
2129 /// Generate SQL from an expression with pretty printing and forced identifier quoting
2130 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
2131 let mut config = self.generator_config.clone();
2132 config.pretty = true;
2133 config.always_quote_identifiers = true;
2134 let mut generator = Generator::with_config(config);
2135 generator.generate(expr)
2136 }
2137
2138 /// Generate SQL from an expression with caller-specified config overrides
2139 pub fn generate_with_overrides(
2140 &self,
2141 expr: &Expression,
2142 overrides: impl FnOnce(&mut GeneratorConfig),
2143 ) -> Result<String> {
2144 let mut config = self.get_config_for_expr(expr);
2145 overrides(&mut config);
2146 let mut generator = Generator::with_config(config);
2147 generator.generate(expr)
2148 }
2149
2150 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
2151 ///
2152 /// The transformation proceeds in two phases:
2153 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
2154 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
2155 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
2156 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
2157 ///
2158 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
2159 /// and for identity transforms (normalizing SQL within the same dialect).
2160 pub fn transform(&self, expr: Expression) -> Result<Expression> {
2161 // Apply preprocessing transforms based on dialect
2162 let preprocessed = self.preprocess(expr)?;
2163 // Then apply recursive transformation
2164 transform_recursive(preprocessed, &self.transformer)
2165 }
2166
    /// Apply dialect-specific preprocessing transforms
    ///
    /// Runs once over the whole expression tree before the recursive per-node
    /// transform pass (see [`transform`](Self::transform)). Each match arm
    /// rewrites constructs the dialect cannot express (QUALIFY, FULL OUTER
    /// JOIN, WINDOW clauses, GENERATE_DATE_ARRAY, ...) into equivalent
    /// supported SQL. A custom dialect's user-supplied preprocess function,
    /// when present, fully replaces this built-in logic.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        // If a custom preprocess function is set, use it instead of the built-in logic
        if let Some(ref custom_preprocess) = self.custom_preprocess {
            return custom_preprocess(expr);
        }

        // Pull in the transforms module only when at least one dialect that
        // uses it below is compiled in; otherwise the import would be unused.
        #[cfg(any(
            feature = "dialect-mysql",
            feature = "dialect-postgresql",
            feature = "dialect-bigquery",
            feature = "dialect-snowflake",
            feature = "dialect-tsql",
            feature = "dialect-spark",
            feature = "dialect-databricks",
            feature = "dialect-hive",
            feature = "dialect-sqlite",
            feature = "dialect-trino",
            feature = "dialect-presto",
            feature = "dialect-duckdb",
            feature = "dialect-redshift",
            feature = "dialect-starrocks",
            feature = "dialect-oracle",
            feature = "dialect-clickhouse",
        ))]
        use crate::transforms;

        match self.dialect_type {
            // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
            // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
            #[cfg(feature = "dialect-mysql")]
            DialectType::MySQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_full_outer_join(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // PostgreSQL doesn't support QUALIFY
            // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
            // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
            #[cfg(feature = "dialect-postgresql")]
            DialectType::PostgreSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
                // Normalize SET ... TO to SET ... = in CREATE FUNCTION
                // Only normalize when sqlglot would fully parse (no body) —
                // sqlglot falls back to Command for complex function bodies,
                // preserving the original text including TO.
                let expr = if let Expression::CreateFunction(mut cf) = expr {
                    if cf.body.is_none() {
                        for opt in &mut cf.set_options {
                            if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
                                &mut opt.value
                            {
                                *use_to = false;
                            }
                        }
                    }
                    Expression::CreateFunction(cf)
                } else {
                    expr
                };
                Ok(expr)
            }
            // BigQuery doesn't support DISTINCT ON or CTE column aliases
            #[cfg(feature = "dialect-bigquery")]
            DialectType::BigQuery => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::pushdown_cte_column_names(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
                Ok(expr)
            }
            // Snowflake
            #[cfg(feature = "dialect-snowflake")]
            DialectType::Snowflake => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
                Ok(expr)
            }
            // TSQL doesn't support QUALIFY
            // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
            // TSQL doesn't support CTEs in subqueries (hoist to top level)
            // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
            // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
            #[cfg(feature = "dialect-tsql")]
            DialectType::TSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::ensure_bools(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                let expr = transforms::qualify_derived_table_outputs(expr)?;
                Ok(expr)
            }
            // Spark doesn't support QUALIFY (but Databricks does)
            // Spark doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-spark")]
            DialectType::Spark => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Databricks supports QUALIFY natively
            // Databricks doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-databricks")]
            DialectType::Databricks => {
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Hive doesn't support QUALIFY or CTEs in subqueries
            #[cfg(feature = "dialect-hive")]
            DialectType::Hive => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // SQLite doesn't support QUALIFY
            #[cfg(feature = "dialect-sqlite")]
            DialectType::SQLite => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Trino doesn't support QUALIFY
            #[cfg(feature = "dialect-trino")]
            DialectType::Trino => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
                Ok(expr)
            }
            // Presto doesn't support QUALIFY or WINDOW clause
            #[cfg(feature = "dialect-presto")]
            DialectType::Presto => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
                Ok(expr)
            }
            // DuckDB supports QUALIFY - no elimination needed
            // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
            // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
            #[cfg(feature = "dialect-duckdb")]
            DialectType::DuckDB => {
                let expr = transforms::expand_posexplode_duckdb(expr)?;
                let expr = transforms::expand_like_any(expr)?;
                Ok(expr)
            }
            // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
            #[cfg(feature = "dialect-redshift")]
            DialectType::Redshift => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
            #[cfg(feature = "dialect-starrocks")]
            DialectType::StarRocks => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::expand_between_in_delete(expr)?;
                Ok(expr)
            }
            // DataFusion supports QUALIFY and semi/anti joins natively
            #[cfg(feature = "dialect-datafusion")]
            DialectType::DataFusion => Ok(expr),
            // Oracle doesn't support QUALIFY
            #[cfg(feature = "dialect-oracle")]
            DialectType::Oracle => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Drill - no special preprocessing needed
            #[cfg(feature = "dialect-drill")]
            DialectType::Drill => Ok(expr),
            // Teradata - no special preprocessing needed
            #[cfg(feature = "dialect-teradata")]
            DialectType::Teradata => Ok(expr),
            // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
            #[cfg(feature = "dialect-clickhouse")]
            DialectType::ClickHouse => {
                let expr = transforms::no_limit_order_by_union(expr)?;
                Ok(expr)
            }
            // Other dialects - no preprocessing
            _ => Ok(expr),
        }
    }
2360
    /// Transpile SQL from this dialect to another
    ///
    /// Parses `sql` with this (source) dialect, rewrites the AST for the
    /// target dialect, and renders one output string per input statement.
    pub fn transpile_to(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, false)
    }

    /// Transpile SQL from this dialect to another with pretty printing enabled
    pub fn transpile_to_pretty(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, true)
    }
2370
2371 #[cfg(not(feature = "transpile"))]
2372 fn transpile_to_inner(
2373 &self,
2374 sql: &str,
2375 target: DialectType,
2376 pretty: bool,
2377 ) -> Result<Vec<String>> {
2378 // Without the transpile feature, only same-dialect or to/from generic is supported
2379 if self.dialect_type != target
2380 && self.dialect_type != DialectType::Generic
2381 && target != DialectType::Generic
2382 {
2383 return Err(crate::error::Error::parse(
2384 "Cross-dialect transpilation not available in this build",
2385 0,
2386 0,
2387 0,
2388 0,
2389 ));
2390 }
2391
2392 let expressions = self.parse(sql)?;
2393 let target_dialect = Dialect::get(target);
2394 let generic_identity =
2395 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
2396
2397 if generic_identity {
2398 return expressions
2399 .into_iter()
2400 .map(|expr| {
2401 if pretty {
2402 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
2403 } else {
2404 target_dialect.generate_with_source(&expr, self.dialect_type)
2405 }
2406 })
2407 .collect();
2408 }
2409
2410 expressions
2411 .into_iter()
2412 .map(|expr| {
2413 let transformed = target_dialect.transform(expr)?;
2414 if pretty {
2415 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)
2416 } else {
2417 target_dialect.generate_with_source(&transformed, self.dialect_type)
2418 }
2419 })
2420 .collect()
2421 }
2422
2423 #[cfg(feature = "transpile")]
2424 fn transpile_to_inner(
2425 &self,
2426 sql: &str,
2427 target: DialectType,
2428 pretty: bool,
2429 ) -> Result<Vec<String>> {
2430 let expressions = self.parse(sql)?;
2431 let target_dialect = Dialect::get(target);
2432 let generic_identity =
2433 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
2434
2435 if generic_identity {
2436 return expressions
2437 .into_iter()
2438 .map(|expr| {
2439 if pretty {
2440 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
2441 } else {
2442 target_dialect.generate_with_source(&expr, self.dialect_type)
2443 }
2444 })
2445 .collect();
2446 }
2447
2448 expressions
2449 .into_iter()
2450 .map(|expr| {
2451 // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
2452 // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
2453 // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
2454 let expr = if matches!(self.dialect_type, DialectType::DuckDB) {
2455 use crate::expressions::DataType as DT;
2456 transform_recursive(expr, &|e| match e {
2457 Expression::DataType(DT::VarChar { .. }) => {
2458 Ok(Expression::DataType(DT::Text))
2459 }
2460 Expression::DataType(DT::Char { .. }) => Ok(Expression::DataType(DT::Text)),
2461 _ => Ok(e),
2462 })?
2463 } else {
2464 expr
2465 };
2466
2467 // When source and target differ, first normalize the source dialect's
2468 // AST constructs to standard SQL, so that the target dialect can handle them.
2469 // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
2470 let normalized =
2471 if self.dialect_type != target && self.dialect_type != DialectType::Generic {
2472 self.transform(expr)?
2473 } else {
2474 expr
2475 };
2476
2477 // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
2478 // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
2479 // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
2480 // cross-dialect transpilation we need the unwrapped JSON_QUERY.
2481 let normalized =
2482 if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
2483 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
2484 {
2485 transform_recursive(normalized, &|e| {
2486 if let Expression::Function(ref f) = e {
2487 if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
2488 // Check if first arg is JSON_QUERY and second is JSON_VALUE
2489 if let (
2490 Expression::Function(ref jq),
2491 Expression::Function(ref jv),
2492 ) = (&f.args[0], &f.args[1])
2493 {
2494 if jq.name.eq_ignore_ascii_case("JSON_QUERY")
2495 && jv.name.eq_ignore_ascii_case("JSON_VALUE")
2496 {
2497 // Unwrap: return just JSON_QUERY(...)
2498 return Ok(f.args[0].clone());
2499 }
2500 }
2501 }
2502 }
2503 Ok(e)
2504 })?
2505 } else {
2506 normalized
2507 };
2508
2509 // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME
2510 // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects.
2511 // Python sqlglot parses Snowflake's CURRENT_TIME as Localtime expression.
2512 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
2513 && !matches!(target, DialectType::Snowflake)
2514 {
2515 transform_recursive(normalized, &|e| {
2516 if let Expression::Function(ref f) = e {
2517 if f.name.eq_ignore_ascii_case("CURRENT_TIME") {
2518 return Ok(Expression::Localtime(Box::new(
2519 crate::expressions::Localtime { this: None },
2520 )));
2521 }
2522 }
2523 Ok(e)
2524 })?
2525 } else {
2526 normalized
2527 };
2528
2529 // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT))
2530 // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
2531 // transform. DuckDB requires the count argument to be BIGINT.
2532 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
2533 && matches!(target, DialectType::DuckDB)
2534 {
2535 transform_recursive(normalized, &|e| {
2536 if let Expression::Function(ref f) = e {
2537 if f.name.eq_ignore_ascii_case("REPEAT") && f.args.len() == 2 {
2538 // Check if first arg is space string literal
2539 if let Expression::Literal(crate::expressions::Literal::String(
2540 ref s,
2541 )) = f.args[0]
2542 {
2543 if s == " " {
2544 // Wrap second arg in CAST(... AS BIGINT) if not already
2545 if !matches!(f.args[1], Expression::Cast(_)) {
2546 let mut new_args = f.args.clone();
2547 new_args[1] = Expression::Cast(Box::new(
2548 crate::expressions::Cast {
2549 this: new_args[1].clone(),
2550 to: crate::expressions::DataType::BigInt {
2551 length: None,
2552 },
2553 trailing_comments: Vec::new(),
2554 double_colon_syntax: false,
2555 format: None,
2556 default: None,
2557 inferred_type: None,
2558 },
2559 ));
2560 return Ok(Expression::Function(Box::new(
2561 crate::expressions::Function {
2562 name: f.name.clone(),
2563 args: new_args,
2564 distinct: f.distinct,
2565 trailing_comments: f.trailing_comments.clone(),
2566 use_bracket_syntax: f.use_bracket_syntax,
2567 no_parens: f.no_parens,
2568 quoted: f.quoted,
2569 span: None,
2570 inferred_type: None,
2571 },
2572 )));
2573 }
2574 }
2575 }
2576 }
2577 }
2578 Ok(e)
2579 })?
2580 } else {
2581 normalized
2582 };
2583
2584 // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
2585 // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
2586 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
2587 && !matches!(target, DialectType::BigQuery)
2588 {
2589 crate::transforms::propagate_struct_field_names(normalized)?
2590 } else {
2591 normalized
2592 };
2593
2594 // Apply cross-dialect semantic normalizations
2595 let normalized =
2596 Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;
2597
2598 // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
2599 // (SELECT UNNEST(..., max_depth => 2)) subquery
2600 // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
2601 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
2602 && matches!(target, DialectType::DuckDB)
2603 {
2604 crate::transforms::wrap_duckdb_unnest_struct(normalized)?
2605 } else {
2606 normalized
2607 };
2608
2609 // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
2610 // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
2611 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
2612 && matches!(
2613 target,
2614 DialectType::DuckDB
2615 | DialectType::Presto
2616 | DialectType::Trino
2617 | DialectType::Athena
2618 | DialectType::Spark
2619 | DialectType::Databricks
2620 ) {
2621 crate::transforms::unnest_alias_to_column_alias(normalized)?
2622 } else if matches!(self.dialect_type, DialectType::BigQuery)
2623 && matches!(target, DialectType::BigQuery | DialectType::Redshift)
2624 {
2625 // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
2626 // but don't convert alias format (no _t0 wrapper)
2627 let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
2628 // For Redshift: strip UNNEST when arg is a column reference path
2629 if matches!(target, DialectType::Redshift) {
2630 crate::transforms::strip_unnest_column_refs(result)?
2631 } else {
2632 result
2633 }
2634 } else {
2635 normalized
2636 };
2637
2638 // For Presto/Trino targets from PostgreSQL/Redshift source:
2639 // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
2640 let normalized = if matches!(
2641 self.dialect_type,
2642 DialectType::PostgreSQL | DialectType::Redshift
2643 ) && matches!(
2644 target,
2645 DialectType::Presto | DialectType::Trino | DialectType::Athena
2646 ) {
2647 crate::transforms::wrap_unnest_join_aliases(normalized)?
2648 } else {
2649 normalized
2650 };
2651
2652 // Eliminate DISTINCT ON with target-dialect awareness
2653 // This must happen after source transform (which may produce DISTINCT ON)
2654 // and before target transform, with knowledge of the target dialect's NULL ordering behavior
2655 let normalized =
2656 crate::transforms::eliminate_distinct_on_for_dialect(normalized, Some(target))?;
2657
2658 // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
2659 let normalized = if matches!(target, DialectType::Snowflake) {
2660 Self::transform_generate_date_array_snowflake(normalized)?
2661 } else {
2662 normalized
2663 };
2664
2665 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
2666 let normalized = if matches!(
2667 target,
2668 DialectType::Spark | DialectType::Databricks | DialectType::Hive
2669 ) {
2670 crate::transforms::unnest_to_explode_select(normalized)?
2671 } else {
2672 normalized
2673 };
2674
2675 // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it
2676 let normalized = if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
2677 crate::transforms::no_limit_order_by_union(normalized)?
2678 } else {
2679 normalized
2680 };
2681
2682 // TSQL: Convert COUNT(*) -> COUNT_BIG(*) when source is not TSQL/Fabric
2683 // Python sqlglot does this in the TSQL generator, but we can't do it there
2684 // because it would break TSQL -> TSQL identity
2685 let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
2686 && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
2687 {
2688 transform_recursive(normalized, &|e| {
2689 if let Expression::Count(ref c) = e {
2690 // Build COUNT_BIG(...) as an AggregateFunction
2691 let args = if c.star {
2692 vec![Expression::Star(crate::expressions::Star {
2693 table: None,
2694 except: None,
2695 replace: None,
2696 rename: None,
2697 trailing_comments: Vec::new(),
2698 span: None,
2699 })]
2700 } else if let Some(ref this) = c.this {
2701 vec![this.clone()]
2702 } else {
2703 vec![]
2704 };
2705 Ok(Expression::AggregateFunction(Box::new(
2706 crate::expressions::AggregateFunction {
2707 name: "COUNT_BIG".to_string(),
2708 args,
2709 distinct: c.distinct,
2710 filter: c.filter.clone(),
2711 order_by: Vec::new(),
2712 limit: None,
2713 ignore_nulls: None,
2714 inferred_type: None,
2715 },
2716 )))
2717 } else {
2718 Ok(e)
2719 }
2720 })?
2721 } else {
2722 normalized
2723 };
2724
2725 let transformed = target_dialect.transform(normalized)?;
2726
2727 // DuckDB target: when FROM is RANGE(n), replace SEQ's ROW_NUMBER pattern with `range`
2728 let transformed = if matches!(target, DialectType::DuckDB) {
2729 Self::seq_rownum_to_range(transformed)?
2730 } else {
2731 transformed
2732 };
2733
2734 let mut sql = if pretty {
2735 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)?
2736 } else {
2737 target_dialect.generate_with_source(&transformed, self.dialect_type)?
2738 };
2739
2740 // Align a known Snowflake pretty-print edge case with Python sqlglot output.
2741 if pretty && target == DialectType::Snowflake {
2742 sql = Self::normalize_snowflake_pretty(sql);
2743 }
2744
2745 Ok(sql)
2746 })
2747 .collect()
2748 }
2749}
2750
2751// Transpile-only methods: cross-dialect normalization and helpers
2752#[cfg(feature = "transpile")]
2753impl Dialect {
2754 /// For DuckDB target: when FROM clause contains RANGE(n), replace
2755 /// `(ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1)` with `range` in select expressions.
2756 /// This handles SEQ1/2/4/8 → RANGE transpilation from Snowflake.
2757 fn seq_rownum_to_range(expr: Expression) -> Result<Expression> {
2758 if let Expression::Select(mut select) = expr {
2759 // Check if FROM contains a RANGE function
2760 let has_range_from = if let Some(ref from) = select.from {
2761 from.expressions.iter().any(|e| {
2762 // Check for direct RANGE(...) or aliased RANGE(...)
2763 match e {
2764 Expression::Function(f) => f.name.eq_ignore_ascii_case("RANGE"),
2765 Expression::Alias(a) => {
2766 matches!(&a.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("RANGE"))
2767 }
2768 _ => false,
2769 }
2770 })
2771 } else {
2772 false
2773 };
2774
2775 if has_range_from {
2776 // Replace the ROW_NUMBER pattern in select expressions
2777 select.expressions = select.expressions.into_iter().map(|e| {
2778 Self::replace_rownum_with_range(e)
2779 }).collect();
2780 }
2781
2782 Ok(Expression::Select(select))
2783 } else {
2784 Ok(expr)
2785 }
2786 }
2787
2788 /// Replace `(ROW_NUMBER() OVER (...) - 1)` with `range` column reference
2789 fn replace_rownum_with_range(expr: Expression) -> Expression {
2790 match expr {
2791 // Match: (ROW_NUMBER() OVER (...) - 1) % N → range % N
2792 Expression::Mod(op) => {
2793 let new_left = Self::try_replace_rownum_paren(&op.left);
2794 Expression::Mod(Box::new(crate::expressions::BinaryOp {
2795 left: new_left,
2796 right: op.right,
2797 left_comments: op.left_comments,
2798 operator_comments: op.operator_comments,
2799 trailing_comments: op.trailing_comments,
2800 inferred_type: op.inferred_type,
2801 }))
2802 }
2803 // Match: (CASE WHEN (ROW...) % N >= ... THEN ... ELSE ... END)
2804 Expression::Paren(p) => {
2805 let inner = Self::replace_rownum_with_range(p.this);
2806 Expression::Paren(Box::new(crate::expressions::Paren {
2807 this: inner,
2808 trailing_comments: p.trailing_comments,
2809 }))
2810 }
2811 Expression::Case(mut c) => {
2812 // Replace ROW_NUMBER in WHEN conditions and THEN expressions
2813 c.whens = c.whens.into_iter().map(|(cond, then)| {
2814 (Self::replace_rownum_with_range(cond), Self::replace_rownum_with_range(then))
2815 }).collect();
2816 if let Some(else_) = c.else_ {
2817 c.else_ = Some(Self::replace_rownum_with_range(else_));
2818 }
2819 Expression::Case(c)
2820 }
2821 Expression::Gte(op) => {
2822 Expression::Gte(Box::new(crate::expressions::BinaryOp {
2823 left: Self::replace_rownum_with_range(op.left),
2824 right: op.right,
2825 left_comments: op.left_comments,
2826 operator_comments: op.operator_comments,
2827 trailing_comments: op.trailing_comments,
2828 inferred_type: op.inferred_type,
2829 }))
2830 }
2831 Expression::Sub(op) => {
2832 Expression::Sub(Box::new(crate::expressions::BinaryOp {
2833 left: Self::replace_rownum_with_range(op.left),
2834 right: op.right,
2835 left_comments: op.left_comments,
2836 operator_comments: op.operator_comments,
2837 trailing_comments: op.trailing_comments,
2838 inferred_type: op.inferred_type,
2839 }))
2840 }
2841 Expression::Alias(mut a) => {
2842 a.this = Self::replace_rownum_with_range(a.this);
2843 Expression::Alias(a)
2844 }
2845 other => other,
2846 }
2847 }
2848
2849 /// Check if an expression is `(ROW_NUMBER() OVER (...) - 1)` and replace with `range`
2850 fn try_replace_rownum_paren(expr: &Expression) -> Expression {
2851 if let Expression::Paren(ref p) = expr {
2852 if let Expression::Sub(ref sub) = p.this {
2853 if let Expression::WindowFunction(ref wf) = sub.left {
2854 if let Expression::Function(ref f) = wf.this {
2855 if f.name.eq_ignore_ascii_case("ROW_NUMBER") {
2856 if let Expression::Literal(crate::expressions::Literal::Number(ref n)) = sub.right {
2857 if n == "1" {
2858 return Expression::column("range");
2859 }
2860 }
2861 }
2862 }
2863 }
2864 }
2865 }
2866 expr.clone()
2867 }
2868
    /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
    /// Converts:
    /// SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
    /// To:
    /// SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
    /// FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)) AS _t0(seq, key, path, index, alias, this)
    ///
    /// Two shapes are handled by the recursive visitor:
    /// 1. `ARRAY_SIZE(GENERATE_DATE_ARRAY(...))` — delegated to
    ///    [`Self::convert_array_size_gda_snowflake`].
    /// 2. A SELECT whose JOIN list contains `UNNEST(GENERATE_DATE_ARRAY(...))`
    ///    (bare or aliased). Only the first matching join is rewritten.
    /// SELECTs without such a join fall through to
    /// [`Self::try_transform_from_gda_snowflake`], which looks in the FROM list.
    fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
        use crate::expressions::*;
        transform_recursive(expr, &|e| {
            // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
            if let Expression::ArraySize(ref af) = e {
                if let Expression::Function(ref f) = af.this {
                    if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                        let result = Self::convert_array_size_gda_snowflake(f)?;
                        return Ok(result);
                    }
                }
            }

            // Only SELECT nodes can carry the UNNEST join; pass everything else through.
            let Expression::Select(mut sel) = e else {
                return Ok(e);
            };

            // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
            let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
            let mut gda_join_idx: Option<usize> = None;

            for (idx, join) in sel.joins.iter().enumerate() {
                // The join.this may be:
                // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
                // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
                let (unnest_ref, alias_name) = match &join.this {
                    Expression::Unnest(ref unnest) => {
                        let alias = unnest.alias.as_ref().map(|id| id.name.clone());
                        (Some(unnest.as_ref()), alias)
                    }
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            (Some(unnest.as_ref()), Some(a.alias.name.clone()))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
                    // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
                    if let Expression::Function(ref f) = unnest.this {
                        if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                            let start_expr = f.args[0].clone();
                            let end_expr = f.args[1].clone();
                            // Optional third argument is the step interval.
                            let step = f.args.get(2).cloned();

                            // Extract unit from step interval
                            let unit = if let Some(Expression::Interval(ref iv)) = step {
                                if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
                                    // Debug-format the unit enum variant, e.g. Month -> "MONTH".
                                    Some(format!("{:?}", unit).to_ascii_uppercase())
                                } else if let Some(ref this) = iv.this {
                                    // The interval may be stored as a string like "1 MONTH"
                                    if let Expression::Literal(Literal::String(ref s)) = this {
                                        let parts: Vec<&str> = s.split_whitespace().collect();
                                        if parts.len() == 2 {
                                            // "1 MONTH" -> unit is the second word.
                                            Some(parts[1].to_ascii_uppercase())
                                        } else if parts.len() == 1 {
                                            // Single word like "MONTH" or just "1"
                                            let upper = parts[0].to_ascii_uppercase();
                                            // Accept only known date-part names; a bare
                                            // number like "1" carries no unit.
                                            if matches!(
                                                upper.as_str(),
                                                "YEAR"
                                                    | "QUARTER"
                                                    | "MONTH"
                                                    | "WEEK"
                                                    | "DAY"
                                                    | "HOUR"
                                                    | "MINUTE"
                                                    | "SECOND"
                                            ) {
                                                Some(upper)
                                            } else {
                                                None
                                            }
                                        } else {
                                            None
                                        }
                                    } else {
                                        None
                                    }
                                } else {
                                    None
                                }
                            } else {
                                None
                            };

                            // Without a recognizable unit we leave the join untouched.
                            if let Some(unit_str) = unit {
                                gda_info = Some((alias, start_expr, end_expr, unit_str));
                                gda_join_idx = Some(idx);
                            }
                        }
                    }
                }
                // Only the first matching join is rewritten.
                if gda_info.is_some() {
                    break;
                }
            }

            let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
                // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
                // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
                let result = Self::try_transform_from_gda_snowflake(sel);
                return result;
            };
            // gda_join_idx is always set together with gda_info above.
            let join_idx = gda_join_idx.unwrap();

            // Build ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1)
            // The unit is emitted as a bare column reference (DATEDIFF's first arg
            // is a date part keyword in Snowflake, not a string).
            let datediff = Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![
                    Expression::boxed_column(Column {
                        name: Identifier::new(&unit_str),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    start_expr.clone(),
                    end_expr.clone(),
                ],
            )));
            // (DATEDIFF(...) + 1 - 1) + 1
            // The literal "+ 1 - 1" is kept unsimplified so the generated SQL
            // matches the shape documented in the function header.
            let plus_one = Expression::Add(Box::new(BinaryOp {
                left: datediff,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }));
            let minus_one = Expression::Sub(Box::new(BinaryOp {
                left: plus_one,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }));
            let paren_inner = Expression::Paren(Box::new(Paren {
                this: minus_one,
                trailing_comments: vec![],
            }));
            let outer_plus_one = Expression::Add(Box::new(BinaryOp {
                left: paren_inner,
                right: Expression::Literal(Literal::Number("1".to_string())),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }));

            let array_gen_range = Expression::Function(Box::new(Function::new(
                "ARRAY_GENERATE_RANGE".to_string(),
                vec![
                    Expression::Literal(Literal::Number("0".to_string())),
                    outer_plus_one,
                ],
            )));

            // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
            let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
                name: Identifier::new("INPUT"),
                value: array_gen_range,
                separator: crate::expressions::NamedArgSeparator::DArrow,
            }));
            let flatten = Expression::Function(Box::new(Function::new(
                "FLATTEN".to_string(),
                vec![flatten_input],
            )));

            // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
            // The six column aliases mirror FLATTEN's output columns, with the
            // original UNNEST alias substituted for the VALUE position.
            let alias_table = Alias {
                this: flatten,
                alias: Identifier::new("_t0"),
                column_aliases: vec![
                    Identifier::new("seq"),
                    Identifier::new("key"),
                    Identifier::new("path"),
                    Identifier::new("index"),
                    Identifier::new(&alias_name),
                    Identifier::new("this"),
                ],
                pre_alias_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            };
            let lateral_expr = Expression::Lateral(Box::new(Lateral {
                this: Box::new(Expression::Alias(Box::new(alias_table))),
                view: None,
                outer: None,
                alias: None,
                alias_quoted: false,
                cross_apply: None,
                ordinality: None,
                column_aliases: vec![],
            }));

            // Remove the original join and add to FROM expressions
            // (CROSS JOIN UNNEST becomes a comma-joined LATERAL in FROM).
            sel.joins.remove(join_idx);
            if let Some(ref mut from) = sel.from {
                from.expressions.push(lateral_expr);
            }

            // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
            let dateadd_expr = Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::boxed_column(Column {
                        name: Identifier::new(&unit_str),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::boxed_column(Column {
                            name: Identifier::new(&alias_name),
                            table: None,
                            join_mark: false,
                            trailing_comments: vec![],
                            span: None,
                            inferred_type: None,
                        }),
                        to: DataType::Int {
                            length: None,
                            integer_spelling: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })),
                    Expression::Cast(Box::new(Cast {
                        this: start_expr.clone(),
                        to: DataType::Date,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })),
                ],
            )));

            // Replace references to the alias in the SELECT list
            let new_exprs: Vec<Expression> = sel
                .expressions
                .iter()
                .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
                .collect();
            sel.expressions = new_exprs;

            Ok(Expression::Select(sel))
        })
    }
3136
3137 /// Helper: replace column references to `alias_name` with dateadd expression
3138 fn replace_column_ref_with_dateadd(
3139 expr: &Expression,
3140 alias_name: &str,
3141 dateadd: &Expression,
3142 ) -> Expression {
3143 use crate::expressions::*;
3144 match expr {
3145 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
3146 // Plain column reference -> DATEADD(...) AS alias_name
3147 Expression::Alias(Box::new(Alias {
3148 this: dateadd.clone(),
3149 alias: Identifier::new(alias_name),
3150 column_aliases: vec![],
3151 pre_alias_comments: vec![],
3152 trailing_comments: vec![],
3153 inferred_type: None,
3154 }))
3155 }
3156 Expression::Alias(a) => {
3157 // Check if the inner expression references the alias
3158 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
3159 Expression::Alias(Box::new(Alias {
3160 this: new_this,
3161 alias: a.alias.clone(),
3162 column_aliases: a.column_aliases.clone(),
3163 pre_alias_comments: a.pre_alias_comments.clone(),
3164 trailing_comments: a.trailing_comments.clone(),
3165 inferred_type: None,
3166 }))
3167 }
3168 _ => expr.clone(),
3169 }
3170 }
3171
3172 /// Helper: replace column references in inner expression (not top-level)
3173 fn replace_column_ref_inner(
3174 expr: &Expression,
3175 alias_name: &str,
3176 dateadd: &Expression,
3177 ) -> Expression {
3178 use crate::expressions::*;
3179 match expr {
3180 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
3181 dateadd.clone()
3182 }
3183 Expression::Add(op) => {
3184 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
3185 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
3186 Expression::Add(Box::new(BinaryOp {
3187 left,
3188 right,
3189 left_comments: op.left_comments.clone(),
3190 operator_comments: op.operator_comments.clone(),
3191 trailing_comments: op.trailing_comments.clone(),
3192 inferred_type: None,
3193 }))
3194 }
3195 Expression::Sub(op) => {
3196 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
3197 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
3198 Expression::Sub(Box::new(BinaryOp {
3199 left,
3200 right,
3201 left_comments: op.left_comments.clone(),
3202 operator_comments: op.operator_comments.clone(),
3203 trailing_comments: op.trailing_comments.clone(),
3204 inferred_type: None,
3205 }))
3206 }
3207 Expression::Mul(op) => {
3208 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
3209 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
3210 Expression::Mul(Box::new(BinaryOp {
3211 left,
3212 right,
3213 left_comments: op.left_comments.clone(),
3214 operator_comments: op.operator_comments.clone(),
3215 trailing_comments: op.trailing_comments.clone(),
3216 inferred_type: None,
3217 }))
3218 }
3219 _ => expr.clone(),
3220 }
3221 }
3222
    /// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
    /// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
    ///
    /// Fallback for SELECTs where the GENERATE_DATE_ARRAY appears directly in the
    /// FROM list (bare UNNEST or `Alias(UNNEST(...)) AS _q(col)`), rather than in a
    /// JOIN. Only the first matching FROM item is rewritten; if none matches the
    /// SELECT is returned unchanged.
    fn try_transform_from_gda_snowflake(
        mut sel: Box<crate::expressions::Select>,
    ) -> Result<Expression> {
        use crate::expressions::*;

        // Extract GDA info from FROM clause
        let mut gda_info: Option<(
            usize,
            String,
            Expression,
            Expression,
            String,
            Option<(String, Vec<Identifier>)>,
        )> = None; // (from_idx, col_name, start, end, unit, outer_alias)

        if let Some(ref from) = sel.from {
            for (idx, table_expr) in from.expressions.iter().enumerate() {
                // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
                // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
                let (unnest_opt, outer_alias_info) = match table_expr {
                    Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
                            (Some(unnest.as_ref()), Some(alias_info))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let Some(unnest) = unnest_opt {
                    // Check for GENERATE_DATE_ARRAY function
                    let func_opt = match &unnest.this {
                        Expression::Function(ref f)
                            if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
                                && f.args.len() >= 2 =>
                        {
                            Some(f)
                        }
                        // Also check for GenerateSeries (from earlier normalization)
                        _ => None,
                    };

                    if let Some(f) = func_opt {
                        let start_expr = f.args[0].clone();
                        let end_expr = f.args[1].clone();
                        // Optional third argument carries the step interval.
                        let step = f.args.get(2).cloned();

                        // Extract unit and column name
                        let unit = Self::extract_interval_unit_str(&step);
                        // Column name comes from the outer alias's first column
                        // alias, defaulting to "value" (FLATTEN's value column).
                        let col_name = outer_alias_info
                            .as_ref()
                            .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
                            .unwrap_or_else(|| "value".to_string());

                        // Without a recognizable unit, keep scanning other FROM items.
                        if let Some(unit_str) = unit {
                            gda_info = Some((
                                idx,
                                col_name,
                                start_expr,
                                end_expr,
                                unit_str,
                                outer_alias_info,
                            ));
                            break;
                        }
                    }
                }
            }
        }

        // No GENERATE_DATE_ARRAY found: return the SELECT untouched.
        let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
        else {
            return Ok(Expression::Select(sel));
        };

        // Build the Snowflake subquery:
        // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        //  FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1))) AS _t0(seq, key, path, index, col_name, this))

        // DATEDIFF(unit, start, end) — the unit is emitted as a bare column
        // reference (a Snowflake date-part keyword, not a string literal).
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                Expression::boxed_column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // (DATEDIFF(...) + 1 - 1) + 1
        // The "+ 1 - 1" is deliberately left unsimplified so generated SQL matches
        // the shape documented above.
        let plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));
        let minus_one = Expression::Sub(Box::new(BinaryOp {
            left: plus_one,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));
        let paren_inner = Expression::Paren(Box::new(Paren {
            this: minus_one,
            trailing_comments: vec![],
        }));
        let outer_plus_one = Expression::Add(Box::new(BinaryOp {
            left: paren_inner,
            right: Expression::Literal(Literal::Number("1".to_string())),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Literal::Number("0".to_string())),
                outer_plus_one,
            ],
        )));

        // TABLE(FLATTEN(INPUT => ...))
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        // Determine alias name for the table: use outer alias or _t0
        let table_alias_name = outer_alias_info
            .as_ref()
            .map(|(name, _)| name.clone())
            .unwrap_or_else(|| "_t0".to_string());

        // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
        // The six column aliases mirror FLATTEN's output columns, with col_name
        // taking the VALUE position.
        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new(&table_alias_name),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(&col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::boxed_column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                Expression::Cast(Box::new(Cast {
                    this: Expression::boxed_column(Column {
                        name: Identifier::new(&col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })),
                // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(&col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // Build inner SELECT
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        let inner_select_expr = Expression::Select(Box::new(inner_select));
        let subquery = Expression::Subquery(Box::new(Subquery {
            this: inner_select_expr,
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // If there was an outer alias (e.g., AS _q(date_week)), wrap with alias
        let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
            Expression::Alias(Box::new(Alias {
                this: subquery,
                alias: Identifier::new(&alias_name),
                column_aliases: col_aliases,
                pre_alias_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }))
        } else {
            subquery
        };

        // Replace the FROM expression
        if let Some(ref mut from) = sel.from {
            from.expressions[from_idx] = replacement;
        }

        Ok(Expression::Select(sel))
    }
3486
3487 /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
3488 /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
3489 /// FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (DATEDIFF(unit, start, end) + 1 - 1) + 1))) AS _t0(...))))
3490 fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
3491 use crate::expressions::*;
3492
3493 let start_expr = f.args[0].clone();
3494 let end_expr = f.args[1].clone();
3495 let step = f.args.get(2).cloned();
3496 let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
3497 let col_name = "value";
3498
3499 // Build the inner subquery: same as try_transform_from_gda_snowflake
3500 let datediff = Expression::Function(Box::new(Function::new(
3501 "DATEDIFF".to_string(),
3502 vec![
3503 Expression::boxed_column(Column {
3504 name: Identifier::new(&unit_str),
3505 table: None,
3506 join_mark: false,
3507 trailing_comments: vec![],
3508 span: None,
3509 inferred_type: None,
3510 }),
3511 start_expr.clone(),
3512 end_expr.clone(),
3513 ],
3514 )));
3515 let plus_one = Expression::Add(Box::new(BinaryOp {
3516 left: datediff,
3517 right: Expression::Literal(Literal::Number("1".to_string())),
3518 left_comments: vec![],
3519 operator_comments: vec![],
3520 trailing_comments: vec![],
3521 inferred_type: None,
3522 }));
3523 let minus_one = Expression::Sub(Box::new(BinaryOp {
3524 left: plus_one,
3525 right: Expression::Literal(Literal::Number("1".to_string())),
3526 left_comments: vec![],
3527 operator_comments: vec![],
3528 trailing_comments: vec![],
3529 inferred_type: None,
3530 }));
3531 let paren_inner = Expression::Paren(Box::new(Paren {
3532 this: minus_one,
3533 trailing_comments: vec![],
3534 }));
3535 let outer_plus_one = Expression::Add(Box::new(BinaryOp {
3536 left: paren_inner,
3537 right: Expression::Literal(Literal::Number("1".to_string())),
3538 left_comments: vec![],
3539 operator_comments: vec![],
3540 trailing_comments: vec![],
3541 inferred_type: None,
3542 }));
3543
3544 let array_gen_range = Expression::Function(Box::new(Function::new(
3545 "ARRAY_GENERATE_RANGE".to_string(),
3546 vec![
3547 Expression::Literal(Literal::Number("0".to_string())),
3548 outer_plus_one,
3549 ],
3550 )));
3551
3552 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
3553 name: Identifier::new("INPUT"),
3554 value: array_gen_range,
3555 separator: crate::expressions::NamedArgSeparator::DArrow,
3556 }));
3557 let flatten = Expression::Function(Box::new(Function::new(
3558 "FLATTEN".to_string(),
3559 vec![flatten_input],
3560 )));
3561
3562 let table_func =
3563 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
3564 let flatten_aliased = Expression::Alias(Box::new(Alias {
3565 this: table_func,
3566 alias: Identifier::new("_t0"),
3567 column_aliases: vec![
3568 Identifier::new("seq"),
3569 Identifier::new("key"),
3570 Identifier::new("path"),
3571 Identifier::new("index"),
3572 Identifier::new(col_name),
3573 Identifier::new("this"),
3574 ],
3575 pre_alias_comments: vec![],
3576 trailing_comments: vec![],
3577 inferred_type: None,
3578 }));
3579
3580 let dateadd_expr = Expression::Function(Box::new(Function::new(
3581 "DATEADD".to_string(),
3582 vec![
3583 Expression::boxed_column(Column {
3584 name: Identifier::new(&unit_str),
3585 table: None,
3586 join_mark: false,
3587 trailing_comments: vec![],
3588 span: None,
3589 inferred_type: None,
3590 }),
3591 Expression::Cast(Box::new(Cast {
3592 this: Expression::boxed_column(Column {
3593 name: Identifier::new(col_name),
3594 table: None,
3595 join_mark: false,
3596 trailing_comments: vec![],
3597 span: None,
3598 inferred_type: None,
3599 }),
3600 to: DataType::Int {
3601 length: None,
3602 integer_spelling: false,
3603 },
3604 trailing_comments: vec![],
3605 double_colon_syntax: false,
3606 format: None,
3607 default: None,
3608 inferred_type: None,
3609 })),
3610 start_expr.clone(),
3611 ],
3612 )));
3613 let dateadd_aliased = Expression::Alias(Box::new(Alias {
3614 this: dateadd_expr,
3615 alias: Identifier::new(col_name),
3616 column_aliases: vec![],
3617 pre_alias_comments: vec![],
3618 trailing_comments: vec![],
3619 inferred_type: None,
3620 }));
3621
3622 // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
3623 let mut inner_select = Select::new();
3624 inner_select.expressions = vec![dateadd_aliased];
3625 inner_select.from = Some(From {
3626 expressions: vec![flatten_aliased],
3627 });
3628
3629 // Wrap in subquery for the inner part
3630 let inner_subquery = Expression::Subquery(Box::new(Subquery {
3631 this: Expression::Select(Box::new(inner_select)),
3632 alias: None,
3633 column_aliases: vec![],
3634 order_by: None,
3635 limit: None,
3636 offset: None,
3637 distribute_by: None,
3638 sort_by: None,
3639 cluster_by: None,
3640 lateral: false,
3641 modifiers_inside: false,
3642 trailing_comments: vec![],
3643 inferred_type: None,
3644 }));
3645
3646 // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery)
3647 let star = Expression::Star(Star {
3648 table: None,
3649 except: None,
3650 replace: None,
3651 rename: None,
3652 trailing_comments: vec![],
3653 span: None,
3654 });
3655 let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
3656 this: star,
3657 distinct: false,
3658 filter: None,
3659 order_by: vec![],
3660 name: Some("ARRAY_AGG".to_string()),
3661 ignore_nulls: None,
3662 having_max: None,
3663 limit: None,
3664 inferred_type: None,
3665 }));
3666
3667 let mut outer_select = Select::new();
3668 outer_select.expressions = vec![array_agg];
3669 outer_select.from = Some(From {
3670 expressions: vec![inner_subquery],
3671 });
3672
3673 // Wrap in a subquery
3674 let outer_subquery = Expression::Subquery(Box::new(Subquery {
3675 this: Expression::Select(Box::new(outer_select)),
3676 alias: None,
3677 column_aliases: vec![],
3678 order_by: None,
3679 limit: None,
3680 offset: None,
3681 distribute_by: None,
3682 sort_by: None,
3683 cluster_by: None,
3684 lateral: false,
3685 modifiers_inside: false,
3686 trailing_comments: vec![],
3687 inferred_type: None,
3688 }));
3689
3690 // ARRAY_SIZE(subquery)
3691 Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
3692 outer_subquery,
3693 ))))
3694 }
3695
3696 /// Extract interval unit string from an optional step expression.
3697 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
3698 use crate::expressions::*;
3699 if let Some(Expression::Interval(ref iv)) = step {
3700 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
3701 return Some(format!("{:?}", unit).to_ascii_uppercase());
3702 }
3703 if let Some(ref this) = iv.this {
3704 if let Expression::Literal(Literal::String(ref s)) = this {
3705 let parts: Vec<&str> = s.split_whitespace().collect();
3706 if parts.len() == 2 {
3707 return Some(parts[1].to_ascii_uppercase());
3708 } else if parts.len() == 1 {
3709 let upper = parts[0].to_ascii_uppercase();
3710 if matches!(
3711 upper.as_str(),
3712 "YEAR"
3713 | "QUARTER"
3714 | "MONTH"
3715 | "WEEK"
3716 | "DAY"
3717 | "HOUR"
3718 | "MINUTE"
3719 | "SECOND"
3720 ) {
3721 return Some(upper);
3722 }
3723 }
3724 }
3725 }
3726 }
3727 // Default to DAY if no step or no interval
3728 if step.is_none() {
3729 return Some("DAY".to_string());
3730 }
3731 None
3732 }
3733
    /// Cosmetic post-processing for one specific Snowflake pretty-printed query shape.
    ///
    /// The generator emits a known LATERAL FLATTEN / ARRAY_GENERATE_RANGE query in a
    /// layout that does not match the expected pretty format, so this pass patches the
    /// rendered SQL text directly. It fires only when BOTH marker substrings in the
    /// `if` condition are present; all other input is returned unchanged.
    ///
    /// NOTE(review): these are exact byte-for-byte string substitutions over generator
    /// output — any change to the generator's spacing silently disables them.
    /// Presumably kept as a stopgap until the pretty-printer handles this shape
    /// natively; confirm before extending.
    fn normalize_snowflake_pretty(mut sql: String) -> String {
        if sql.contains("LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)")
            && sql.contains("ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)")
        {
            // Break the `<> ALL (SELECT ...)` subquery onto indented lines.
            sql = sql.replace(
                "AND uc.user_id <> ALL (SELECT DISTINCT\n  _id\n  FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n  WHERE\n    GET_PATH(datasource.value, 'name') = 'something')",
                "AND uc.user_id <> ALL (\n  SELECT DISTINCT\n    _id\n  FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n  WHERE\n    GET_PATH(datasource.value, 'name') = 'something'\n)",
            );

            // Expand the `(GREATEST(...) - 1)` bound of ARRAY_GENERATE_RANGE onto
            // its own indented lines.
            sql = sql.replace(
                "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
                "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n  GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
            );

            // Re-indent the two-clause OR predicate over the flatten positions.
            sql = sql.replace(
                "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n    AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
                "OR (\n  _u.pos > (\n    ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n  )\n  AND _u_2.pos_2 = (\n    ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n  )\n)",
            );
        }

        sql
    }
3756
3757 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
3758 /// This handles cases where the same syntax has different semantics across dialects.
3759 fn cross_dialect_normalize(
3760 expr: Expression,
3761 source: DialectType,
3762 target: DialectType,
3763 ) -> Result<Expression> {
3764 use crate::expressions::{
3765 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
3766 Function, Identifier, IsNull, Literal, Null, Paren,
3767 };
3768
3769 // Helper to tag which kind of transform to apply
3770 #[derive(Debug)]
3771 enum Action {
3772 None,
3773 GreatestLeastNull,
3774 ArrayGenerateRange,
3775 Div0TypedDivision,
3776 ArrayAggCollectList,
3777 ArrayAggWithinGroupFilter,
3778 ArrayAggFilter,
3779 CastTimestampToDatetime,
3780 DateTruncWrapCast,
3781 ToDateToCast,
3782 ConvertTimezoneToExpr,
3783 SetToVariable,
3784 RegexpReplaceSnowflakeToDuckDB,
3785 BigQueryFunctionNormalize,
3786 BigQuerySafeDivide,
3787 BigQueryCastType,
3788 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
3789 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
3790 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
3791 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
3792 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
3793 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
3794 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
3795 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
3796 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target (partial match)
3797 EpochConvert, // Expression::Epoch -> target-specific epoch function
3798 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
3799 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
3800 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
3801 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
3802 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
3803 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
3804 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
3805 TempTableHash, // TSQL #table -> temp table normalization
3806 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
3807 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
3808 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
3809 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
3810 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
3811 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
3812 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3813 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3814 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
3815 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
3816 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
3817 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
3818 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
3819 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
3820 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
3821 ArrayAggToGroupConcat, // ARRAY_AGG(x) -> GROUP_CONCAT(x) for MySQL-like targets
3822 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
3823 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
3824 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
3825 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
3826 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
3827 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
3828 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
3829 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
3830 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
3831 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
3832 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
3833 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
3834 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
3835 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
3836 DollarParamConvert, // $foo -> @foo for BigQuery
3837 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
3838 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
3839 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
3840 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
3841 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
3842 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
3843 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
3844 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
3845 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
3846 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
3847 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
3848 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
3849 RespectNullsConvert, // RESPECT NULLS window function handling
3850 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
3851 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
3852 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
3853 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
3854 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
3855 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
3856 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
3857 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
3858 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
3859 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
3860 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
3861 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
3862 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
3863 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
3864 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
3865 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
3866 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
3867 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
3868 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
3869 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
3870 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
3871 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
3872 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
3873 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
3874 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
3875 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
3876 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
3877 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
3878 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
3879 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
3880 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
3881 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
3882 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
3883 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
3884 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
3885 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
3886 JsonExtractToTsql, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
3887 JsonExtractToClickHouse, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
3888 JsonExtractScalarConvert, // JSON_EXTRACT_SCALAR -> target-specific (PostgreSQL, Snowflake, SQLite)
3889 JsonPathNormalize, // Normalize JSON path format (brackets, wildcards, quotes) for various dialects
3890 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
3891 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
3892 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
3893 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
3894 Nvl2Expand, // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END
3895 IfnullToCoalesce, // IFNULL(a, b) -> COALESCE(a, b)
3896 IsAsciiConvert, // IS_ASCII(x) -> dialect-specific ASCII check
3897 StrPositionConvert, // STR_POSITION(haystack, needle[, pos]) -> dialect-specific
3898 DecodeSimplify, // DECODE with null-safe -> simple = comparison
3899 ArraySumConvert, // ARRAY_SUM -> target-specific
3900 ArraySizeConvert, // ARRAY_SIZE -> target-specific
3901 ArrayAnyConvert, // ARRAY_ANY -> target-specific
3902 CastTimestamptzToFunc, // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) for MySQL/StarRocks
3903 TsOrDsToDateConvert, // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific
3904 TsOrDsToDateStrConvert, // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
3905 DateStrToDateConvert, // DATE_STR_TO_DATE(x) -> CAST(x AS DATE)
3906 TimeStrToDateConvert, // TIME_STR_TO_DATE(x) -> CAST(x AS DATE)
3907 TimeStrToTimeConvert, // TIME_STR_TO_TIME(x) -> CAST(x AS TIMESTAMP)
3908 DateToDateStrConvert, // DATE_TO_DATE_STR(x) -> CAST(x AS TEXT/VARCHAR/STRING)
3909 DateToDiConvert, // DATE_TO_DI(x) -> dialect-specific (CAST date to YYYYMMDD integer)
3910 DiToDateConvert, // DI_TO_DATE(x) -> dialect-specific (integer YYYYMMDD to date)
3911 TsOrDiToDiConvert, // TS_OR_DI_TO_DI(x) -> dialect-specific
3912 UnixToStrConvert, // UNIX_TO_STR(x, fmt) -> dialect-specific
3913 UnixToTimeConvert, // UNIX_TO_TIME(x) -> dialect-specific
3914 UnixToTimeStrConvert, // UNIX_TO_TIME_STR(x) -> dialect-specific
3915 TimeToUnixConvert, // TIME_TO_UNIX(x) -> dialect-specific
3916 TimeToStrConvert, // TIME_TO_STR(x, fmt) -> dialect-specific
3917 StrToUnixConvert, // STR_TO_UNIX(x, fmt) -> dialect-specific
3918 DateTruncSwapArgs, // DATE_TRUNC('unit', x) -> DATE_TRUNC(x, unit) / TRUNC(x, unit)
3919 TimestampTruncConvert, // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> dialect-specific
3920 StrToDateConvert, // STR_TO_DATE(x, fmt) from Generic -> CAST(StrToTime(x,fmt) AS DATE)
3921 TsOrDsAddConvert, // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> DATE_ADD per dialect
3922 DateFromUnixDateConvert, // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
3923 TimeStrToUnixConvert, // TIME_STR_TO_UNIX(x) -> dialect-specific
3924 TimeToTimeStrConvert, // TIME_TO_TIME_STR(x) -> CAST(x AS type)
3925 CreateTableLikeToCtas, // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
3926 CreateTableLikeToSelectInto, // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
3927 CreateTableLikeToAs, // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
3928 ArrayRemoveConvert, // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
3929 ArrayReverseConvert, // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
3930 JsonKeysConvert, // JSON_KEYS -> JSON_OBJECT_KEYS/OBJECT_KEYS
3931 ParseJsonStrip, // PARSE_JSON(x) -> x (strip wrapper)
3932 ArraySizeDrill, // ARRAY_SIZE -> REPEATED_COUNT for Drill
3933 WeekOfYearToWeekIso, // WEEKOFYEAR -> WEEKISO for Snowflake cross-dialect
3934 RegexpSubstrSnowflakeToDuckDB, // REGEXP_SUBSTR(s, p, ...) -> REGEXP_EXTRACT variants for DuckDB
3935 RegexpSubstrSnowflakeIdentity, // REGEXP_SUBSTR/REGEXP_SUBSTR_ALL strip trailing group=0 for Snowflake identity
3936 RegexpSubstrAllSnowflakeToDuckDB, // REGEXP_SUBSTR_ALL(s, p, ...) -> REGEXP_EXTRACT_ALL variants for DuckDB
3937 RegexpCountSnowflakeToDuckDB, // REGEXP_COUNT(s, p, ...) -> LENGTH(REGEXP_EXTRACT_ALL(...)) for DuckDB
3938 RegexpInstrSnowflakeToDuckDB, // REGEXP_INSTR(s, p, ...) -> complex CASE expression for DuckDB
3939 RegexpReplacePositionSnowflakeToDuckDB, // REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
3940 RlikeSnowflakeToDuckDB, // RLIKE(a, b[, flags]) -> REGEXP_MATCHES(a, anchored_pattern) for DuckDB
3941 RegexpExtractAllToSnowflake, // BigQuery REGEXP_EXTRACT_ALL -> REGEXP_SUBSTR_ALL for Snowflake
3942 ArrayExceptConvert, // ARRAY_EXCEPT -> DuckDB complex CASE / Snowflake ARRAY_EXCEPT / Presto ARRAY_EXCEPT
3943 ArrayDistinctConvert, // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
3944 ArrayContainsDuckDBConvert, // ARRAY_CONTAINS -> DuckDB CASE with NULL-aware check
3945 }
3946
3947 // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
3948 let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
3949 Self::transform_select_into(expr, source, target)
3950 } else {
3951 expr
3952 };
3953
3954 // Strip OFFSET ROWS for non-TSQL/Oracle targets
3955 let expr = if !matches!(
3956 target,
3957 DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
3958 ) {
3959 if let Expression::Select(mut select) = expr {
3960 if let Some(ref mut offset) = select.offset {
3961 offset.rows = None;
3962 }
3963 Expression::Select(select)
3964 } else {
3965 expr
3966 }
3967 } else {
3968 expr
3969 };
3970
3971 // Oracle: LIMIT -> FETCH FIRST, OFFSET -> OFFSET ROWS
3972 let expr = if matches!(target, DialectType::Oracle) {
3973 if let Expression::Select(mut select) = expr {
3974 if let Some(limit) = select.limit.take() {
3975 // Convert LIMIT to FETCH FIRST n ROWS ONLY
3976 select.fetch = Some(crate::expressions::Fetch {
3977 direction: "FIRST".to_string(),
3978 count: Some(limit.this),
3979 percent: false,
3980 rows: true,
3981 with_ties: false,
3982 });
3983 }
3984 // Add ROWS to OFFSET if present
3985 if let Some(ref mut offset) = select.offset {
3986 offset.rows = Some(true);
3987 }
3988 Expression::Select(select)
3989 } else {
3990 expr
3991 }
3992 } else {
3993 expr
3994 };
3995
3996 // Handle CreateTable WITH properties transformation before recursive transforms
3997 let expr = if let Expression::CreateTable(mut ct) = expr {
3998 Self::transform_create_table_properties(&mut ct, source, target);
3999
4000 // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
4001 // When the PARTITIONED BY clause contains column definitions, merge them into the
4002 // main column list and adjust the PARTITIONED BY clause for the target dialect.
4003 if matches!(
4004 source,
4005 DialectType::Hive | DialectType::Spark | DialectType::Databricks
4006 ) {
4007 let mut partition_col_names: Vec<String> = Vec::new();
4008 let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
4009 let mut has_col_def_partitions = false;
4010
4011 // Check if any PARTITIONED BY property contains ColumnDef expressions
4012 for prop in &ct.properties {
4013 if let Expression::PartitionedByProperty(ref pbp) = prop {
4014 if let Expression::Tuple(ref tuple) = *pbp.this {
4015 for expr in &tuple.expressions {
4016 if let Expression::ColumnDef(ref cd) = expr {
4017 has_col_def_partitions = true;
4018 partition_col_names.push(cd.name.name.clone());
4019 partition_col_defs.push(*cd.clone());
4020 }
4021 }
4022 }
4023 }
4024 }
4025
4026 if has_col_def_partitions && !matches!(target, DialectType::Hive) {
4027 // Merge partition columns into main column list
4028 for cd in partition_col_defs {
4029 ct.columns.push(cd);
4030 }
4031
4032 // Replace PARTITIONED BY property with column-name-only version
4033 ct.properties
4034 .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));
4035
4036 if matches!(
4037 target,
4038 DialectType::Presto | DialectType::Trino | DialectType::Athena
4039 ) {
4040 // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
4041 let array_elements: Vec<String> = partition_col_names
4042 .iter()
4043 .map(|n| format!("'{}'", n))
4044 .collect();
4045 let array_value = format!("ARRAY[{}]", array_elements.join(", "));
4046 ct.with_properties
4047 .push(("PARTITIONED_BY".to_string(), array_value));
4048 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
4049 // Spark: PARTITIONED BY (y, z) - just column names
4050 let name_exprs: Vec<Expression> = partition_col_names
4051 .iter()
4052 .map(|n| {
4053 Expression::Column(Box::new(crate::expressions::Column {
4054 name: crate::expressions::Identifier::new(n.clone()),
4055 table: None,
4056 join_mark: false,
4057 trailing_comments: Vec::new(),
4058 span: None,
4059 inferred_type: None,
4060 }))
4061 })
4062 .collect();
4063 ct.properties.insert(
4064 0,
4065 Expression::PartitionedByProperty(Box::new(
4066 crate::expressions::PartitionedByProperty {
4067 this: Box::new(Expression::Tuple(Box::new(
4068 crate::expressions::Tuple {
4069 expressions: name_exprs,
4070 },
4071 ))),
4072 },
4073 )),
4074 );
4075 }
4076 // For DuckDB and other targets, just drop the PARTITIONED BY (already retained above)
4077 }
4078
4079 // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
4080 // are handled by transform_create_table_properties which runs first
4081 }
4082
4083 // Strip LOCATION property for Presto/Trino (not supported)
4084 if matches!(
4085 target,
4086 DialectType::Presto | DialectType::Trino | DialectType::Athena
4087 ) {
4088 ct.properties
4089 .retain(|p| !matches!(p, Expression::LocationProperty(_)));
4090 }
4091
4092 // Strip table-level constraints for Spark/Hive/Databricks
4093 // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
4094 if matches!(
4095 target,
4096 DialectType::Spark | DialectType::Databricks | DialectType::Hive
4097 ) {
4098 ct.constraints.retain(|c| {
4099 matches!(
4100 c,
4101 crate::expressions::TableConstraint::PrimaryKey { .. }
4102 | crate::expressions::TableConstraint::Like { .. }
4103 )
4104 });
4105 for constraint in &mut ct.constraints {
4106 if let crate::expressions::TableConstraint::PrimaryKey {
4107 columns,
4108 modifiers,
4109 ..
4110 } = constraint
4111 {
4112 // Strip ASC/DESC from column names
4113 for col in columns.iter_mut() {
4114 if col.name.ends_with(" ASC") {
4115 col.name = col.name[..col.name.len() - 4].to_string();
4116 } else if col.name.ends_with(" DESC") {
4117 col.name = col.name[..col.name.len() - 5].to_string();
4118 }
4119 }
4120 // Strip TSQL-specific modifiers
4121 modifiers.clustered = None;
4122 modifiers.with_options.clear();
4123 modifiers.on_filegroup = None;
4124 }
4125 }
4126 }
4127
4128 // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
4129 if matches!(target, DialectType::Databricks) {
4130 for col in &mut ct.columns {
4131 if col.auto_increment {
4132 if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
4133 col.data_type = crate::expressions::DataType::BigInt { length: None };
4134 }
4135 }
4136 }
4137 }
4138
4139 // Spark/Databricks: INTEGER -> INT in column definitions
4140 // Python sqlglot always outputs INT for Spark/Databricks
4141 if matches!(target, DialectType::Spark | DialectType::Databricks) {
4142 for col in &mut ct.columns {
4143 if let crate::expressions::DataType::Int {
4144 integer_spelling, ..
4145 } = &mut col.data_type
4146 {
4147 *integer_spelling = false;
4148 }
4149 }
4150 }
4151
4152 // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
4153 if matches!(target, DialectType::Hive | DialectType::Spark) {
4154 for col in &mut ct.columns {
4155 // If nullable is explicitly true (NULL), change to None (omit it)
4156 if col.nullable == Some(true) {
4157 col.nullable = None;
4158 }
4159 // Also remove from constraints if stored there
4160 col.constraints
4161 .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
4162 }
4163 }
4164
4165 // Strip TSQL ON filegroup for non-TSQL/Fabric targets
4166 if ct.on_property.is_some()
4167 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4168 {
4169 ct.on_property = None;
4170 }
4171
4172 // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
4173 // Snowflake doesn't support typed arrays in DDL
4174 if matches!(target, DialectType::Snowflake) {
4175 fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
4176 if let crate::expressions::DataType::Array { .. } = dt {
4177 *dt = crate::expressions::DataType::Custom {
4178 name: "ARRAY".to_string(),
4179 };
4180 }
4181 }
4182 for col in &mut ct.columns {
4183 strip_array_type_params(&mut col.data_type);
4184 }
4185 }
4186
4187 // PostgreSQL target: ensure IDENTITY columns have NOT NULL
4188 // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
4189 // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
4190 if matches!(target, DialectType::PostgreSQL) {
4191 for col in &mut ct.columns {
4192 if col.auto_increment && !col.constraint_order.is_empty() {
4193 use crate::expressions::ConstraintType;
4194 let has_explicit_not_null = col
4195 .constraint_order
4196 .iter()
4197 .any(|ct| *ct == ConstraintType::NotNull);
4198
4199 if has_explicit_not_null {
4200 // Source had explicit NOT NULL - preserve original order
4201 // Just ensure nullable is set
4202 if col.nullable != Some(false) {
4203 col.nullable = Some(false);
4204 }
4205 } else {
4206 // Source didn't have explicit NOT NULL - build order with
4207 // AutoIncrement + NotNull first, then remaining constraints
4208 let mut new_order = Vec::new();
4209 // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
4210 new_order.push(ConstraintType::AutoIncrement);
4211 new_order.push(ConstraintType::NotNull);
4212 // Add remaining constraints in original order (except AutoIncrement)
4213 for ct_type in &col.constraint_order {
4214 if *ct_type != ConstraintType::AutoIncrement {
4215 new_order.push(ct_type.clone());
4216 }
4217 }
4218 col.constraint_order = new_order;
4219 col.nullable = Some(false);
4220 }
4221 }
4222 }
4223 }
4224
4225 Expression::CreateTable(ct)
4226 } else {
4227 expr
4228 };
4229
4230 // Handle CreateView column stripping for Presto/Trino target
4231 let expr = if let Expression::CreateView(mut cv) = expr {
4232 // Presto/Trino: drop column list when view has a SELECT body
4233 if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
4234 {
4235 if !matches!(&cv.query, Expression::Null(_)) {
4236 cv.columns.clear();
4237 }
4238 }
4239 Expression::CreateView(cv)
4240 } else {
4241 expr
4242 };
4243
4244 // Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for generic/non-Presto targets
4245 let expr = if !matches!(
4246 target,
4247 DialectType::Presto | DialectType::Trino | DialectType::Athena
4248 ) {
4249 if let Expression::Select(mut select) = expr {
4250 if let Some(ref mut with) = select.with {
4251 for cte in &mut with.ctes {
4252 if let Expression::Values(ref vals) = cte.this {
4253 // Build: SELECT * FROM (VALUES ...) AS _values
4254 let values_subquery =
4255 Expression::Subquery(Box::new(crate::expressions::Subquery {
4256 this: Expression::Values(vals.clone()),
4257 alias: Some(Identifier::new("_values".to_string())),
4258 column_aliases: Vec::new(),
4259 order_by: None,
4260 limit: None,
4261 offset: None,
4262 distribute_by: None,
4263 sort_by: None,
4264 cluster_by: None,
4265 lateral: false,
4266 modifiers_inside: false,
4267 trailing_comments: Vec::new(),
4268 inferred_type: None,
4269 }));
4270 let mut new_select = crate::expressions::Select::new();
4271 new_select.expressions =
4272 vec![Expression::Star(crate::expressions::Star {
4273 table: None,
4274 except: None,
4275 replace: None,
4276 rename: None,
4277 trailing_comments: Vec::new(),
4278 span: None,
4279 })];
4280 new_select.from = Some(crate::expressions::From {
4281 expressions: vec![values_subquery],
4282 });
4283 cte.this = Expression::Select(Box::new(new_select));
4284 }
4285 }
4286 }
4287 Expression::Select(select)
4288 } else {
4289 expr
4290 }
4291 } else {
4292 expr
4293 };
4294
// PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have nulls ordering
// NOTE(review): this sets NULLS FIRST unconditionally, without inspecting the
// column's ASC/DESC direction. PostgreSQL's implicit default is NULLS LAST for
// ASC and NULLS FIRST for DESC — confirm the generator relies on this being
// forced for all columns, or whether direction should be consulted here.
let expr = if matches!(target, DialectType::PostgreSQL) {
    if let Expression::CreateIndex(mut ci) = expr {
        for col in &mut ci.columns {
            if col.nulls_first.is_none() {
                col.nulls_first = Some(true);
            }
        }
        Expression::CreateIndex(ci)
    } else {
        expr
    }
} else {
    expr
};
4310
4311 transform_recursive(expr, &|e| {
// BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
// This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
    if let Expression::Cast(ref c) = e {
        // Check if this is a CAST of an array to a struct array type
        let is_struct_array_cast =
            matches!(&c.to, crate::expressions::DataType::Array { .. });
        if is_struct_array_cast {
            // An element counts as "auto-named" when every field name is either
            // absent or of the form `_<digits>` (the parser's positional names).
            let has_auto_named_structs = match &c.this {
                Expression::Array(arr) => arr.expressions.iter().any(|elem| {
                    if let Expression::Struct(s) = elem {
                        s.fields.iter().all(|(name, _)| {
                            name.as_ref().map_or(true, |n| {
                                n.starts_with('_') && n[1..].parse::<usize>().is_ok()
                            })
                        })
                    } else {
                        false
                    }
                }),
                Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
                    if let Expression::Struct(s) = elem {
                        s.fields.iter().all(|(name, _)| {
                            name.as_ref().map_or(true, |n| {
                                n.starts_with('_') && n[1..].parse::<usize>().is_ok()
                            })
                        })
                    } else {
                        false
                    }
                }),
                _ => false,
            };
            if has_auto_named_structs {
                // Drop the synthetic names and keep only the values: ROW(x, y).
                let convert_struct_to_row = |elem: Expression| -> Expression {
                    if let Expression::Struct(s) = elem {
                        let row_args: Vec<Expression> =
                            s.fields.into_iter().map(|(_, v)| v).collect();
                        Expression::Function(Box::new(Function::new(
                            "ROW".to_string(),
                            row_args,
                        )))
                    } else {
                        elem
                    }
                };
                // Rewrite the array elements inside a clone of the CAST node.
                let mut c_clone = c.as_ref().clone();
                match &mut c_clone.this {
                    Expression::Array(arr) => {
                        arr.expressions = arr
                            .expressions
                            .drain(..)
                            .map(convert_struct_to_row)
                            .collect();
                    }
                    Expression::ArrayFunc(arr) => {
                        arr.expressions = arr
                            .expressions
                            .drain(..)
                            .map(convert_struct_to_row)
                            .collect();
                    }
                    _ => {}
                }
                return Ok(Expression::Cast(Box::new(c_clone)));
            }
        }
    }
}
4381

// BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
    if let Expression::Select(ref sel) = e {
        if sel.kind.as_deref() == Some("STRUCT") {
            // Field names come from aliases or column names; anything else
            // becomes an unnamed field.
            let mut fields = Vec::new();
            for expr in &sel.expressions {
                match expr {
                    Expression::Alias(a) => {
                        fields.push((Some(a.alias.name.clone()), a.this.clone()));
                    }
                    Expression::Column(c) => {
                        fields.push((Some(c.name.name.clone()), expr.clone()));
                    }
                    _ => {
                        fields.push((None, expr.clone()));
                    }
                }
            }
            let struct_lit =
                Expression::Struct(Box::new(crate::expressions::Struct { fields }));
            // Keep the rest of the SELECT (FROM, WHERE, ...) but project the
            // single struct literal and clear the STRUCT kind marker.
            let mut new_select = sel.as_ref().clone();
            new_select.kind = None;
            new_select.expressions = vec![struct_lit];
            return Ok(Expression::Select(Box::new(new_select)));
        }
    }
}
4409
// Convert @variable -> ${variable} for Spark/Hive/Databricks
if matches!(source, DialectType::TSQL | DialectType::Fabric)
    && matches!(
        target,
        DialectType::Spark | DialectType::Databricks | DialectType::Hive
    )
{
    // Case 1: the variable was parsed as a Parameter with At style.
    if let Expression::Parameter(ref p) = e {
        if p.style == crate::expressions::ParameterStyle::At {
            if let Some(ref name) = p.name {
                return Ok(Expression::Parameter(Box::new(
                    crate::expressions::Parameter {
                        name: Some(name.clone()),
                        index: p.index,
                        style: crate::expressions::ParameterStyle::DollarBrace,
                        quoted: p.quoted,
                        string_quoted: p.string_quoted,
                        expression: None,
                    },
                )));
            }
        }
    }
    // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
    // NOTE(review): trim_start_matches strips every leading '@', so a system
    // variable like @@ROWCOUNT loses both — confirm that is intended.
    if let Expression::Column(ref col) = e {
        if col.name.name.starts_with('@') && col.table.is_none() {
            let var_name = col.name.name.trim_start_matches('@').to_string();
            return Ok(Expression::Parameter(Box::new(
                crate::expressions::Parameter {
                    name: Some(var_name),
                    index: None,
                    style: crate::expressions::ParameterStyle::DollarBrace,
                    quoted: false,
                    string_quoted: false,
                    expression: None,
                },
            )));
        }
    }
}
4450
// Convert @variable -> variable in SET statements for Spark/Databricks
if matches!(source, DialectType::TSQL | DialectType::Fabric)
    && matches!(target, DialectType::Spark | DialectType::Databricks)
{
    if let Expression::SetStatement(ref s) = e {
        // Work on a copy of the items; only emit a new node if something changed.
        let mut new_items = s.items.clone();
        let mut changed = false;
        for item in &mut new_items {
            // Strip @ from the SET name (Parameter style)
            if let Expression::Parameter(ref p) = item.name {
                if p.style == crate::expressions::ParameterStyle::At {
                    if let Some(ref name) = p.name {
                        item.name = Expression::Identifier(Identifier::new(name));
                        changed = true;
                    }
                }
            }
            // Strip @ from the SET name (Identifier style - SET parser)
            if let Expression::Identifier(ref id) = item.name {
                if id.name.starts_with('@') {
                    let var_name = id.name.trim_start_matches('@').to_string();
                    item.name = Expression::Identifier(Identifier::new(&var_name));
                    changed = true;
                }
            }
            // Strip @ from the SET name (Column style - alternative parsing)
            if let Expression::Column(ref col) = item.name {
                if col.name.name.starts_with('@') && col.table.is_none() {
                    let var_name = col.name.name.trim_start_matches('@').to_string();
                    item.name = Expression::Identifier(Identifier::new(&var_name));
                    changed = true;
                }
            }
        }
        if changed {
            let mut new_set = (**s).clone();
            new_set.items = new_items;
            return Ok(Expression::SetStatement(Box::new(new_set)));
        }
    }
}
4492
// Strip NOLOCK hint for non-TSQL targets
// (clears ALL table hints, not only NOLOCK — no other dialect renders them).
if matches!(source, DialectType::TSQL | DialectType::Fabric)
    && !matches!(target, DialectType::TSQL | DialectType::Fabric)
{
    if let Expression::Table(ref tr) = e {
        if !tr.hints.is_empty() {
            let mut new_tr = tr.clone();
            new_tr.hints.clear();
            return Ok(Expression::Table(new_tr));
        }
    }
}
4505
// Snowflake: TRUE IS TRUE -> TRUE, FALSE IS FALSE -> FALSE
// Snowflake simplifies IS TRUE/IS FALSE on boolean literals
if matches!(target, DialectType::Snowflake) {
    // b IS TRUE -> b; b IS NOT TRUE -> NOT b (literal operand only).
    if let Expression::IsTrue(ref itf) = e {
        if let Expression::Boolean(ref b) = itf.this {
            if !itf.not {
                return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                    value: b.value,
                }));
            } else {
                return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                    value: !b.value,
                }));
            }
        }
    }
    // b IS FALSE -> NOT b; b IS NOT FALSE -> b (literal operand only).
    if let Expression::IsFalse(ref itf) = e {
        if let Expression::Boolean(ref b) = itf.this {
            if !itf.not {
                return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                    value: !b.value,
                }));
            } else {
                return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                    value: b.value,
                }));
            }
        }
    }
}
4536
// BigQuery: split dotted backtick identifiers in table names
// e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
    if let Expression::CreateTable(ref ct) = e {
        let mut changed = false;
        let mut new_ct = ct.clone();
        // Split the table name
        // (only when no schema is already set — a set schema means the name
        // was parsed as separate parts to begin with).
        if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
            let parts: Vec<&str> = ct.name.name.name.split('.').collect();
            // Use quoted identifiers when the original was quoted (backtick in BigQuery)
            let was_quoted = ct.name.name.quoted;
            let mk_id = |s: &str| {
                if was_quoted {
                    Identifier::quoted(s)
                } else {
                    Identifier::new(s)
                }
            };
            // 3 parts -> catalog.schema.name, 2 parts -> schema.name;
            // any other count is left untouched.
            if parts.len() == 3 {
                new_ct.name.catalog = Some(mk_id(parts[0]));
                new_ct.name.schema = Some(mk_id(parts[1]));
                new_ct.name.name = mk_id(parts[2]);
                changed = true;
            } else if parts.len() == 2 {
                new_ct.name.schema = Some(mk_id(parts[0]));
                new_ct.name.name = mk_id(parts[1]);
                changed = true;
            }
        }
        // Split the clone source name (CREATE TABLE ... CLONE src)
        if let Some(ref clone_src) = ct.clone_source {
            if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
                let parts: Vec<&str> = clone_src.name.name.split('.').collect();
                let was_quoted = clone_src.name.quoted;
                let mk_id = |s: &str| {
                    if was_quoted {
                        Identifier::quoted(s)
                    } else {
                        Identifier::new(s)
                    }
                };
                let mut new_src = clone_src.clone();
                if parts.len() == 3 {
                    new_src.catalog = Some(mk_id(parts[0]));
                    new_src.schema = Some(mk_id(parts[1]));
                    new_src.name = mk_id(parts[2]);
                    new_ct.clone_source = Some(new_src);
                    changed = true;
                } else if parts.len() == 2 {
                    new_src.schema = Some(mk_id(parts[0]));
                    new_src.name = mk_id(parts[1]);
                    new_ct.clone_source = Some(new_src);
                    changed = true;
                }
            }
        }
        if changed {
            return Ok(Expression::CreateTable(new_ct));
        }
    }
}
4598
// BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
// -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
if matches!(source, DialectType::BigQuery)
    && matches!(
        target,
        DialectType::DuckDB
            | DialectType::Presto
            | DialectType::Trino
            | DialectType::Athena
    )
{
    if let Expression::Subscript(ref sub) = e {
        // new_index: rewritten index expression (None = leave node alone);
        // is_safe: whether a SAFE_* accessor was used (NULL on out-of-bounds).
        let (new_index, is_safe) = match &sub.index {
            // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
            Expression::Literal(Literal::Number(n)) => {
                if let Ok(val) = n.parse::<i64>() {
                    (
                        Some(Expression::Literal(Literal::Number(
                            (val + 1).to_string(),
                        ))),
                        false,
                    )
                } else {
                    (None, false)
                }
            }
            // OFFSET(n) -> n+1 (0-based)
            Expression::Function(ref f)
                if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
            {
                // Fold literal numbers at transpile time; otherwise emit `arg + 1`.
                if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
                    if let Ok(val) = n.parse::<i64>() {
                        (
                            Some(Expression::Literal(Literal::Number(
                                (val + 1).to_string(),
                            ))),
                            false,
                        )
                    } else {
                        (
                            Some(Expression::Add(Box::new(
                                crate::expressions::BinaryOp::new(
                                    f.args[0].clone(),
                                    Expression::number(1),
                                ),
                            ))),
                            false,
                        )
                    }
                } else {
                    (
                        Some(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(
                                f.args[0].clone(),
                                Expression::number(1),
                            ),
                        ))),
                        false,
                    )
                }
            }
            // ORDINAL(n) -> n (already 1-based)
            Expression::Function(ref f)
                if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
            {
                (Some(f.args[0].clone()), false)
            }
            // SAFE_OFFSET(n) -> n+1 (0-based, safe)
            Expression::Function(ref f)
                if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
            {
                if let Expression::Literal(Literal::Number(n)) = &f.args[0] {
                    if let Ok(val) = n.parse::<i64>() {
                        (
                            Some(Expression::Literal(Literal::Number(
                                (val + 1).to_string(),
                            ))),
                            true,
                        )
                    } else {
                        (
                            Some(Expression::Add(Box::new(
                                crate::expressions::BinaryOp::new(
                                    f.args[0].clone(),
                                    Expression::number(1),
                                ),
                            ))),
                            true,
                        )
                    }
                } else {
                    (
                        Some(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(
                                f.args[0].clone(),
                                Expression::number(1),
                            ),
                        ))),
                        true,
                    )
                }
            }
            // SAFE_ORDINAL(n) -> n (already 1-based, safe)
            Expression::Function(ref f)
                if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
            {
                (Some(f.args[0].clone()), true)
            }
            _ => (None, false),
        };
        if let Some(idx) = new_index {
            if is_safe
                && matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                )
            {
                // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
                return Ok(Expression::Function(Box::new(Function::new(
                    "ELEMENT_AT".to_string(),
                    vec![sub.this.clone(), idx],
                ))));
            } else {
                // DuckDB or non-safe: just use subscript with converted index
                return Ok(Expression::Subscript(Box::new(
                    crate::expressions::Subscript {
                        this: sub.this.clone(),
                        index: idx,
                    },
                )));
            }
        }
    }
}
4733
// BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
// BigQuery LENGTH counts bytes for BYTES and characters for STRING; DuckDB
// needs a runtime type dispatch to reproduce both behaviors with one call.
if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
    if let Expression::Length(ref uf) = e {
        let arg = uf.this.clone();
        // TYPEOF(x) — the CASE operand used to dispatch on the runtime type.
        let typeof_func = Expression::Function(Box::new(Function::new(
            "TYPEOF".to_string(),
            vec![arg.clone()],
        )));
        // BLOB branch: OCTET_LENGTH(CAST(x AS VARBINARY)) counts bytes.
        let blob_cast = Expression::Cast(Box::new(Cast {
            this: arg.clone(),
            to: DataType::VarBinary { length: None },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }));
        let octet_length = Expression::Function(Box::new(Function::new(
            "OCTET_LENGTH".to_string(),
            vec![blob_cast],
        )));
        // ELSE branch: LENGTH(CAST(x AS TEXT)) counts characters.
        let text_cast = Expression::Cast(Box::new(Cast {
            this: arg,
            to: DataType::Text,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }));
        let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
            this: text_cast,
            original_name: None,
            inferred_type: None,
        }));
        return Ok(Expression::Case(Box::new(Case {
            operand: Some(typeof_func),
            whens: vec![(
                Expression::Literal(Literal::String("BLOB".to_string())),
                octet_length,
            )],
            else_: Some(length_text),
            comments: Vec::new(),
            inferred_type: None,
        })));
    }
}
4781
// BigQuery UNNEST alias handling (only for non-BigQuery sources):
// UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
// UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
    if let Expression::Alias(ref a) = e {
        if matches!(&a.this, Expression::Unnest(_)) {
            if a.column_aliases.is_empty() {
                // Drop the entire alias, return just the UNNEST expression
                return Ok(a.this.clone());
            } else {
                // Use first column alias as the main alias
                // (BigQuery has no x(y) column-alias syntax for UNNEST).
                let mut new_alias = a.as_ref().clone();
                new_alias.alias = a.column_aliases[0].clone();
                new_alias.column_aliases.clear();
                return Ok(Expression::Alias(Box::new(new_alias)));
            }
        }
    }
}
4801
// BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
    if let Expression::In(ref in_expr) = e {
        if let Some(ref unnest_inner) = in_expr.unnest {
            // Build the function call for the target dialect
            let func_expr = if matches!(
                target,
                DialectType::Hive | DialectType::Spark | DialectType::Databricks
            ) {
                // Use EXPLODE for Hive/Spark
                Expression::Function(Box::new(Function::new(
                    "EXPLODE".to_string(),
                    vec![*unnest_inner.clone()],
                )))
            } else {
                // Use UNNEST for Presto/Trino/DuckDB/etc.
                Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
                    this: *unnest_inner.clone(),
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                }))
            };

            // Wrap in SELECT
            let mut inner_select = crate::expressions::Select::new();
            inner_select.expressions = vec![func_expr];

            let subquery_expr = Expression::Select(Box::new(inner_select));

            // Rebuild the IN node as a query-form IN, clearing the unnest slot.
            return Ok(Expression::In(Box::new(crate::expressions::In {
                this: in_expr.this.clone(),
                expressions: Vec::new(),
                query: Some(subquery_expr),
                not: in_expr.not,
                global: in_expr.global,
                unnest: None,
                is_field: false,
            })));
        }
    }
}
4845
// SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
// This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
// (SQLite's generate_series exposes its output as a column named "value").
if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
    if let Expression::Alias(ref a) = e {
        if let Expression::Function(ref f) = a.this {
            if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
                && !a.column_aliases.is_empty()
            {
                // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
                let col_alias = a.column_aliases[0].clone();
                let mut inner_select = crate::expressions::Select::new();
                inner_select.expressions =
                    vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
                        Expression::Identifier(Identifier::new("value".to_string())),
                        col_alias,
                    )))];
                inner_select.from = Some(crate::expressions::From {
                    expressions: vec![a.this.clone()],
                });
                let subquery =
                    Expression::Subquery(Box::new(crate::expressions::Subquery {
                        this: Expression::Select(Box::new(inner_select)),
                        alias: Some(a.alias.clone()),
                        column_aliases: Vec::new(),
                        order_by: None,
                        limit: None,
                        offset: None,
                        lateral: false,
                        modifiers_inside: false,
                        trailing_comments: Vec::new(),
                        distribute_by: None,
                        sort_by: None,
                        cluster_by: None,
                        inferred_type: None,
                    }));
                return Ok(subquery);
            }
        }
    }
}
4886
// BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
// e.g., SELECT results FROM Coordinates, Coordinates.position AS results
// -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
if matches!(source, DialectType::BigQuery) {
    if let Expression::Select(ref s) = e {
        if let Some(ref from) = s.from {
            if from.expressions.len() >= 2 {
                // Collect table names from first expression
                // (lower-cased for case-insensitive matching below).
                let first_tables: Vec<String> = from
                    .expressions
                    .iter()
                    .take(1)
                    .filter_map(|expr| {
                        if let Expression::Table(t) = expr {
                            Some(t.name.name.to_ascii_lowercase())
                        } else {
                            None
                        }
                    })
                    .collect();

                // Check if any subsequent FROM expressions are schema-qualified with a matching table name
                // or have a dotted name matching a table
                let mut needs_rewrite = false;
                for expr in from.expressions.iter().skip(1) {
                    if let Expression::Table(t) = expr {
                        if let Some(ref schema) = t.schema {
                            if first_tables.contains(&schema.name.to_ascii_lowercase()) {
                                needs_rewrite = true;
                                break;
                            }
                        }
                        // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
                        if t.schema.is_none() && t.name.name.contains('.') {
                            let parts: Vec<&str> = t.name.name.split('.').collect();
                            if parts.len() >= 2
                                && first_tables.contains(&parts[0].to_ascii_lowercase())
                            {
                                needs_rewrite = true;
                                break;
                            }
                        }
                    }
                }

                if needs_rewrite {
                    // Keep the first FROM entry; turn matching comma entries
                    // into CROSS JOINs appended after any existing joins.
                    let mut new_select = s.clone();
                    let mut new_from_exprs = vec![from.expressions[0].clone()];
                    let mut new_joins = s.joins.clone();

                    for expr in from.expressions.iter().skip(1) {
                        if let Expression::Table(ref t) = expr {
                            if let Some(ref schema) = t.schema {
                                if first_tables.contains(&schema.name.to_ascii_lowercase()) {
                                    // This is an array path reference, convert to CROSS JOIN UNNEST
                                    let col_expr = Expression::Column(
                                        Box::new(crate::expressions::Column {
                                            name: t.name.clone(),
                                            table: Some(schema.clone()),
                                            join_mark: false,
                                            trailing_comments: vec![],
                                            span: None,
                                            inferred_type: None,
                                        }),
                                    );
                                    let unnest_expr = Expression::Unnest(Box::new(
                                        crate::expressions::UnnestFunc {
                                            this: col_expr,
                                            expressions: Vec::new(),
                                            with_ordinality: false,
                                            alias: None,
                                            offset_alias: None,
                                        },
                                    ));
                                    let join_this = if let Some(ref alias) = t.alias {
                                        if matches!(
                                            target,
                                            DialectType::Presto
                                                | DialectType::Trino
                                                | DialectType::Athena
                                        ) {
                                            // Presto: UNNEST(x) AS _t0(results)
                                            Expression::Alias(Box::new(
                                                crate::expressions::Alias {
                                                    this: unnest_expr,
                                                    alias: Identifier::new("_t0"),
                                                    column_aliases: vec![alias.clone()],
                                                    pre_alias_comments: vec![],
                                                    trailing_comments: vec![],
                                                    inferred_type: None,
                                                },
                                            ))
                                        } else {
                                            // BigQuery: UNNEST(x) AS results
                                            Expression::Alias(Box::new(
                                                crate::expressions::Alias {
                                                    this: unnest_expr,
                                                    alias: alias.clone(),
                                                    column_aliases: vec![],
                                                    pre_alias_comments: vec![],
                                                    trailing_comments: vec![],
                                                    inferred_type: None,
                                                },
                                            ))
                                        }
                                    } else {
                                        unnest_expr
                                    };
                                    new_joins.push(crate::expressions::Join {
                                        kind: crate::expressions::JoinKind::Cross,
                                        this: join_this,
                                        on: None,
                                        using: Vec::new(),
                                        use_inner_keyword: false,
                                        use_outer_keyword: false,
                                        deferred_condition: false,
                                        join_hint: None,
                                        match_condition: None,
                                        pivots: Vec::new(),
                                        comments: Vec::new(),
                                        nesting_group: 0,
                                        directed: false,
                                    });
                                } else {
                                    // Schema doesn't match a FROM table: keep as-is.
                                    new_from_exprs.push(expr.clone());
                                }
                            } else if t.schema.is_none() && t.name.name.contains('.') {
                                // Dotted name in quoted identifier: `Coordinates.position`
                                let parts: Vec<&str> = t.name.name.split('.').collect();
                                if parts.len() >= 2
                                    && first_tables.contains(&parts[0].to_ascii_lowercase())
                                {
                                    let join_this =
                                        if matches!(target, DialectType::BigQuery) {
                                            // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
                                            Expression::Table(t.clone())
                                        } else {
                                            // Other targets: split into "schema"."name"
                                            let mut new_t = t.clone();
                                            new_t.schema =
                                                Some(Identifier::quoted(parts[0]));
                                            new_t.name = Identifier::quoted(parts[1]);
                                            Expression::Table(new_t)
                                        };
                                    new_joins.push(crate::expressions::Join {
                                        kind: crate::expressions::JoinKind::Cross,
                                        this: join_this,
                                        on: None,
                                        using: Vec::new(),
                                        use_inner_keyword: false,
                                        use_outer_keyword: false,
                                        deferred_condition: false,
                                        join_hint: None,
                                        match_condition: None,
                                        pivots: Vec::new(),
                                        comments: Vec::new(),
                                        nesting_group: 0,
                                        directed: false,
                                    });
                                } else {
                                    new_from_exprs.push(expr.clone());
                                }
                            } else {
                                new_from_exprs.push(expr.clone());
                            }
                        } else {
                            // Non-table FROM entries (subqueries etc.) pass through.
                            new_from_exprs.push(expr.clone());
                        }
                    }

                    new_select.from = Some(crate::expressions::From {
                        expressions: new_from_exprs,
                        ..from.clone()
                    });
                    new_select.joins = new_joins;
                    return Ok(Expression::Select(new_select));
                }
            }
        }
    }
}
5068
// CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
if matches!(
    target,
    DialectType::Hive | DialectType::Spark | DialectType::Databricks
) {
    if let Expression::Select(ref s) = e {
        // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
        let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
            matches!(expr, Expression::Unnest(_))
                || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
        };
        let has_unnest_join = s.joins.iter().any(|j| {
            j.kind == crate::expressions::JoinKind::Cross && (
                matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
                || is_unnest_or_explode_expr(&j.this)
            )
        });
        if has_unnest_join {
            // Drain the joins: UNNEST/EXPLODE cross-joins move to lateral_views,
            // everything else is kept as a join.
            let mut select = s.clone();
            let mut new_joins = Vec::new();
            for join in select.joins.drain(..) {
                if join.kind == crate::expressions::JoinKind::Cross {
                    // Extract the UNNEST/EXPLODE from the join
                    let (func_expr, table_alias, col_aliases) = match &join.this {
                        Expression::Alias(a) => {
                            let ta = if a.alias.is_empty() {
                                None
                            } else {
                                Some(a.alias.clone())
                            };
                            let cas = a.column_aliases.clone();
                            match &a.this {
                                Expression::Unnest(u) => {
                                    // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
                                    if !u.expressions.is_empty() {
                                        let mut all_args = vec![u.this.clone()];
                                        all_args.extend(u.expressions.clone());
                                        let arrays_zip =
                                            Expression::Function(Box::new(
                                                crate::expressions::Function::new(
                                                    "ARRAYS_ZIP".to_string(),
                                                    all_args,
                                                ),
                                            ));
                                        let inline = Expression::Function(Box::new(
                                            crate::expressions::Function::new(
                                                "INLINE".to_string(),
                                                vec![arrays_zip],
                                            ),
                                        ));
                                        (Some(inline), ta, a.column_aliases.clone())
                                    } else {
                                        // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
                                        let func_name = if u.with_ordinality {
                                            "POSEXPLODE"
                                        } else {
                                            "EXPLODE"
                                        };
                                        let explode = Expression::Function(Box::new(
                                            crate::expressions::Function::new(
                                                func_name.to_string(),
                                                vec![u.this.clone()],
                                            ),
                                        ));
                                        // For POSEXPLODE, add 'pos' to column aliases
                                        let cas = if u.with_ordinality {
                                            let mut pos_aliases =
                                                vec![Identifier::new(
                                                    "pos".to_string(),
                                                )];
                                            pos_aliases
                                                .extend(a.column_aliases.clone());
                                            pos_aliases
                                        } else {
                                            a.column_aliases.clone()
                                        };
                                        (Some(explode), ta, cas)
                                    }
                                }
                                Expression::Function(f)
                                    if f.name.eq_ignore_ascii_case("EXPLODE") =>
                                {
                                    // Already an EXPLODE call: reuse it verbatim.
                                    (Some(Expression::Function(f.clone())), ta, cas)
                                }
                                _ => (None, None, Vec::new()),
                            }
                        }
                        Expression::Unnest(u) => {
                            // Bare (un-aliased) UNNEST join operand.
                            let func_name = if u.with_ordinality {
                                "POSEXPLODE"
                            } else {
                                "EXPLODE"
                            };
                            let explode = Expression::Function(Box::new(
                                crate::expressions::Function::new(
                                    func_name.to_string(),
                                    vec![u.this.clone()],
                                ),
                            ));
                            let ta = u.alias.clone();
                            let col_aliases = if u.with_ordinality {
                                vec![Identifier::new("pos".to_string())]
                            } else {
                                Vec::new()
                            };
                            (Some(explode), ta, col_aliases)
                        }
                        _ => (None, None, Vec::new()),
                    };
                    if let Some(func) = func_expr {
                        select.lateral_views.push(crate::expressions::LateralView {
                            this: func,
                            table_alias,
                            column_aliases: col_aliases,
                            outer: false,
                        });
                    } else {
                        // Cross join without UNNEST/EXPLODE: keep it.
                        new_joins.push(join);
                    }
                } else {
                    new_joins.push(join);
                }
            }
            select.joins = new_joins;
            return Ok(Expression::Select(select));
        }
    }
}
5197
// UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
// for BigQuery, Presto/Trino, Snowflake
if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
    && matches!(
        target,
        DialectType::BigQuery
            | DialectType::Presto
            | DialectType::Trino
            | DialectType::Snowflake
    )
{
    if let Expression::Select(ref s) = e {
        // Check if any SELECT expressions contain UNNEST
        // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
        let has_unnest_in_select = s.expressions.iter().any(|expr| {
            // Recursive probe: looks through aliases and the four arithmetic
            // binary ops; other wrappers are not searched.
            fn contains_unnest(e: &Expression) -> bool {
                match e {
                    Expression::Unnest(_) => true,
                    Expression::Function(f)
                        if f.name.eq_ignore_ascii_case("UNNEST") =>
                    {
                        true
                    }
                    Expression::Alias(a) => contains_unnest(&a.this),
                    Expression::Add(op)
                    | Expression::Sub(op)
                    | Expression::Mul(op)
                    | Expression::Div(op) => {
                        contains_unnest(&op.left) || contains_unnest(&op.right)
                    }
                    _ => false,
                }
            }
            contains_unnest(expr)
        });

        if has_unnest_in_select {
            // Delegate the actual rewrite; None means "leave unchanged".
            let rewritten = Self::rewrite_unnest_expansion(s, target);
            if let Some(new_select) = rewritten {
                return Ok(Expression::Select(Box::new(new_select)));
            }
        }
    }
}
5242
5243 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
5244 // BigQuery '\n' -> PostgreSQL literal newline in string
5245 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
5246 {
5247 if let Expression::Literal(Literal::String(ref s)) = e {
5248 if s.contains("\\n")
5249 || s.contains("\\t")
5250 || s.contains("\\r")
5251 || s.contains("\\\\")
5252 {
5253 let converted = s
5254 .replace("\\n", "\n")
5255 .replace("\\t", "\t")
5256 .replace("\\r", "\r")
5257 .replace("\\\\", "\\");
5258 return Ok(Expression::Literal(Literal::String(converted)));
5259 }
5260 }
5261 }
5262
// Cross-dialect: convert Literal::Timestamp to target-specific CAST form
// when source != target (identity tests keep the Literal::Timestamp for native handling)
if source != target {
    if let Expression::Literal(Literal::Timestamp(ref s)) = e {
        let s = s.clone();
        // MySQL: TIMESTAMP handling depends on source dialect
        // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
        // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
        if matches!(target, DialectType::MySQL) {
            if matches!(source, DialectType::BigQuery) {
                // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
                return Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMP".to_string(),
                    vec![Expression::Literal(Literal::String(s))],
                ))));
            } else {
                // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
                return Ok(Expression::Cast(Box::new(Cast {
                    this: Expression::Literal(Literal::String(s)),
                    to: DataType::Custom {
                        name: "DATETIME".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })));
            }
        }
        // All non-MySQL targets: pick the CAST target type per dialect.
        let dt = match target {
            DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
                name: "DATETIME".to_string(),
            },
            DialectType::Snowflake => {
                // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
                if matches!(source, DialectType::BigQuery) {
                    DataType::Custom {
                        name: "TIMESTAMPTZ".to_string(),
                    }
                } else if matches!(
                    source,
                    DialectType::PostgreSQL
                        | DialectType::Redshift
                        | DialectType::Snowflake
                ) {
                    // Plain TIMESTAMP for sources whose TIMESTAMP already
                    // means "no time zone".
                    DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    }
                } else {
                    DataType::Custom {
                        name: "TIMESTAMPNTZ".to_string(),
                    }
                }
            }
            DialectType::Spark | DialectType::Databricks => {
                // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
                if matches!(source, DialectType::BigQuery) {
                    DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    }
                } else {
                    DataType::Custom {
                        name: "TIMESTAMP_NTZ".to_string(),
                    }
                }
            }
            DialectType::ClickHouse => DataType::Nullable {
                inner: Box::new(DataType::Custom {
                    name: "DateTime".to_string(),
                }),
            },
            DialectType::TSQL | DialectType::Fabric => DataType::Custom {
                name: "DATETIME2".to_string(),
            },
            DialectType::DuckDB => {
                // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
                // or when the timestamp string explicitly has timezone info
                if matches!(source, DialectType::BigQuery)
                    || Self::timestamp_string_has_timezone(&s)
                {
                    DataType::Custom {
                        name: "TIMESTAMPTZ".to_string(),
                    }
                } else {
                    DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    }
                }
            }
            _ => DataType::Timestamp {
                precision: None,
                timezone: false,
            },
        };
        return Ok(Expression::Cast(Box::new(Cast {
            this: Expression::Literal(Literal::String(s)),
            to: dt,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        })));
    }
}
5372
// PostgreSQL DELETE requires explicit AS for table aliases:
// rewrite `DELETE FROM t t1 ...` to `DELETE FROM t AS t1 ...`.
if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
    if let Expression::Delete(ref del) = e {
        // Only rewrite when an alias exists and was written without AS.
        if del.alias.is_some() && !del.alias_explicit_as {
            let mut new_del = del.clone();
            new_del.alias_explicit_as = true;
            return Ok(Expression::Delete(new_del));
        }
    }
}
5383
// UNION/INTERSECT/EXCEPT DISTINCT handling:
// Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
// while others don't support it (Presto, Spark, DuckDB, etc.)
{
    // Targets where a bare set operator must be rendered as `... DISTINCT`.
    let needs_distinct =
        matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
    // Targets where DISTINCT is the implicit default and the keyword
    // must be dropped from the AST before generation.
    let drop_distinct = matches!(
        target,
        DialectType::Presto
            | DialectType::Trino
            | DialectType::Athena
            | DialectType::Spark
            | DialectType::Databricks
            | DialectType::DuckDB
            | DialectType::Hive
            | DialectType::MySQL
            | DialectType::PostgreSQL
            | DialectType::SQLite
            | DialectType::TSQL
            | DialectType::Redshift
            | DialectType::Snowflake
            | DialectType::Oracle
            | DialectType::Teradata
            | DialectType::Drill
            | DialectType::Doris
            | DialectType::StarRocks
    );
    match &e {
        // Bare set operator (neither ALL nor DISTINCT) -> mark DISTINCT explicitly.
        Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
            let mut new_u = (**u).clone();
            new_u.distinct = true;
            return Ok(Expression::Union(Box::new(new_u)));
        }
        Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
            let mut new_i = (**i).clone();
            new_i.distinct = true;
            return Ok(Expression::Intersect(Box::new(new_i)));
        }
        Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
            let mut new_ex = (**ex).clone();
            new_ex.distinct = true;
            return Ok(Expression::Except(Box::new(new_ex)));
        }
        // Explicit DISTINCT on a target that doesn't accept it -> drop the flag.
        Expression::Union(u) if u.distinct && drop_distinct => {
            let mut new_u = (**u).clone();
            new_u.distinct = false;
            return Ok(Expression::Union(Box::new(new_u)));
        }
        Expression::Intersect(i) if i.distinct && drop_distinct => {
            let mut new_i = (**i).clone();
            new_i.distinct = false;
            return Ok(Expression::Intersect(Box::new(new_i)));
        }
        Expression::Except(ex) if ex.distinct && drop_distinct => {
            let mut new_ex = (**ex).clone();
            new_ex.distinct = false;
            return Ok(Expression::Except(Box::new(new_ex)));
        }
        _ => {}
    }
}
5445
// ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name)
if matches!(target, DialectType::ClickHouse) {
    if let Expression::Function(ref f) = e {
        // Only rename when there are args; bare MAP() is left alone.
        if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
            let mut new_f = f.as_ref().clone();
            new_f.name = "map".to_string();
            return Ok(Expression::Function(Box::new(new_f)));
        }
    }
}

// ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
if matches!(target, DialectType::ClickHouse) {
    if let Expression::Intersect(ref i) = e {
        if i.all {
            // Clear the ALL flag; the generator then emits plain INTERSECT.
            let mut new_i = (**i).clone();
            new_i.all = false;
            return Ok(Expression::Intersect(Box::new(new_i)));
        }
    }
}
5467
// Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
// Only from Generic source, to prevent double-wrapping
if matches!(source, DialectType::Generic) {
    if let Expression::Div(ref op) = e {
        // Pick the float spelling each target family uses; None means the
        // target already performs float division on `/` and needs no cast.
        let cast_type = match target {
            DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
                precision: None,
                scale: None,
                real_spelling: false,
            }),
            DialectType::Drill
            | DialectType::Trino
            | DialectType::Athena
            | DialectType::Presto => Some(DataType::Double {
                precision: None,
                scale: None,
            }),
            DialectType::PostgreSQL
            | DialectType::Redshift
            | DialectType::Materialize
            | DialectType::Teradata
            | DialectType::RisingWave => Some(DataType::Double {
                precision: None,
                scale: None,
            }),
            _ => None,
        };
        if let Some(dt) = cast_type {
            // Casting only the left operand is enough: SQL promotes the
            // whole division to floating point.
            let cast_left = Expression::Cast(Box::new(Cast {
                this: op.left.clone(),
                to: dt,
                double_colon_syntax: false,
                trailing_comments: Vec::new(),
                format: None,
                default: None,
                inferred_type: None,
            }));
            // Rebuild the BinaryOp, preserving attached comments.
            let new_op = crate::expressions::BinaryOp {
                left: cast_left,
                right: op.right.clone(),
                left_comments: op.left_comments.clone(),
                operator_comments: op.operator_comments.clone(),
                trailing_comments: op.trailing_comments.clone(),
                inferred_type: None,
            };
            return Ok(Expression::Div(Box::new(new_op)));
        }
    }
}
5517
// CREATE DATABASE -> CREATE SCHEMA for DuckDB target
// (DROP DATABASE is mapped symmetrically to DROP SCHEMA.)
if matches!(target, DialectType::DuckDB) {
    if let Expression::CreateDatabase(db) = e {
        // Carry over IF NOT EXISTS; other CREATE DATABASE options are not copied.
        let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
        schema.if_not_exists = db.if_not_exists;
        return Ok(Expression::CreateSchema(Box::new(schema)));
    }
    if let Expression::DropDatabase(db) = e {
        // Carry over IF EXISTS.
        let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
        schema.if_exists = db.if_exists;
        return Ok(Expression::DropSchema(Box::new(schema)));
    }
}
5531
// Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets:
// CAST(x AS Nullable(T)) -> CAST(x AS T'), mapping ClickHouse type names
// to their portable equivalents.
if matches!(source, DialectType::ClickHouse)
    && !matches!(target, DialectType::ClickHouse)
{
    if let Expression::Cast(ref c) = e {
        if let DataType::Custom { ref name } = c.to {
            // Length is checked before slicing, so `name[..9]` cannot be out
            // of bounds. NOTE(review): byte slicing would still panic on a
            // non-ASCII char boundary; type names are presumably ASCII — confirm.
            if name.len() >= 9 && name[..9].eq_ignore_ascii_case("NULLABLE(") && name.ends_with(")") {
                let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
                let inner_upper = inner.to_ascii_uppercase();
                // Map known ClickHouse inner types to canonical DataTypes;
                // anything unrecognized passes through as a Custom type.
                let new_dt = match inner_upper.as_str() {
                    "DATETIME" | "DATETIME64" => DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    "DATE" => DataType::Date,
                    "INT64" | "BIGINT" => DataType::BigInt { length: None },
                    "INT32" | "INT" | "INTEGER" => DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    "FLOAT64" | "DOUBLE" => DataType::Double {
                        precision: None,
                        scale: None,
                    },
                    "STRING" => DataType::Text,
                    _ => DataType::Custom {
                        name: inner.to_string(),
                    },
                };
                let mut new_cast = c.clone();
                new_cast.to = new_dt;
                return Ok(Expression::Cast(new_cast));
            }
        }
    }
}
5568
// ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
if matches!(target, DialectType::Snowflake) {
    if let Expression::ArrayConcatAgg(ref agg) = e {
        let mut agg_clone = agg.as_ref().clone();
        agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
        let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
        // Wrap the aggregate in ARRAY_FLATTEN to concatenate the collected arrays.
        let flatten = Expression::Function(Box::new(Function::new(
            "ARRAY_FLATTEN".to_string(),
            vec![array_agg],
        )));
        return Ok(flatten);
    }
}

// ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
// (BigQuery keeps the dedicated AST node; Snowflake is handled above).
if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
    if let Expression::ArrayConcatAgg(agg) = e {
        // Downgrade the dedicated node to a plain function call, keeping
        // only the aggregated expression as the single argument.
        let arg = agg.this;
        return Ok(Expression::Function(Box::new(Function::new(
            "ARRAY_CONCAT_AGG".to_string(),
            vec![arg],
        ))));
    }
}
5593
// Determine what action to take by inspecting e immutably.
// `e` is borrowed for the whole classification match; the chosen Action
// is applied afterwards, once the borrow has ended.
let action = {
    // Snowflake/BigQuery GREATEST/LEAST propagate NULL if any argument is NULL...
    let source_propagates_nulls =
        matches!(source, DialectType::Snowflake | DialectType::BigQuery);
    // ...whereas DuckDB/PostgreSQL skip NULL arguments, so a rewrite is needed
    // when crossing between the two families.
    let target_ignores_nulls =
        matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
5601 match &e {
5602 Expression::Function(f) => {
5603 let name = f.name.to_ascii_uppercase();
5604 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
5605 if (name == "DATE_PART" || name == "DATEPART")
5606 && f.args.len() == 2
5607 && matches!(target, DialectType::Snowflake)
5608 && !matches!(source, DialectType::Snowflake)
5609 && matches!(
5610 &f.args[0],
5611 Expression::Literal(crate::expressions::Literal::String(_))
5612 )
5613 {
5614 Action::DatePartUnquote
5615 } else if source_propagates_nulls
5616 && target_ignores_nulls
5617 && (name == "GREATEST" || name == "LEAST")
5618 && f.args.len() >= 2
5619 {
5620 Action::GreatestLeastNull
5621 } else if matches!(source, DialectType::Snowflake)
5622 && name == "ARRAY_GENERATE_RANGE"
5623 && f.args.len() >= 2
5624 {
5625 Action::ArrayGenerateRange
5626 } else if matches!(source, DialectType::Snowflake)
5627 && matches!(target, DialectType::DuckDB)
5628 && name == "DATE_TRUNC"
5629 && f.args.len() == 2
5630 {
5631 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
5632 // Logic based on Python sqlglot's input_type_preserved flag:
5633 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
5634 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
5635 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
5636 let unit_str = match &f.args[0] {
5637 Expression::Literal(crate::expressions::Literal::String(s)) => {
5638 Some(s.to_ascii_uppercase())
5639 }
5640 _ => None,
5641 };
5642 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
5643 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
5644 });
5645 match &f.args[1] {
5646 Expression::Cast(c) => match &c.to {
5647 DataType::Time { .. } => Action::DateTruncWrapCast,
5648 DataType::Custom { name }
5649 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
5650 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
5651 {
5652 Action::DateTruncWrapCast
5653 }
5654 DataType::Timestamp { timezone: true, .. } => {
5655 Action::DateTruncWrapCast
5656 }
5657 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
5658 DataType::Timestamp {
5659 timezone: false, ..
5660 } if is_date_unit => Action::DateTruncWrapCast,
5661 _ => Action::None,
5662 },
5663 _ => Action::None,
5664 }
5665 } else if matches!(source, DialectType::Snowflake)
5666 && matches!(target, DialectType::DuckDB)
5667 && name == "TO_DATE"
5668 && f.args.len() == 1
5669 && !matches!(
5670 &f.args[0],
5671 Expression::Literal(crate::expressions::Literal::String(_))
5672 )
5673 {
5674 Action::ToDateToCast
5675 } else if !matches!(source, DialectType::Redshift)
5676 && matches!(target, DialectType::Redshift)
5677 && name == "CONVERT_TIMEZONE"
5678 && (f.args.len() == 2 || f.args.len() == 3)
5679 {
5680 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
5681 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
5682 // The Redshift parser adds 'UTC' as default source_tz, but when
5683 // transpiling from other dialects, we should preserve the original form.
5684 Action::ConvertTimezoneToExpr
5685 } else if matches!(source, DialectType::Snowflake)
5686 && matches!(target, DialectType::DuckDB)
5687 && name == "REGEXP_REPLACE"
5688 && f.args.len() == 4
5689 && !matches!(
5690 &f.args[3],
5691 Expression::Literal(crate::expressions::Literal::String(_))
5692 )
5693 {
5694 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
5695 Action::RegexpReplaceSnowflakeToDuckDB
5696 } else if matches!(source, DialectType::Snowflake)
5697 && matches!(target, DialectType::DuckDB)
5698 && name == "REGEXP_REPLACE"
5699 && f.args.len() == 5
5700 {
5701 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB
5702 Action::RegexpReplacePositionSnowflakeToDuckDB
5703 } else if matches!(source, DialectType::Snowflake)
5704 && matches!(target, DialectType::DuckDB)
5705 && name == "REGEXP_SUBSTR"
5706 {
5707 // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
5708 Action::RegexpSubstrSnowflakeToDuckDB
5709 } else if matches!(source, DialectType::Snowflake)
5710 && matches!(target, DialectType::Snowflake)
5711 && (name == "REGEXP_SUBSTR" || name == "REGEXP_SUBSTR_ALL")
5712 && f.args.len() == 6
5713 {
5714 // Snowflake identity: strip trailing group=0
5715 Action::RegexpSubstrSnowflakeIdentity
5716 } else if matches!(source, DialectType::Snowflake)
5717 && matches!(target, DialectType::DuckDB)
5718 && name == "REGEXP_SUBSTR_ALL"
5719 {
5720 // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
5721 Action::RegexpSubstrAllSnowflakeToDuckDB
5722 } else if matches!(source, DialectType::Snowflake)
5723 && matches!(target, DialectType::DuckDB)
5724 && name == "REGEXP_COUNT"
5725 {
5726 // Snowflake REGEXP_COUNT -> DuckDB LENGTH(REGEXP_EXTRACT_ALL(...))
5727 Action::RegexpCountSnowflakeToDuckDB
5728 } else if matches!(source, DialectType::Snowflake)
5729 && matches!(target, DialectType::DuckDB)
5730 && name == "REGEXP_INSTR"
5731 {
5732 // Snowflake REGEXP_INSTR -> DuckDB complex CASE expression
5733 Action::RegexpInstrSnowflakeToDuckDB
5734 } else if matches!(source, DialectType::BigQuery)
5735 && matches!(target, DialectType::Snowflake)
5736 && name == "REGEXP_EXTRACT_ALL"
5737 {
5738 // BigQuery REGEXP_EXTRACT_ALL -> Snowflake REGEXP_SUBSTR_ALL
5739 Action::RegexpExtractAllToSnowflake
5740 } else if name == "_BQ_TO_HEX" {
5741 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
5742 Action::BigQueryToHexBare
5743 } else if matches!(source, DialectType::BigQuery)
5744 && !matches!(target, DialectType::BigQuery)
5745 {
5746 // BigQuery-specific functions that need to be converted to standard forms
5747 match name.as_str() {
5748 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
5749 | "DATE_DIFF"
5750 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
5751 | "DATETIME_ADD" | "DATETIME_SUB"
5752 | "TIME_ADD" | "TIME_SUB"
5753 | "DATE_ADD" | "DATE_SUB"
5754 | "SAFE_DIVIDE"
5755 | "GENERATE_UUID"
5756 | "COUNTIF"
5757 | "EDIT_DISTANCE"
5758 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
5759 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
5760 | "TO_HEX"
5761 | "TO_JSON_STRING"
5762 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
5763 | "DIV"
5764 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
5765 | "LAST_DAY"
5766 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
5767 | "REGEXP_CONTAINS"
5768 | "CONTAINS_SUBSTR"
5769 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
5770 | "SAFE_CAST"
5771 | "GENERATE_DATE_ARRAY"
5772 | "PARSE_DATE" | "PARSE_TIMESTAMP"
5773 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
5774 | "ARRAY_CONCAT"
5775 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
5776 | "INSTR"
5777 | "MD5" | "SHA1" | "SHA256" | "SHA512"
5778 | "GENERATE_UUID()" // just in case
5779 | "REGEXP_EXTRACT_ALL"
5780 | "REGEXP_EXTRACT"
5781 | "INT64"
5782 | "ARRAY_CONCAT_AGG"
5783 | "DATE_DIFF(" // just in case
5784 | "TO_HEX_MD5" // internal
5785 | "MOD"
5786 | "CONCAT"
5787 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
5788 | "STRUCT"
5789 | "ROUND"
5790 | "MAKE_INTERVAL"
5791 | "ARRAY_TO_STRING"
5792 | "PERCENTILE_CONT"
5793 => Action::BigQueryFunctionNormalize,
5794 "ARRAY" if matches!(target, DialectType::Snowflake)
5795 && f.args.len() == 1
5796 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
5797 => Action::BigQueryArraySelectAsStructToSnowflake,
5798 _ => Action::None,
5799 }
5800 } else if matches!(source, DialectType::BigQuery)
5801 && matches!(target, DialectType::BigQuery)
5802 {
5803 // BigQuery -> BigQuery normalizations
5804 match name.as_str() {
5805 "TIMESTAMP_DIFF"
5806 | "DATETIME_DIFF"
5807 | "TIME_DIFF"
5808 | "DATE_DIFF"
5809 | "DATE_ADD"
5810 | "TO_HEX"
5811 | "CURRENT_TIMESTAMP"
5812 | "CURRENT_DATE"
5813 | "CURRENT_TIME"
5814 | "CURRENT_DATETIME"
5815 | "GENERATE_DATE_ARRAY"
5816 | "INSTR"
5817 | "FORMAT_DATETIME"
5818 | "DATETIME"
5819 | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
5820 _ => Action::None,
5821 }
5822 } else {
5823 // Generic function normalization for non-BigQuery sources
5824 match name.as_str() {
5825 "ARBITRARY" | "AGGREGATE"
5826 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
5827 | "STRUCT_EXTRACT"
5828 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
5829 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
5830 | "SUBSTRINGINDEX"
5831 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
5832 | "UNICODE"
5833 | "XOR"
5834 | "ARRAY_REVERSE_SORT"
5835 | "ENCODE" | "DECODE"
5836 | "QUANTILE"
5837 | "EPOCH" | "EPOCH_MS"
5838 | "HASHBYTES"
5839 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
5840 | "APPROX_DISTINCT"
5841 | "DATE_PARSE" | "FORMAT_DATETIME"
5842 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
5843 | "RLIKE"
5844 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
5845 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
5846 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
5847 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
5848 | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
5849 | "MAP" | "MAP_FROM_ENTRIES"
5850 | "COLLECT_LIST" | "COLLECT_SET"
5851 | "ISNAN" | "IS_NAN"
5852 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
5853 | "FORMAT_NUMBER"
5854 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
5855 | "ELEMENT_AT"
5856 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
5857 | "SPLIT_PART"
5858 // GENERATE_SERIES: handled separately below
5859 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
5860 | "JSON_QUERY" | "JSON_VALUE"
5861 | "JSON_SEARCH"
5862 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
5863 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
5864 | "CURDATE" | "CURTIME"
5865 | "ARRAY_TO_STRING"
5866 | "ARRAY_SORT" | "SORT_ARRAY"
5867 | "LEFT" | "RIGHT"
5868 | "MAP_FROM_ARRAYS"
5869 | "LIKE" | "ILIKE"
5870 | "ARRAY_CONCAT" | "LIST_CONCAT"
5871 | "QUANTILE_CONT" | "QUANTILE_DISC"
5872 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
5873 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
5874 | "LOCATE" | "STRPOS" | "INSTR"
5875 | "CHAR"
5876 // CONCAT: handled separately for COALESCE wrapping
5877 | "ARRAY_JOIN"
5878 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
5879 | "ISNULL"
5880 | "MONTHNAME"
5881 | "TO_TIMESTAMP"
5882 | "TO_DATE"
5883 | "TO_JSON"
5884 | "REGEXP_SPLIT"
5885 | "SPLIT"
5886 | "FORMATDATETIME"
5887 | "ARRAYJOIN"
5888 | "SPLITBYSTRING" | "SPLITBYREGEXP"
5889 | "NVL"
5890 | "TO_CHAR"
5891 | "DBMS_RANDOM.VALUE"
5892 | "REGEXP_LIKE"
5893 | "REPLICATE"
5894 | "LEN"
5895 | "COUNT_BIG"
5896 | "DATEFROMPARTS"
5897 | "DATETIMEFROMPARTS"
5898 | "CONVERT" | "TRY_CONVERT"
5899 | "STRFTIME" | "STRPTIME"
5900 | "DATE_FORMAT" | "FORMAT_DATE"
5901 | "PARSE_TIMESTAMP" | "PARSE_DATE"
5902 | "FROM_BASE64" | "TO_BASE64"
5903 | "GETDATE"
5904 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
5905 | "TO_UTF8" | "FROM_UTF8"
5906 | "STARTS_WITH" | "STARTSWITH"
5907 | "APPROX_COUNT_DISTINCT"
5908 | "JSON_FORMAT"
5909 | "SYSDATE"
5910 | "LOGICAL_OR" | "LOGICAL_AND"
5911 | "MONTHS_ADD"
5912 | "SCHEMA_NAME"
5913 | "STRTOL"
5914 | "EDITDIST3"
5915 | "FORMAT"
5916 | "LIST_CONTAINS" | "LIST_HAS"
5917 | "VARIANCE" | "STDDEV"
5918 | "ISINF"
5919 | "TO_UNIXTIME"
5920 | "FROM_UNIXTIME"
5921 | "DATEPART" | "DATE_PART"
5922 | "DATENAME"
5923 | "STRING_AGG"
5924 | "JSON_ARRAYAGG"
5925 | "APPROX_QUANTILE"
5926 | "MAKE_DATE"
5927 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
5928 | "RANGE"
5929 | "TRY_ELEMENT_AT"
5930 | "STR_TO_MAP"
5931 | "STRING"
5932 | "STR_TO_TIME"
5933 | "CURRENT_SCHEMA"
5934 | "LTRIM" | "RTRIM"
5935 | "UUID"
5936 | "FARM_FINGERPRINT"
5937 | "JSON_KEYS"
5938 | "WEEKOFYEAR"
5939 | "CONCAT_WS"
5940 | "ARRAY_SLICE"
5941 | "ARRAY_PREPEND"
5942 | "ARRAY_REMOVE"
5943 | "GENERATE_DATE_ARRAY"
5944 | "PARSE_JSON"
5945 | "JSON_REMOVE"
5946 | "JSON_SET"
5947 | "LEVENSHTEIN"
5948 | "CURRENT_VERSION"
5949 | "ARRAY_MAX"
5950 | "ARRAY_MIN"
5951 | "JAROWINKLER_SIMILARITY"
5952 | "CURRENT_SCHEMAS"
5953 => Action::GenericFunctionNormalize,
5954 // Canonical date functions -> dialect-specific
5955 "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
5956 "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
5957 "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
5958 "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
5959 "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
5960 "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
5961 "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
5962 "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
5963 "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
5964 "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
5965 "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
5966 "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
5967 "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
5968 "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
5969 "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
5970 "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
5971 "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
5972 // STR_TO_DATE(x, fmt) -> dialect-specific
5973 "STR_TO_DATE" if f.args.len() == 2
5974 && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
5975 "STR_TO_DATE" => Action::GenericFunctionNormalize,
5976 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
5977 "TS_OR_DS_ADD" if f.args.len() == 3
5978 && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
5979 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
5980 "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
5981 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
5982 "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
5983 // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
5984 "IFNULL" if f.args.len() == 2 => Action::IfnullToCoalesce,
5985 // IS_ASCII(x) -> dialect-specific
5986 "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
5987 // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
5988 "STR_POSITION" => Action::StrPositionConvert,
5989 // ARRAY_SUM -> dialect-specific
5990 "ARRAY_SUM" => Action::ArraySumConvert,
5991 // ARRAY_SIZE -> dialect-specific (Drill only)
5992 "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
5993 // ARRAY_ANY -> dialect-specific
5994 "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
5995 // Functions needing specific cross-dialect transforms
5996 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
5997 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
5998 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
5999 "ARRAY" if matches!(source, DialectType::BigQuery)
6000 && matches!(target, DialectType::Snowflake)
6001 && f.args.len() == 1
6002 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
6003 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::Snowflake | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
6004 "TRUNC" if f.args.len() == 2 && matches!(&f.args[1], Expression::Literal(Literal::String(_))) && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
6005 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 && !matches!(f.args.get(1), Some(Expression::Literal(Literal::String(_)))) => Action::GenericFunctionNormalize,
6006 // DATE_TRUNC('unit', x) from Generic source -> arg swap for BigQuery/Doris/Spark/MySQL
6007 "DATE_TRUNC" if f.args.len() == 2
6008 && matches!(source, DialectType::Generic)
6009 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
6010 | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
6011 // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
6012 "TIMESTAMP_TRUNC" if f.args.len() >= 2
6013 && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
6014 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
6015 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
6016 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
6017 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
6018 // GENERATE_SERIES with interval normalization for PG target
6019 "GENERATE_SERIES" if f.args.len() >= 3
6020 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
6021 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
6022 "GENERATE_SERIES" => Action::None, // passthrough for other cases
6023 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
6024 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
6025 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
6026 "CONCAT" => Action::GenericFunctionNormalize,
6027 // DIV(a, b) -> target-specific integer division
6028 "DIV" if f.args.len() == 2
6029 && matches!(source, DialectType::PostgreSQL)
6030 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
6031 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
6032 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
6033 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
6034 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
6035 "JSONB_EXISTS" if f.args.len() == 2
6036 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
6037 // DATE_BIN -> TIME_BUCKET for DuckDB
6038 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
6039 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
6040 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
6041 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
6042 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
6043 // ClickHouse any -> ANY_VALUE for other dialects
6044 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
6045 _ => Action::None,
6046 }
6047 }
6048 }
// Aggregate-function nodes: choose a cross-dialect rewrite by
// (upper-cased) name, guarded by source/target dialect pairs.
Expression::AggregateFunction(af) => {
    let name = af.name.to_ascii_uppercase();
    match name.as_str() {
        "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
        "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
        // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
        "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
            if matches!(target, DialectType::DuckDB) =>
        {
            Action::JsonObjectAggConvert
        }
        // ARRAY_AGG -> COLLECT_LIST for the Hive family.
        "ARRAY_AGG"
            if matches!(
                target,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) =>
        {
            Action::ArrayAggToCollectList
        }
        "MAX_BY" | "MIN_BY"
            if matches!(
                target,
                DialectType::ClickHouse
                    | DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::DuckDB
            ) =>
        {
            Action::MaxByMinByConvert
        }
        // Hive-style collectors -> ARRAY_AGG-based forms for these targets.
        "COLLECT_LIST"
            if matches!(
                target,
                DialectType::Presto | DialectType::Trino | DialectType::DuckDB
            ) =>
        {
            Action::CollectListToArrayAgg
        }
        "COLLECT_SET"
            if matches!(
                target,
                DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Snowflake
                    | DialectType::DuckDB
            ) =>
        {
            Action::CollectSetConvert
        }
        "PERCENTILE"
            if matches!(
                target,
                DialectType::DuckDB | DialectType::Presto | DialectType::Trino
            ) =>
        {
            Action::PercentileConvert
        }
        // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
        "CORR"
            if matches!(target, DialectType::DuckDB)
                && matches!(source, DialectType::Snowflake) =>
        {
            Action::CorrIsnanWrap
        }
        // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
        "APPROX_QUANTILES"
            if matches!(source, DialectType::BigQuery)
                && matches!(target, DialectType::DuckDB) =>
        {
            Action::BigQueryApproxQuantiles
        }
        // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
        "PERCENTILE_CONT"
            if matches!(source, DialectType::BigQuery)
                && matches!(target, DialectType::DuckDB)
                && af.args.len() >= 2 =>
        {
            Action::BigQueryPercentileContToDuckDB
        }
        // Unrecognized aggregates pass through untouched.
        _ => Action::None,
    }
}
// JSON_ARRAYAGG: only PostgreSQL needs normalization; others pass through.
Expression::JSONArrayAgg(_) => match target {
    DialectType::PostgreSQL => Action::GenericFunctionNormalize,
    _ => Action::None,
},
Expression::ToNumber(tn) => {
    // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
    // (only the bare single-argument form is rewritten).
    if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
        match target {
            // These dialects support TO_NUMBER natively; keep it.
            DialectType::Oracle
            | DialectType::Snowflake
            | DialectType::Teradata => Action::None,
            _ => Action::GenericFunctionNormalize,
        }
    } else {
        // With a format/precision/scale, leave the node untouched.
        Action::None
    }
}
Expression::Nvl2(_) => {
    // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
    // Keep as NVL2 for dialects that support it natively
    match target {
        DialectType::Oracle
        | DialectType::Snowflake
        | DialectType::Teradata
        | DialectType::Spark
        | DialectType::Databricks
        | DialectType::Redshift => Action::None,
        _ => Action::Nvl2Expand,
    }
}
Expression::Decode(_) | Expression::DecodeCase(_) => {
    // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
    // Keep as DECODE for Oracle/Snowflake
    match target {
        DialectType::Oracle | DialectType::Snowflake => Action::None,
        _ => Action::DecodeSimplify,
    }
}
Expression::Coalesce(ref cf) => {
    // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect
    // BigQuery keeps IFNULL natively when source is also BigQuery
    if cf.original_name.as_deref() == Some("IFNULL")
        && !(matches!(source, DialectType::BigQuery)
            && matches!(target, DialectType::BigQuery))
    {
        Action::IfnullToCoalesce
    } else {
        Action::None
    }
}
Expression::IfFunc(if_func) => {
    // Snowflake DIV0-style IF with a division in the false branch needs
    // typed division when targeting Presto/Trino/SQLite.
    if matches!(source, DialectType::Snowflake)
        && matches!(
            target,
            DialectType::Presto | DialectType::Trino | DialectType::SQLite
        )
        && matches!(if_func.false_value, Some(Expression::Div(_)))
    {
        Action::Div0TypedDivision
    } else {
        Action::None
    }
}
// TO_JSON is rewritten for these three target families; all arms share
// the same Action and differ only for readability.
Expression::ToJson(_) => match target {
    DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
    DialectType::BigQuery => Action::ToJsonConvert,
    DialectType::DuckDB => Action::ToJsonConvert,
    _ => Action::None,
},
6202 Expression::ArrayAgg(ref agg) => {
6203 if matches!(target, DialectType::MySQL | DialectType::SingleStore) {
6204 Action::ArrayAggToGroupConcat
6205 } else if matches!(
6206 target,
6207 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6208 ) {
6209 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
6210 Action::ArrayAggToCollectList
6211 } else if matches!(
6212 source,
6213 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6214 ) && matches!(target, DialectType::DuckDB)
6215 && agg.filter.is_some()
6216 {
6217 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
6218 // Need to add NOT x IS NULL to existing filter
6219 Action::ArrayAggNullFilter
6220 } else if matches!(target, DialectType::DuckDB)
6221 && agg.ignore_nulls == Some(true)
6222 && !agg.order_by.is_empty()
6223 {
6224 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
6225 Action::ArrayAggIgnoreNullsDuckDB
6226 } else if !matches!(source, DialectType::Snowflake) {
6227 Action::None
6228 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
6229 let is_array_agg = agg.name.as_deref().map_or(false, |n| n.eq_ignore_ascii_case("ARRAY_AGG"))
6230 || agg.name.is_none();
6231 if is_array_agg {
6232 Action::ArrayAggCollectList
6233 } else {
6234 Action::None
6235 }
6236 } else if matches!(
6237 target,
6238 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
6239 ) && agg.filter.is_none()
6240 {
6241 Action::ArrayAggFilter
6242 } else {
6243 Action::None
6244 }
6245 }
6246 Expression::WithinGroup(wg) => {
6247 if matches!(source, DialectType::Snowflake)
6248 && matches!(
6249 target,
6250 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
6251 )
6252 && matches!(wg.this, Expression::ArrayAgg(_))
6253 {
6254 Action::ArrayAggWithinGroupFilter
6255 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
6256 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
6257 || matches!(&wg.this, Expression::StringAgg(_))
6258 {
6259 Action::StringAggConvert
6260 } else if matches!(
6261 target,
6262 DialectType::Presto
6263 | DialectType::Trino
6264 | DialectType::Athena
6265 | DialectType::Spark
6266 | DialectType::Databricks
6267 ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
6268 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
6269 || matches!(&wg.this, Expression::PercentileCont(_)))
6270 {
6271 Action::PercentileContConvert
6272 } else {
6273 Action::None
6274 }
6275 }
6276 // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
6277 // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
6278 // DATETIME is the timezone-unaware type
6279 Expression::Cast(ref c) => {
6280 if c.format.is_some()
6281 && (matches!(source, DialectType::BigQuery)
6282 || matches!(source, DialectType::Teradata))
6283 {
6284 Action::BigQueryCastFormat
6285 } else if matches!(target, DialectType::BigQuery)
6286 && !matches!(source, DialectType::BigQuery)
6287 && matches!(
6288 c.to,
6289 DataType::Timestamp {
6290 timezone: false,
6291 ..
6292 }
6293 )
6294 {
6295 Action::CastTimestampToDatetime
6296 } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
6297 && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
6298 && matches!(
6299 c.to,
6300 DataType::Timestamp {
6301 timezone: false,
6302 ..
6303 }
6304 )
6305 {
6306 // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
6307 // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
6308 Action::CastTimestampToDatetime
6309 } else if matches!(
6310 source,
6311 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6312 ) && matches!(
6313 target,
6314 DialectType::Presto
6315 | DialectType::Trino
6316 | DialectType::Athena
6317 | DialectType::DuckDB
6318 | DialectType::Snowflake
6319 | DialectType::BigQuery
6320 | DialectType::Databricks
6321 | DialectType::TSQL
6322 ) {
6323 Action::HiveCastToTryCast
6324 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
6325 && matches!(target, DialectType::MySQL | DialectType::StarRocks)
6326 {
6327 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
6328 Action::CastTimestamptzToFunc
6329 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
6330 && matches!(
6331 target,
6332 DialectType::Hive
6333 | DialectType::Spark
6334 | DialectType::Databricks
6335 | DialectType::BigQuery
6336 )
6337 {
6338 // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
6339 Action::CastTimestampStripTz
6340 } else if matches!(&c.to, DataType::Json)
6341 && matches!(&c.this, Expression::Literal(Literal::String(_)))
6342 && matches!(
6343 target,
6344 DialectType::Presto
6345 | DialectType::Trino
6346 | DialectType::Athena
6347 | DialectType::Snowflake
6348 )
6349 {
6350 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
6351 // Only when the input is a string literal (JSON 'value' syntax)
6352 Action::JsonLiteralToJsonParse
6353 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
6354 && matches!(target, DialectType::Spark | DialectType::Databricks)
6355 {
6356 // CAST(x AS JSON) -> TO_JSON(x) for Spark
6357 Action::CastToJsonForSpark
6358 } else if (matches!(
6359 &c.to,
6360 DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
6361 )) && matches!(
6362 target,
6363 DialectType::Spark | DialectType::Databricks
6364 ) && (matches!(&c.this, Expression::ParseJson(_))
6365 || matches!(
6366 &c.this,
6367 Expression::Function(f)
6368 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
6369 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
6370 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
6371 ))
6372 {
6373 // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
6374 // -> FROM_JSON(..., type_string) for Spark
6375 Action::CastJsonToFromJson
6376 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6377 && matches!(
6378 c.to,
6379 DataType::Timestamp {
6380 timezone: false,
6381 ..
6382 }
6383 )
6384 && matches!(source, DialectType::DuckDB)
6385 {
6386 Action::StrftimeCastTimestamp
6387 } else if matches!(source, DialectType::DuckDB)
6388 && matches!(
6389 c.to,
6390 DataType::Decimal {
6391 precision: None,
6392 ..
6393 }
6394 )
6395 {
6396 Action::DecimalDefaultPrecision
6397 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
6398 && matches!(c.to, DataType::Char { length: None })
6399 && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
6400 {
6401 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
6402 Action::MysqlCastCharToText
6403 } else if matches!(
6404 source,
6405 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6406 ) && matches!(
6407 target,
6408 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6409 ) && Self::has_varchar_char_type(&c.to)
6410 {
6411 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
6412 Action::SparkCastVarcharToString
6413 } else {
6414 Action::None
6415 }
6416 }
6417 Expression::SafeCast(ref c) => {
6418 if c.format.is_some()
6419 && matches!(source, DialectType::BigQuery)
6420 && !matches!(target, DialectType::BigQuery)
6421 {
6422 Action::BigQueryCastFormat
6423 } else {
6424 Action::None
6425 }
6426 }
6427 // For DuckDB: DATE_TRUNC should preserve the input type
6428 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
6429 if matches!(source, DialectType::Snowflake)
6430 && matches!(target, DialectType::DuckDB)
6431 {
6432 Action::DateTruncWrapCast
6433 } else {
6434 Action::None
6435 }
6436 }
6437 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
6438 Expression::SetStatement(s) => {
6439 if matches!(target, DialectType::DuckDB)
6440 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
6441 && s.items.iter().any(|item| item.kind.is_none())
6442 {
6443 Action::SetToVariable
6444 } else {
6445 Action::None
6446 }
6447 }
6448 // Cross-dialect NULL ordering normalization.
6449 // When nulls_first is not specified, fill in the source dialect's implied
6450 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
6451 Expression::Ordered(o) => {
6452 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
6453 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
6454 Action::MysqlNullsOrdering
6455 } else {
6456 // Skip targets that don't support NULLS FIRST/LAST syntax
6457 let target_supports_nulls = !matches!(
6458 target,
6459 DialectType::MySQL
6460 | DialectType::TSQL
6461 | DialectType::StarRocks
6462 | DialectType::Doris
6463 );
6464 if o.nulls_first.is_none() && source != target && target_supports_nulls
6465 {
6466 Action::NullsOrdering
6467 } else {
6468 Action::None
6469 }
6470 }
6471 }
6472 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
6473 Expression::DataType(dt) => {
6474 if matches!(source, DialectType::BigQuery)
6475 && !matches!(target, DialectType::BigQuery)
6476 {
6477 match dt {
6478 DataType::Custom { ref name }
6479 if name.eq_ignore_ascii_case("INT64")
6480 || name.eq_ignore_ascii_case("FLOAT64")
6481 || name.eq_ignore_ascii_case("BOOL")
6482 || name.eq_ignore_ascii_case("BYTES")
6483 || name.eq_ignore_ascii_case("NUMERIC")
6484 || name.eq_ignore_ascii_case("STRING")
6485 || name.eq_ignore_ascii_case("DATETIME") =>
6486 {
6487 Action::BigQueryCastType
6488 }
6489 _ => Action::None,
6490 }
6491 } else if matches!(source, DialectType::TSQL) {
6492 // For TSQL source -> any target (including TSQL itself for REAL)
6493 match dt {
6494 // REAL -> FLOAT even for TSQL->TSQL
6495 DataType::Custom { ref name }
6496 if name.eq_ignore_ascii_case("REAL") =>
6497 {
6498 Action::TSQLTypeNormalize
6499 }
6500 DataType::Float {
6501 real_spelling: true,
6502 ..
6503 } => Action::TSQLTypeNormalize,
6504 // Other TSQL type normalizations only for non-TSQL targets
6505 DataType::Custom { ref name }
6506 if !matches!(target, DialectType::TSQL)
6507 && (name.eq_ignore_ascii_case("MONEY")
6508 || name.eq_ignore_ascii_case("SMALLMONEY")
6509 || name.eq_ignore_ascii_case("DATETIME2")
6510 || name.eq_ignore_ascii_case("IMAGE")
6511 || name.eq_ignore_ascii_case("BIT")
6512 || name.eq_ignore_ascii_case("ROWVERSION")
6513 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
6514 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
6515 || (name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC"))
6516 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("DATETIME2("))
6517 || (name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME("))) =>
6518 {
6519 Action::TSQLTypeNormalize
6520 }
6521 DataType::Float {
6522 precision: Some(_), ..
6523 } if !matches!(target, DialectType::TSQL) => {
6524 Action::TSQLTypeNormalize
6525 }
6526 DataType::TinyInt { .. }
6527 if !matches!(target, DialectType::TSQL) =>
6528 {
6529 Action::TSQLTypeNormalize
6530 }
6531 // INTEGER -> INT for Databricks/Spark targets
6532 DataType::Int {
6533 integer_spelling: true,
6534 ..
6535 } if matches!(
6536 target,
6537 DialectType::Databricks | DialectType::Spark
6538 ) =>
6539 {
6540 Action::TSQLTypeNormalize
6541 }
6542 _ => Action::None,
6543 }
6544 } else if (matches!(source, DialectType::Oracle)
6545 || matches!(source, DialectType::Generic))
6546 && !matches!(target, DialectType::Oracle)
6547 {
6548 match dt {
6549 DataType::Custom { ref name }
6550 if (name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2("))
6551 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2("))
6552 || name.eq_ignore_ascii_case("VARCHAR2")
6553 || name.eq_ignore_ascii_case("NVARCHAR2") =>
6554 {
6555 Action::OracleVarchar2ToVarchar
6556 }
6557 _ => Action::None,
6558 }
6559 } else if matches!(target, DialectType::Snowflake)
6560 && !matches!(source, DialectType::Snowflake)
6561 {
6562 // When target is Snowflake but source is NOT Snowflake,
6563 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
6564 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
6565 // should keep their FLOAT spelling.
6566 match dt {
6567 DataType::Float { .. } => Action::SnowflakeFloatProtect,
6568 _ => Action::None,
6569 }
6570 } else {
6571 Action::None
6572 }
6573 }
6574 // LOWER patterns from BigQuery TO_HEX conversions:
6575 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
6576 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
6577 Expression::Lower(uf) => {
6578 if matches!(source, DialectType::BigQuery) {
6579 match &uf.this {
6580 Expression::Lower(_) => Action::BigQueryToHexLower,
6581 Expression::Function(f)
6582 if f.name == "TO_HEX"
6583 && matches!(target, DialectType::BigQuery) =>
6584 {
6585 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
6586 Action::BigQueryToHexLower
6587 }
6588 _ => Action::None,
6589 }
6590 } else {
6591 Action::None
6592 }
6593 }
6594 // UPPER patterns from BigQuery TO_HEX conversions:
6595 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
6596 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
6597 Expression::Upper(uf) => {
6598 if matches!(source, DialectType::BigQuery) {
6599 match &uf.this {
6600 Expression::Lower(_) => Action::BigQueryToHexUpper,
6601 _ => Action::None,
6602 }
6603 } else {
6604 Action::None
6605 }
6606 }
6607 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
6608 // Snowflake supports LAST_DAY with unit, so keep it there
6609 Expression::LastDay(ld) => {
6610 if matches!(source, DialectType::BigQuery)
6611 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
6612 && ld.unit.is_some()
6613 {
6614 Action::BigQueryLastDayStripUnit
6615 } else {
6616 Action::None
6617 }
6618 }
6619 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
6620 Expression::SafeDivide(_) => {
6621 if matches!(source, DialectType::BigQuery)
6622 && !matches!(target, DialectType::BigQuery)
6623 {
6624 Action::BigQuerySafeDivide
6625 } else {
6626 Action::None
6627 }
6628 }
6629 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
6630 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
6631 Expression::AnyValue(ref agg) => {
6632 if matches!(source, DialectType::BigQuery)
6633 && matches!(target, DialectType::DuckDB)
6634 && agg.having_max.is_some()
6635 {
6636 Action::BigQueryAnyValueHaving
6637 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6638 && !matches!(source, DialectType::Spark | DialectType::Databricks)
6639 && agg.ignore_nulls.is_none()
6640 {
6641 Action::AnyValueIgnoreNulls
6642 } else {
6643 Action::None
6644 }
6645 }
6646 Expression::Any(ref q) => {
6647 if matches!(source, DialectType::PostgreSQL)
6648 && matches!(
6649 target,
6650 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6651 )
6652 && q.op.is_some()
6653 && !matches!(
6654 q.subquery,
6655 Expression::Select(_) | Expression::Subquery(_)
6656 )
6657 {
6658 Action::AnyToExists
6659 } else {
6660 Action::None
6661 }
6662 }
6663 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
6664 // Snowflake RLIKE does full-string match; DuckDB REGEXP_MATCHES is partial
6665 // So anchor the pattern with ^(...) $ for Snowflake -> DuckDB
6666 Expression::RegexpLike(_)
6667 if matches!(source, DialectType::Snowflake)
6668 && matches!(target, DialectType::DuckDB) =>
6669 {
6670 Action::RlikeSnowflakeToDuckDB
6671 }
6672 // RegexpLike from non-DuckDB sources -> REGEXP_MATCHES for DuckDB target
6673 // DuckDB's ~ is a full match, but other dialects' REGEXP/RLIKE is a partial match
6674 Expression::RegexpLike(_)
6675 if !matches!(source, DialectType::DuckDB)
6676 && matches!(target, DialectType::DuckDB) =>
6677 {
6678 Action::RegexpLikeToDuckDB
6679 }
6680 // Safe-division source -> non-safe target: NULLIF wrapping and/or CAST
6681 // Safe-division dialects: MySQL, DuckDB, SingleStore, TiDB, ClickHouse, Doris
6682 Expression::Div(ref op)
6683 if matches!(
6684 source,
6685 DialectType::MySQL
6686 | DialectType::DuckDB
6687 | DialectType::SingleStore
6688 | DialectType::TiDB
6689 | DialectType::ClickHouse
6690 | DialectType::Doris
6691 ) && matches!(
6692 target,
6693 DialectType::PostgreSQL
6694 | DialectType::Redshift
6695 | DialectType::Drill
6696 | DialectType::Trino
6697 | DialectType::Presto
6698 | DialectType::Athena
6699 | DialectType::TSQL
6700 | DialectType::Teradata
6701 | DialectType::SQLite
6702 | DialectType::BigQuery
6703 | DialectType::Snowflake
6704 | DialectType::Databricks
6705 | DialectType::Oracle
6706 | DialectType::Materialize
6707 | DialectType::RisingWave
6708 ) =>
6709 {
6710 // Only wrap if RHS is not already NULLIF
6711 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
6712 {
6713 Action::MySQLSafeDivide
6714 } else {
6715 Action::None
6716 }
6717 }
6718 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
6719 // For TSQL/Fabric, convert to sp_rename instead
6720 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
6721 if let Some(crate::expressions::AlterTableAction::RenameTable(
6722 ref new_tbl,
6723 )) = at.actions.first()
6724 {
6725 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
6726 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
6727 Action::AlterTableToSpRename
6728 } else if new_tbl.schema.is_some()
6729 && matches!(
6730 target,
6731 DialectType::BigQuery
6732 | DialectType::Doris
6733 | DialectType::StarRocks
6734 | DialectType::DuckDB
6735 | DialectType::PostgreSQL
6736 | DialectType::Redshift
6737 )
6738 {
6739 Action::AlterTableRenameStripSchema
6740 } else {
6741 Action::None
6742 }
6743 } else {
6744 Action::None
6745 }
6746 }
6747 // EPOCH(x) expression -> target-specific epoch conversion
6748 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
6749 Action::EpochConvert
6750 }
6751 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
6752 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
6753 Action::EpochMsConvert
6754 }
6755 // STRING_AGG -> GROUP_CONCAT for MySQL/SQLite
6756 Expression::StringAgg(_) => {
6757 if matches!(
6758 target,
6759 DialectType::MySQL
6760 | DialectType::SingleStore
6761 | DialectType::Doris
6762 | DialectType::StarRocks
6763 | DialectType::SQLite
6764 ) {
6765 Action::StringAggConvert
6766 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
6767 Action::StringAggConvert
6768 } else {
6769 Action::None
6770 }
6771 }
6772 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
6773 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
6774 Expression::GroupConcat(_) => Action::GroupConcatConvert,
6775 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
6776 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
6777 Action::ArrayLengthConvert
6778 }
6779 Expression::ArraySize(_) => {
6780 if matches!(target, DialectType::Drill) {
6781 Action::ArraySizeDrill
6782 } else {
6783 Action::ArrayLengthConvert
6784 }
6785 }
6786 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
6787 Expression::ArrayRemove(_) => match target {
6788 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
6789 Action::ArrayRemoveConvert
6790 }
6791 _ => Action::None,
6792 },
6793 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
6794 Expression::ArrayReverse(_) => match target {
6795 DialectType::ClickHouse => Action::ArrayReverseConvert,
6796 _ => Action::None,
6797 },
6798 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
6799 Expression::JsonKeys(_) => match target {
6800 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
6801 Action::JsonKeysConvert
6802 }
6803 _ => Action::None,
6804 },
6805 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
6806 Expression::ParseJson(_) => match target {
6807 DialectType::SQLite
6808 | DialectType::Doris
6809 | DialectType::MySQL
6810 | DialectType::StarRocks => Action::ParseJsonStrip,
6811 _ => Action::None,
6812 },
6813 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
6814 Expression::WeekOfYear(_)
6815 if matches!(target, DialectType::Snowflake)
6816 && !matches!(source, DialectType::Snowflake) =>
6817 {
6818 Action::WeekOfYearToWeekIso
6819 }
6820 // NVL: clear original_name so generator uses dialect-specific function names
6821 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
6822 // XOR: expand for dialects that don't support the XOR keyword
6823 Expression::Xor(_) => {
6824 let target_supports_xor = matches!(
6825 target,
6826 DialectType::MySQL
6827 | DialectType::SingleStore
6828 | DialectType::Doris
6829 | DialectType::StarRocks
6830 );
6831 if !target_supports_xor {
6832 Action::XorExpand
6833 } else {
6834 Action::None
6835 }
6836 }
6837 // TSQL #table -> temp table normalization (CREATE TABLE)
6838 Expression::CreateTable(ct)
6839 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6840 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6841 && ct.name.name.name.starts_with('#') =>
6842 {
6843 Action::TempTableHash
6844 }
6845 // TSQL #table -> strip # from table references in SELECT/etc.
6846 Expression::Table(tr)
6847 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6848 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6849 && tr.name.name.starts_with('#') =>
6850 {
6851 Action::TempTableHash
6852 }
6853 // TSQL #table -> strip # from DROP TABLE names
6854 Expression::DropTable(ref dt)
6855 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6856 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6857 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
6858 {
6859 Action::TempTableHash
6860 }
6861 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6862 Expression::JsonExtract(_)
6863 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6864 {
6865 Action::JsonExtractToTsql
6866 }
6867 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6868 Expression::JsonExtractScalar(_)
6869 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6870 {
6871 Action::JsonExtractToTsql
6872 }
6873 // JSON_EXTRACT -> JSONExtractString for ClickHouse
6874 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
6875 Action::JsonExtractToClickHouse
6876 }
6877 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
6878 Expression::JsonExtractScalar(_)
6879 if matches!(target, DialectType::ClickHouse) =>
6880 {
6881 Action::JsonExtractToClickHouse
6882 }
6883 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
6884 Expression::JsonExtract(ref f)
6885 if !f.arrow_syntax
6886 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
6887 {
6888 Action::JsonExtractToArrow
6889 }
6890 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
6891 Expression::JsonExtract(ref f)
6892 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
6893 && !matches!(
6894 source,
6895 DialectType::PostgreSQL
6896 | DialectType::Redshift
6897 | DialectType::Materialize
6898 )
6899 && matches!(&f.path, Expression::Literal(Literal::String(s)) if s.starts_with('$')) =>
6900 {
6901 Action::JsonExtractToGetJsonObject
6902 }
6903 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
6904 Expression::JsonExtract(_)
6905 if matches!(
6906 target,
6907 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6908 ) =>
6909 {
6910 Action::JsonExtractToGetJsonObject
6911 }
6912 // JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Snowflake, SQLite
6913 // Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
6914 Expression::JsonExtractScalar(ref f)
6915 if !f.arrow_syntax
6916 && !f.hash_arrow_syntax
6917 && matches!(
6918 target,
6919 DialectType::PostgreSQL
6920 | DialectType::Redshift
6921 | DialectType::Snowflake
6922 | DialectType::SQLite
6923 | DialectType::DuckDB
6924 ) =>
6925 {
6926 Action::JsonExtractScalarConvert
6927 }
6928 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
6929 Expression::JsonExtractScalar(_)
6930 if matches!(
6931 target,
6932 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6933 ) =>
6934 {
6935 Action::JsonExtractScalarToGetJsonObject
6936 }
6937 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
6938 Expression::JsonExtract(ref f)
6939 if !f.arrow_syntax
6940 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
6941 {
6942 Action::JsonPathNormalize
6943 }
6944 // JsonQuery (parsed JSON_QUERY) -> target-specific
6945 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
6946 // JsonValue (parsed JSON_VALUE) -> target-specific
6947 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
6948 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
6949 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
6950 Expression::AtTimeZone(_)
6951 if matches!(
6952 target,
6953 DialectType::Presto
6954 | DialectType::Trino
6955 | DialectType::Athena
6956 | DialectType::Spark
6957 | DialectType::Databricks
6958 | DialectType::BigQuery
6959 | DialectType::Snowflake
6960 ) =>
6961 {
6962 Action::AtTimeZoneConvert
6963 }
6964 // DAY_OF_WEEK -> dialect-specific
6965 Expression::DayOfWeek(_)
6966 if matches!(
6967 target,
6968 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
6969 ) =>
6970 {
6971 Action::DayOfWeekConvert
6972 }
6973 // CURRENT_USER -> CURRENT_USER() for Snowflake
6974 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
6975 Action::CurrentUserParens
6976 }
6977 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
6978 Expression::ElementAt(_)
6979 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
6980 {
6981 Action::ElementAtConvert
6982 }
6983 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
6984 Expression::ArrayFunc(ref arr)
6985 if !arr.bracket_notation
6986 && matches!(
6987 target,
6988 DialectType::Spark
6989 | DialectType::Databricks
6990 | DialectType::Hive
6991 | DialectType::BigQuery
6992 | DialectType::DuckDB
6993 | DialectType::Snowflake
6994 | DialectType::Presto
6995 | DialectType::Trino
6996 | DialectType::Athena
6997 | DialectType::ClickHouse
6998 | DialectType::StarRocks
6999 ) =>
7000 {
7001 Action::ArraySyntaxConvert
7002 }
7003 // VARIANCE expression -> varSamp for ClickHouse
7004 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
7005 Action::VarianceToClickHouse
7006 }
7007 // STDDEV expression -> stddevSamp for ClickHouse
7008 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
7009 Action::StddevToClickHouse
7010 }
7011 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
7012 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
7013 Action::ApproxQuantileConvert
7014 }
7015 // MonthsBetween -> target-specific
7016 Expression::MonthsBetween(_)
7017 if !matches!(
7018 target,
7019 DialectType::Spark | DialectType::Databricks | DialectType::Hive
7020 ) =>
7021 {
7022 Action::MonthsBetweenConvert
7023 }
7024 // AddMonths -> target-specific DATEADD/DATE_ADD
7025 Expression::AddMonths(_) => Action::AddMonthsConvert,
7026 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
7027 Expression::MapFromArrays(_)
7028 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
7029 {
7030 Action::MapFromArraysConvert
7031 }
7032 // CURRENT_USER -> CURRENT_USER() for Spark
7033 Expression::CurrentUser(_)
7034 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
7035 {
7036 Action::CurrentUserSparkParens
7037 }
7038 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
7039 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
7040 if matches!(
7041 source,
7042 DialectType::Spark | DialectType::Databricks | DialectType::Hive
7043 ) && matches!(&f.this, Expression::Literal(Literal::String(_)))
7044 && matches!(
7045 target,
7046 DialectType::DuckDB
7047 | DialectType::Presto
7048 | DialectType::Trino
7049 | DialectType::Athena
7050 | DialectType::PostgreSQL
7051 | DialectType::Redshift
7052 ) =>
7053 {
7054 Action::SparkDateFuncCast
7055 }
7056 // $parameter -> @parameter for BigQuery
7057 Expression::Parameter(ref p)
7058 if matches!(target, DialectType::BigQuery)
7059 && matches!(source, DialectType::DuckDB)
7060 && (p.style == crate::expressions::ParameterStyle::Dollar
7061 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
7062 {
7063 Action::DollarParamConvert
7064 }
7065 // EscapeString literal: normalize literal newlines to \n
7066 Expression::Literal(Literal::EscapeString(ref s))
7067 if s.contains('\n') || s.contains('\r') || s.contains('\t') =>
7068 {
7069 Action::EscapeStringNormalize
7070 }
7071 // straight_join: keep lowercase for DuckDB, quote for MySQL
7072 Expression::Column(ref col)
7073 if col.name.name == "STRAIGHT_JOIN"
7074 && col.table.is_none()
7075 && matches!(source, DialectType::DuckDB)
7076 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
7077 {
7078 Action::StraightJoinCase
7079 }
7080 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
7081 // Snowflake INTERVAL format: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
7082 Expression::Interval(ref iv)
7083 if matches!(
7084 target,
7085 DialectType::Snowflake
7086 | DialectType::PostgreSQL
7087 | DialectType::Redshift
7088 ) && iv.unit.is_some()
7089 && matches!(
7090 &iv.this,
7091 Some(Expression::Literal(Literal::String(_)))
7092 ) =>
7093 {
7094 Action::SnowflakeIntervalFormat
7095 }
7096 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
7097 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
7098 if let Some(ref sample) = ts.sample {
7099 if !sample.explicit_method {
7100 Action::TablesampleReservoir
7101 } else {
7102 Action::None
7103 }
7104 } else {
7105 Action::None
7106 }
7107 }
7108 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
7109 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
7110 Expression::TableSample(ref ts)
7111 if matches!(target, DialectType::Snowflake)
7112 && !matches!(source, DialectType::Snowflake)
7113 && ts.sample.is_some() =>
7114 {
7115 if let Some(ref sample) = ts.sample {
7116 if !sample.explicit_method {
7117 Action::TablesampleSnowflakeStrip
7118 } else {
7119 Action::None
7120 }
7121 } else {
7122 Action::None
7123 }
7124 }
7125 Expression::Table(ref t)
7126 if matches!(target, DialectType::Snowflake)
7127 && !matches!(source, DialectType::Snowflake)
7128 && t.table_sample.is_some() =>
7129 {
7130 if let Some(ref sample) = t.table_sample {
7131 if !sample.explicit_method {
7132 Action::TablesampleSnowflakeStrip
7133 } else {
7134 Action::None
7135 }
7136 } else {
7137 Action::None
7138 }
7139 }
7140 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
7141 Expression::AlterTable(ref at)
7142 if matches!(target, DialectType::TSQL | DialectType::Fabric)
7143 && !at.actions.is_empty()
7144 && matches!(
7145 at.actions.first(),
7146 Some(crate::expressions::AlterTableAction::RenameTable(_))
7147 ) =>
7148 {
7149 Action::AlterTableToSpRename
7150 }
7151 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
7152 Expression::Subscript(ref sub)
7153 if matches!(
7154 target,
7155 DialectType::BigQuery
7156 | DialectType::Hive
7157 | DialectType::Spark
7158 | DialectType::Databricks
7159 ) && matches!(
7160 source,
7161 DialectType::DuckDB
7162 | DialectType::PostgreSQL
7163 | DialectType::Presto
7164 | DialectType::Trino
7165 | DialectType::Redshift
7166 | DialectType::ClickHouse
7167 ) && matches!(&sub.index, Expression::Literal(Literal::Number(ref n)) if n.parse::<i64>().unwrap_or(0) > 0) =>
7168 {
7169 Action::ArrayIndexConvert
7170 }
7171 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
7172 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
7173 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
7174 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
// Window-function NULLS-ordering dispatch. The checks are ordered by
// precedence: BigQuery stripping first, then the DuckDB->MySQL CASE WHEN
// rewrite, then RESPECT NULLS handling for the inner value function.
Expression::WindowFunction(ref wf) => {
    // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
    // EXCEPT for ROW_NUMBER which keeps NULLS LAST
    let is_row_number = matches!(wf.this, Expression::RowNumber(_));
    if matches!(target, DialectType::BigQuery)
        && !is_row_number
        && !wf.over.order_by.is_empty()
        && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
    {
        Action::BigQueryNullsOrdering
        // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
        // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
    } else {
        let source_nulls_last = matches!(source, DialectType::DuckDB);
        // RANGE/GROUPS frames are value-based: wrapping the ORDER BY key in
        // CASE WHEN would change frame boundaries, so those frames opt out.
        let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
            matches!(
                f.kind,
                crate::expressions::WindowFrameKind::Range
                    | crate::expressions::WindowFrameKind::Groups
            )
        });
        // Only ascending keys (`!o.desc`) need the NULLS LAST simulation.
        if source_nulls_last
            && matches!(target, DialectType::MySQL)
            && !wf.over.order_by.is_empty()
            && wf.over.order_by.iter().any(|o| !o.desc)
            && !has_range_frame
        {
            Action::MysqlNullsLastRewrite
        } else {
            match &wf.this {
                // ignore_nulls == Some(false) encodes an explicit RESPECT NULLS
                // on FIRST_VALUE / LAST_VALUE.
                Expression::FirstValue(ref vf)
                | Expression::LastValue(ref vf)
                    if vf.ignore_nulls == Some(false) =>
                {
                    // RESPECT NULLS
                    match target {
                        DialectType::SQLite => Action::RespectNullsConvert,
                        _ => Action::None,
                    }
                }
                _ => Action::None,
            }
        }
    }
}
7220 // CREATE TABLE a LIKE b -> dialect-specific transformations
7221 Expression::CreateTable(ref ct)
7222 if ct.columns.is_empty()
7223 && ct.constraints.iter().any(|c| {
7224 matches!(c, crate::expressions::TableConstraint::Like { .. })
7225 })
7226 && matches!(
7227 target,
7228 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
7229 ) =>
7230 {
7231 Action::CreateTableLikeToCtas
7232 }
7233 Expression::CreateTable(ref ct)
7234 if ct.columns.is_empty()
7235 && ct.constraints.iter().any(|c| {
7236 matches!(c, crate::expressions::TableConstraint::Like { .. })
7237 })
7238 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
7239 {
7240 Action::CreateTableLikeToSelectInto
7241 }
7242 Expression::CreateTable(ref ct)
7243 if ct.columns.is_empty()
7244 && ct.constraints.iter().any(|c| {
7245 matches!(c, crate::expressions::TableConstraint::Like { .. })
7246 })
7247 && matches!(target, DialectType::ClickHouse) =>
7248 {
7249 Action::CreateTableLikeToAs
7250 }
7251 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
7252 Expression::CreateTable(ref ct)
7253 if matches!(target, DialectType::DuckDB)
7254 && matches!(
7255 source,
7256 DialectType::DuckDB
7257 | DialectType::Spark
7258 | DialectType::Databricks
7259 | DialectType::Hive
7260 ) =>
7261 {
7262 let has_comment = ct.columns.iter().any(|c| {
7263 c.comment.is_some()
7264 || c.constraints.iter().any(|con| {
7265 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
7266 })
7267 });
7268 let has_props = !ct.properties.is_empty();
7269 if has_comment || has_props {
7270 Action::CreateTableStripComment
7271 } else {
7272 Action::None
7273 }
7274 }
7275 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
7276 Expression::Array(_)
7277 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
7278 {
7279 Action::ArrayConcatBracketConvert
7280 }
7281 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
7282 Expression::ArrayFunc(ref arr)
7283 if arr.bracket_notation
7284 && matches!(source, DialectType::BigQuery)
7285 && matches!(target, DialectType::Redshift) =>
7286 {
7287 Action::ArrayConcatBracketConvert
7288 }
7289 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
7290 Expression::BitwiseOrAgg(ref f)
7291 | Expression::BitwiseAndAgg(ref f)
7292 | Expression::BitwiseXorAgg(ref f) => {
7293 if matches!(target, DialectType::DuckDB) {
7294 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
7295 if let Expression::Cast(ref c) = f.this {
7296 match &c.to {
7297 DataType::Float { .. }
7298 | DataType::Double { .. }
7299 | DataType::Decimal { .. } => Action::BitAggFloatCast,
7300 DataType::Custom { ref name }
7301 if name.eq_ignore_ascii_case("REAL") =>
7302 {
7303 Action::BitAggFloatCast
7304 }
7305 _ => Action::None,
7306 }
7307 } else {
7308 Action::None
7309 }
7310 } else if matches!(target, DialectType::Snowflake) {
7311 Action::BitAggSnowflakeRename
7312 } else {
7313 Action::None
7314 }
7315 }
7316 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
7317 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
7318 Action::FilterToIff
7319 }
7320 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
7321 Expression::Avg(ref f)
7322 | Expression::Sum(ref f)
7323 | Expression::Min(ref f)
7324 | Expression::Max(ref f)
7325 | Expression::CountIf(ref f)
7326 | Expression::Stddev(ref f)
7327 | Expression::StddevPop(ref f)
7328 | Expression::StddevSamp(ref f)
7329 | Expression::Variance(ref f)
7330 | Expression::VarPop(ref f)
7331 | Expression::VarSamp(ref f)
7332 | Expression::Median(ref f)
7333 | Expression::Mode(ref f)
7334 | Expression::First(ref f)
7335 | Expression::Last(ref f)
7336 | Expression::ApproxDistinct(ref f)
7337 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
7338 {
7339 Action::AggFilterToIff
7340 }
7341 Expression::Count(ref c)
7342 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
7343 {
7344 Action::AggFilterToIff
7345 }
7346 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
7347 Expression::Count(ref c)
7348 if c.distinct
7349 && matches!(&c.this, Some(Expression::Tuple(_)))
7350 && matches!(
7351 target,
7352 DialectType::Presto
7353 | DialectType::Trino
7354 | DialectType::DuckDB
7355 | DialectType::PostgreSQL
7356 ) =>
7357 {
7358 Action::CountDistinctMultiArg
7359 }
7360 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
7361 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
7362 Action::JsonToGetPath
7363 }
7364 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
7365 Expression::Struct(_)
7366 if matches!(
7367 target,
7368 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7369 ) && matches!(source, DialectType::DuckDB) =>
7370 {
7371 Action::StructToRow
7372 }
7373 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
7374 Expression::MapFunc(ref m)
7375 if m.curly_brace_syntax
7376 && matches!(
7377 target,
7378 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7379 )
7380 && matches!(source, DialectType::DuckDB) =>
7381 {
7382 Action::StructToRow
7383 }
7384 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
7385 Expression::ApproxCountDistinct(_)
7386 if matches!(
7387 target,
7388 DialectType::Presto | DialectType::Trino | DialectType::Athena
7389 ) =>
7390 {
7391 Action::ApproxCountDistinctToApproxDistinct
7392 }
7393 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
7394 Expression::ArrayContains(_)
7395 if matches!(
7396 target,
7397 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
7398 ) && !(matches!(source, DialectType::Snowflake) && matches!(target, DialectType::Snowflake)) =>
7399 {
7400 Action::ArrayContainsConvert
7401 }
7402 // ARRAY_CONTAINS -> DuckDB NULL-aware CASE (from Snowflake source with check_null semantics)
7403 Expression::ArrayContains(_)
7404 if matches!(target, DialectType::DuckDB)
7405 && matches!(source, DialectType::Snowflake) =>
7406 {
7407 Action::ArrayContainsDuckDBConvert
7408 }
7409 // ARRAY_EXCEPT -> target-specific conversion
7410 Expression::ArrayExcept(_)
7411 if matches!(
7412 target,
7413 DialectType::DuckDB | DialectType::Snowflake | DialectType::Presto | DialectType::Trino | DialectType::Athena
7414 ) =>
7415 {
7416 Action::ArrayExceptConvert
7417 }
7418 // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
7419 Expression::ArrayDistinct(_)
7420 if matches!(target, DialectType::DuckDB)
7421 && matches!(source, DialectType::Snowflake) =>
7422 {
7423 Action::ArrayDistinctConvert
7424 }
7425 // StrPosition with position -> complex expansion for Presto/DuckDB
7426 // STRPOS doesn't support a position arg in these dialects
7427 Expression::StrPosition(ref sp)
7428 if sp.position.is_some()
7429 && matches!(
7430 target,
7431 DialectType::Presto
7432 | DialectType::Trino
7433 | DialectType::Athena
7434 | DialectType::DuckDB
7435 ) =>
7436 {
7437 Action::StrPositionExpand
7438 }
7439 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
7440 Expression::First(ref f)
7441 if f.ignore_nulls == Some(true)
7442 && matches!(target, DialectType::DuckDB) =>
7443 {
7444 Action::FirstToAnyValue
7445 }
// BEGIN -> START TRANSACTION for Presto/Trino
// NOTE(review): this arm deliberately yields Action::None and defers to
// inline handling after the action dispatch. However, the inline handler
// below matches Expression::Transaction, not Expression::Command — confirm
// that a BEGIN parsed as a Command is actually rewritten somewhere, or this
// arm (and its guard) is effectively a no-op.
Expression::Command(ref cmd)
    if cmd.this.eq_ignore_ascii_case("BEGIN")
        && matches!(
            target,
            DialectType::Presto | DialectType::Trino | DialectType::Athena
        ) =>
{
    // Handled inline below
    Action::None // We'll handle it directly
}
7457 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
7458 // PostgreSQL # is parsed as BitwiseXor (which is correct).
7459 // a || b (Concat operator) -> CONCAT function for Presto/Trino
7460 Expression::Concat(ref _op)
7461 if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7462 && matches!(target, DialectType::Presto | DialectType::Trino) =>
7463 {
7464 Action::PipeConcatToConcat
7465 }
7466 _ => Action::None,
7467 }
7468 };
7469
7470 match action {
7471 Action::None => {
7472 // Handle inline transforms that don't need a dedicated action
7473
7474 // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
7475 if let Expression::Between(ref b) = e {
7476 if let Some(sym) = b.symmetric {
7477 let keeps_symmetric =
7478 matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
7479 if !keeps_symmetric {
7480 if sym {
7481 // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
7482 let b = if let Expression::Between(b) = e {
7483 *b
7484 } else {
7485 unreachable!()
7486 };
7487 let between1 = Expression::Between(Box::new(
7488 crate::expressions::Between {
7489 this: b.this.clone(),
7490 low: b.low.clone(),
7491 high: b.high.clone(),
7492 not: b.not,
7493 symmetric: None,
7494 },
7495 ));
7496 let between2 = Expression::Between(Box::new(
7497 crate::expressions::Between {
7498 this: b.this,
7499 low: b.high,
7500 high: b.low,
7501 not: b.not,
7502 symmetric: None,
7503 },
7504 ));
7505 return Ok(Expression::Paren(Box::new(
7506 crate::expressions::Paren {
7507 this: Expression::Or(Box::new(
7508 crate::expressions::BinaryOp::new(
7509 between1, between2,
7510 ),
7511 )),
7512 trailing_comments: vec![],
7513 },
7514 )));
7515 } else {
7516 // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
7517 let b = if let Expression::Between(b) = e {
7518 *b
7519 } else {
7520 unreachable!()
7521 };
7522 return Ok(Expression::Between(Box::new(
7523 crate::expressions::Between {
7524 this: b.this,
7525 low: b.low,
7526 high: b.high,
7527 not: b.not,
7528 symmetric: None,
7529 },
7530 )));
7531 }
7532 }
7533 }
7534 }
7535
7536 // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
7537 if let Expression::ILike(ref _like) = e {
7538 if matches!(target, DialectType::StarRocks | DialectType::Doris) {
7539 let like = if let Expression::ILike(l) = e {
7540 *l
7541 } else {
7542 unreachable!()
7543 };
7544 let lower_left = Expression::Function(Box::new(Function::new(
7545 "LOWER".to_string(),
7546 vec![like.left],
7547 )));
7548 let lower_right = Expression::Function(Box::new(Function::new(
7549 "LOWER".to_string(),
7550 vec![like.right],
7551 )));
7552 return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
7553 left: lower_left,
7554 right: lower_right,
7555 escape: like.escape,
7556 quantifier: like.quantifier,
7557 inferred_type: None,
7558 })));
7559 }
7560 }
7561
7562 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
7563 if let Expression::MethodCall(ref mc) = e {
7564 if matches!(source, DialectType::Oracle)
7565 && mc.method.name.eq_ignore_ascii_case("VALUE")
7566 && mc.args.is_empty()
7567 {
7568 let is_dbms_random = match &mc.this {
7569 Expression::Identifier(id) => {
7570 id.name.eq_ignore_ascii_case("DBMS_RANDOM")
7571 }
7572 Expression::Column(col) => {
7573 col.table.is_none()
7574 && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
7575 }
7576 _ => false,
7577 };
7578 if is_dbms_random {
7579 let func_name = match target {
7580 DialectType::PostgreSQL
7581 | DialectType::Redshift
7582 | DialectType::DuckDB
7583 | DialectType::SQLite => "RANDOM",
7584 DialectType::Oracle => "DBMS_RANDOM.VALUE",
7585 _ => "RAND",
7586 };
7587 return Ok(Expression::Function(Box::new(Function::new(
7588 func_name.to_string(),
7589 vec![],
7590 ))));
7591 }
7592 }
7593 }
7594 // TRIM without explicit position -> add BOTH for ClickHouse
7595 if let Expression::Trim(ref trim) = e {
7596 if matches!(target, DialectType::ClickHouse)
7597 && trim.sql_standard_syntax
7598 && trim.characters.is_some()
7599 && !trim.position_explicit
7600 {
7601 let mut new_trim = (**trim).clone();
7602 new_trim.position_explicit = true;
7603 return Ok(Expression::Trim(Box::new(new_trim)));
7604 }
7605 }
7606 // BEGIN -> START TRANSACTION for Presto/Trino
7607 if let Expression::Transaction(ref txn) = e {
7608 if matches!(
7609 target,
7610 DialectType::Presto | DialectType::Trino | DialectType::Athena
7611 ) {
7612 // Convert BEGIN to START TRANSACTION by setting mark to "START"
7613 let mut txn = txn.clone();
7614 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
7615 "START".to_string(),
7616 ))));
7617 return Ok(Expression::Transaction(Box::new(*txn)));
7618 }
7619 }
7620 // IS TRUE/FALSE -> simplified forms for Presto/Trino
7621 if matches!(
7622 target,
7623 DialectType::Presto | DialectType::Trino | DialectType::Athena
7624 ) {
7625 match &e {
7626 Expression::IsTrue(itf) if !itf.not => {
7627 // x IS TRUE -> x
7628 return Ok(itf.this.clone());
7629 }
7630 Expression::IsTrue(itf) if itf.not => {
7631 // x IS NOT TRUE -> NOT x
7632 return Ok(Expression::Not(Box::new(
7633 crate::expressions::UnaryOp {
7634 this: itf.this.clone(),
7635 inferred_type: None,
7636 },
7637 )));
7638 }
7639 Expression::IsFalse(itf) if !itf.not => {
7640 // x IS FALSE -> NOT x
7641 return Ok(Expression::Not(Box::new(
7642 crate::expressions::UnaryOp {
7643 this: itf.this.clone(),
7644 inferred_type: None,
7645 },
7646 )));
7647 }
7648 Expression::IsFalse(itf) if itf.not => {
7649 // x IS NOT FALSE -> NOT NOT x
7650 let not_x =
7651 Expression::Not(Box::new(crate::expressions::UnaryOp {
7652 this: itf.this.clone(),
7653 inferred_type: None,
7654 }));
7655 return Ok(Expression::Not(Box::new(
7656 crate::expressions::UnaryOp {
7657 this: not_x,
7658 inferred_type: None,
7659 },
7660 )));
7661 }
7662 _ => {}
7663 }
7664 }
7665 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
7666 if matches!(target, DialectType::Redshift) {
7667 if let Expression::IsFalse(ref itf) = e {
7668 if itf.not {
7669 return Ok(Expression::Not(Box::new(
7670 crate::expressions::UnaryOp {
7671 this: Expression::IsFalse(Box::new(
7672 crate::expressions::IsTrueFalse {
7673 this: itf.this.clone(),
7674 not: false,
7675 },
7676 )),
7677 inferred_type: None,
7678 },
7679 )));
7680 }
7681 }
7682 }
// REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
// Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
if let Expression::Function(ref f) = e {
    if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
        && matches!(source, DialectType::Snowflake)
        && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
    {
        // NOTE(review): the two branches below are byte-identical — both
        // simply append 'g' — and could be collapsed into a single
        // `(3..=4).contains(&f.args.len())` check. Calls with other arity
        // (e.g. an explicit flags argument already present) pass through
        // untouched.
        if f.args.len() == 3 {
            let mut args = f.args.clone();
            args.push(Expression::string("g"));
            return Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_REPLACE".to_string(),
                args,
            ))));
        } else if f.args.len() == 4 {
            // 4th arg might be position, add 'g' as 5th
            let mut args = f.args.clone();
            args.push(Expression::string("g"));
            return Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_REPLACE".to_string(),
                args,
            ))));
        }
    }
}
// No inline transform applied: return the expression unchanged.
Ok(e)
7709 }
7710
Action::GreatestLeastNull => {
    // GREATEST/LEAST NULL-propagation shim: emit
    //   CASE WHEN a IS NULL OR b IS NULL ... THEN NULL ELSE f(a, b, ...) END
    // so that any NULL argument makes the whole expression NULL, for targets
    // whose native GREATEST/LEAST would otherwise ignore NULLs.
    let f = if let Expression::Function(f) = e {
        *f
    } else {
        unreachable!("action only triggered for Function expressions")
    };
    // One `arg IS NULL` predicate per function argument.
    let mut null_checks: Vec<Expression> = f
        .args
        .iter()
        .map(|a| {
            Expression::IsNull(Box::new(IsNull {
                this: a.clone(),
                not: false,
                postfix_form: false,
            }))
        })
        .collect();
    // Fold the predicates into a left-associated OR chain; a single argument
    // is used as-is.
    // NOTE(review): assumes at least one argument — an empty arg list would
    // panic on remove(0); presumably guarded where this Action is selected.
    let condition = if null_checks.len() == 1 {
        null_checks.remove(0)
    } else {
        let first = null_checks.remove(0);
        null_checks.into_iter().fold(first, |acc, check| {
            Expression::Or(Box::new(BinaryOp::new(acc, check)))
        })
    };
    // The ELSE branch rebuilds the original call untouched.
    Ok(Expression::Case(Box::new(Case {
        operand: None,
        whens: vec![(condition, Expression::Null(Null))],
        else_: Some(Expression::Function(Box::new(Function::new(
            f.name, f.args,
        )))),
        comments: Vec::new(),
        inferred_type: None,
    })))
}
7746
7747 Action::ArrayGenerateRange => {
7748 let f = if let Expression::Function(f) = e {
7749 *f
7750 } else {
7751 unreachable!("action only triggered for Function expressions")
7752 };
7753 let start = f.args[0].clone();
7754 let end = f.args[1].clone();
7755 let step = f.args.get(2).cloned();
7756
7757 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
7758 end.clone(),
7759 Expression::number(1),
7760 )));
7761
7762 match target {
7763 DialectType::PostgreSQL | DialectType::Redshift => {
7764 let mut args = vec![start, end_minus_1];
7765 if let Some(s) = step {
7766 args.push(s);
7767 }
7768 Ok(Expression::Function(Box::new(Function::new(
7769 "GENERATE_SERIES".to_string(),
7770 args,
7771 ))))
7772 }
7773 DialectType::Presto | DialectType::Trino => {
7774 let mut args = vec![start, end_minus_1];
7775 if let Some(s) = step {
7776 args.push(s);
7777 }
7778 Ok(Expression::Function(Box::new(Function::new(
7779 "SEQUENCE".to_string(),
7780 args,
7781 ))))
7782 }
7783 DialectType::BigQuery => {
7784 let mut args = vec![start, end_minus_1];
7785 if let Some(s) = step {
7786 args.push(s);
7787 }
7788 Ok(Expression::Function(Box::new(Function::new(
7789 "GENERATE_ARRAY".to_string(),
7790 args,
7791 ))))
7792 }
7793 DialectType::Snowflake => {
7794 let normalized_end = Expression::Add(Box::new(BinaryOp::new(
7795 Expression::Paren(Box::new(Paren {
7796 this: end_minus_1,
7797 trailing_comments: vec![],
7798 })),
7799 Expression::number(1),
7800 )));
7801 let mut args = vec![start, normalized_end];
7802 if let Some(s) = step {
7803 args.push(s);
7804 }
7805 Ok(Expression::Function(Box::new(Function::new(
7806 "ARRAY_GENERATE_RANGE".to_string(),
7807 args,
7808 ))))
7809 }
7810 _ => Ok(Expression::Function(Box::new(Function::new(
7811 f.name, f.args,
7812 )))),
7813 }
7814 }
7815
Action::Div0TypedDivision => {
    // DIV0-style rewrite: inside IF(cond, x, a / b), cast the dividend so the
    // false branch performs floating-point (not integer) division.
    let if_func = if let Expression::IfFunc(f) = e {
        *f
    } else {
        unreachable!("action only triggered for IfFunc expressions")
    };
    if let Some(Expression::Div(div)) = if_func.false_value {
        // SQLite spells the type REAL (real_spelling flag); everything else
        // gets DOUBLE.
        let cast_type = if matches!(target, DialectType::SQLite) {
            DataType::Float {
                precision: None,
                scale: None,
                real_spelling: true,
            }
        } else {
            DataType::Double {
                precision: None,
                scale: None,
            }
        };
        // Only the left operand (dividend) is cast; casting one side is
        // enough to force a float division, and the divisor keeps its type.
        let casted_left = Expression::Cast(Box::new(Cast {
            this: div.left,
            to: cast_type,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }));
        // Rebuild the IF with the same condition/true branch and the
        // cast-wrapped division in the false branch.
        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
            condition: if_func.condition,
            true_value: if_func.true_value,
            false_value: Some(Expression::Div(Box::new(BinaryOp::new(
                casted_left,
                div.right,
            )))),
            original_name: if_func.original_name,
            inferred_type: None,
        })))
    } else {
        // Not actually a Div, reconstruct
        Ok(Expression::IfFunc(Box::new(if_func)))
    }
}
7859
7860 Action::ArrayAggCollectList => {
7861 let agg = if let Expression::ArrayAgg(a) = e {
7862 *a
7863 } else {
7864 unreachable!("action only triggered for ArrayAgg expressions")
7865 };
7866 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7867 name: Some("COLLECT_LIST".to_string()),
7868 ..agg
7869 })))
7870 }
7871
7872 Action::ArrayAggToGroupConcat => {
7873 let agg = if let Expression::ArrayAgg(a) = e {
7874 *a
7875 } else {
7876 unreachable!("action only triggered for ArrayAgg expressions")
7877 };
7878 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7879 name: Some("GROUP_CONCAT".to_string()),
7880 ..agg
7881 })))
7882 }
7883
7884 Action::ArrayAggWithinGroupFilter => {
7885 let wg = if let Expression::WithinGroup(w) = e {
7886 *w
7887 } else {
7888 unreachable!("action only triggered for WithinGroup expressions")
7889 };
7890 if let Expression::ArrayAgg(inner_agg) = wg.this {
7891 let col = inner_agg.this.clone();
7892 let filter = Expression::IsNull(Box::new(IsNull {
7893 this: col,
7894 not: true,
7895 postfix_form: false,
7896 }));
7897 // For DuckDB, add explicit NULLS FIRST for DESC ordering
7898 let order_by = if matches!(target, DialectType::DuckDB) {
7899 wg.order_by
7900 .into_iter()
7901 .map(|mut o| {
7902 if o.desc && o.nulls_first.is_none() {
7903 o.nulls_first = Some(true);
7904 }
7905 o
7906 })
7907 .collect()
7908 } else {
7909 wg.order_by
7910 };
7911 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7912 this: inner_agg.this,
7913 distinct: inner_agg.distinct,
7914 filter: Some(filter),
7915 order_by,
7916 name: inner_agg.name,
7917 ignore_nulls: inner_agg.ignore_nulls,
7918 having_max: inner_agg.having_max,
7919 limit: inner_agg.limit,
7920 inferred_type: None,
7921 })))
7922 } else {
7923 Ok(Expression::WithinGroup(Box::new(wg)))
7924 }
7925 }
7926
7927 Action::ArrayAggFilter => {
7928 let agg = if let Expression::ArrayAgg(a) = e {
7929 *a
7930 } else {
7931 unreachable!("action only triggered for ArrayAgg expressions")
7932 };
7933 let col = agg.this.clone();
7934 let filter = Expression::IsNull(Box::new(IsNull {
7935 this: col,
7936 not: true,
7937 postfix_form: false,
7938 }));
7939 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7940 filter: Some(filter),
7941 ..agg
7942 })))
7943 }
7944
7945 Action::ArrayAggNullFilter => {
7946 // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
7947 // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
7948 let agg = if let Expression::ArrayAgg(a) = e {
7949 *a
7950 } else {
7951 unreachable!("action only triggered for ArrayAgg expressions")
7952 };
7953 let col = agg.this.clone();
7954 let not_null = Expression::IsNull(Box::new(IsNull {
7955 this: col,
7956 not: true,
7957 postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
7958 }));
7959 let new_filter = if let Some(existing_filter) = agg.filter {
7960 // AND the NOT IS NULL with existing filter
7961 Expression::And(Box::new(crate::expressions::BinaryOp::new(
7962 existing_filter,
7963 not_null,
7964 )))
7965 } else {
7966 not_null
7967 };
7968 Ok(Expression::ArrayAgg(Box::new(AggFunc {
7969 filter: Some(new_filter),
7970 ..agg
7971 })))
7972 }
7973
7974 Action::BigQueryArraySelectAsStructToSnowflake => {
7975 // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
7976 // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
7977 if let Expression::Function(mut f) = e {
7978 let is_match = f.args.len() == 1
7979 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
7980 if is_match {
7981 let inner_select = match f.args.remove(0) {
7982 Expression::Select(s) => *s,
7983 _ => unreachable!(
7984 "argument already verified to be a Select expression"
7985 ),
7986 };
7987 // Build OBJECT_CONSTRUCT args from SELECT expressions
7988 let mut oc_args = Vec::new();
7989 for expr in &inner_select.expressions {
7990 match expr {
7991 Expression::Alias(a) => {
7992 let key = Expression::Literal(Literal::String(
7993 a.alias.name.clone(),
7994 ));
7995 let value = a.this.clone();
7996 oc_args.push(key);
7997 oc_args.push(value);
7998 }
7999 Expression::Column(c) => {
8000 let key = Expression::Literal(Literal::String(
8001 c.name.name.clone(),
8002 ));
8003 oc_args.push(key);
8004 oc_args.push(expr.clone());
8005 }
8006 _ => {
8007 oc_args.push(expr.clone());
8008 }
8009 }
8010 }
8011 let object_construct = Expression::Function(Box::new(Function::new(
8012 "OBJECT_CONSTRUCT".to_string(),
8013 oc_args,
8014 )));
8015 let array_agg = Expression::Function(Box::new(Function::new(
8016 "ARRAY_AGG".to_string(),
8017 vec![object_construct],
8018 )));
8019 let mut new_select = crate::expressions::Select::new();
8020 new_select.expressions = vec![array_agg];
8021 new_select.from = inner_select.from.clone();
8022 new_select.where_clause = inner_select.where_clause.clone();
8023 new_select.group_by = inner_select.group_by.clone();
8024 new_select.having = inner_select.having.clone();
8025 new_select.joins = inner_select.joins.clone();
8026 Ok(Expression::Subquery(Box::new(
8027 crate::expressions::Subquery {
8028 this: Expression::Select(Box::new(new_select)),
8029 alias: None,
8030 column_aliases: Vec::new(),
8031 order_by: None,
8032 limit: None,
8033 offset: None,
8034 distribute_by: None,
8035 sort_by: None,
8036 cluster_by: None,
8037 lateral: false,
8038 modifiers_inside: false,
8039 trailing_comments: Vec::new(),
8040 inferred_type: None,
8041 },
8042 )))
8043 } else {
8044 Ok(Expression::Function(f))
8045 }
8046 } else {
8047 Ok(e)
8048 }
8049 }
8050
8051 Action::BigQueryPercentileContToDuckDB => {
8052 // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
8053 if let Expression::AggregateFunction(mut af) = e {
8054 af.name = "QUANTILE_CONT".to_string();
8055 af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
8056 // Keep only first 2 args
8057 if af.args.len() > 2 {
8058 af.args.truncate(2);
8059 }
8060 Ok(Expression::AggregateFunction(af))
8061 } else {
8062 Ok(e)
8063 }
8064 }
8065
8066 Action::ArrayAggIgnoreNullsDuckDB => {
8067 // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
8068 // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
8069 let mut agg = if let Expression::ArrayAgg(a) = e {
8070 *a
8071 } else {
8072 unreachable!("action only triggered for ArrayAgg expressions")
8073 };
8074 agg.ignore_nulls = None; // Strip IGNORE NULLS
8075 if !agg.order_by.is_empty() {
8076 agg.order_by[0].nulls_first = Some(true);
8077 }
8078 Ok(Expression::ArrayAgg(Box::new(agg)))
8079 }
8080
Action::CountDistinctMultiArg => {
    // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
    // For targets that only accept a single DISTINCT argument: the CASE
    // yields NULL when any column is NULL — so the row is excluded from the
    // count, matching multi-argument COUNT DISTINCT semantics — and a row
    // tuple of all columns otherwise.
    if let Expression::Count(c) = e {
        if let Some(Expression::Tuple(t)) = c.this {
            let args = t.expressions;
            // Build CASE expression:
            // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
            let mut whens = Vec::new();
            for arg in &args {
                whens.push((
                    Expression::IsNull(Box::new(IsNull {
                        this: arg.clone(),
                        not: false,
                        postfix_form: false,
                    })),
                    Expression::Null(crate::expressions::Null),
                ));
            }
            // Build the tuple for ELSE
            let tuple_expr =
                Expression::Tuple(Box::new(crate::expressions::Tuple {
                    expressions: args,
                }));
            let case_expr = Expression::Case(Box::new(crate::expressions::Case {
                operand: None,
                whens,
                else_: Some(tuple_expr),
                comments: Vec::new(),
                inferred_type: None,
            }));
            // Re-wrap as a single-argument COUNT(DISTINCT ...), carrying over
            // the original filter / ignore_nulls / name metadata.
            Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
                this: Some(case_expr),
                star: false,
                distinct: true,
                filter: c.filter,
                ignore_nulls: c.ignore_nulls,
                original_name: c.original_name,
                inferred_type: None,
            })))
        } else {
            // Guard already ensured a Tuple, but stay total: pass through.
            Ok(Expression::Count(c))
        }
    } else {
        Ok(e)
    }
}
8127
8128 Action::CastTimestampToDatetime => {
8129 let c = if let Expression::Cast(c) = e {
8130 *c
8131 } else {
8132 unreachable!("action only triggered for Cast expressions")
8133 };
8134 Ok(Expression::Cast(Box::new(Cast {
8135 to: DataType::Custom {
8136 name: "DATETIME".to_string(),
8137 },
8138 ..c
8139 })))
8140 }
8141
8142 Action::CastTimestampStripTz => {
8143 // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
8144 let c = if let Expression::Cast(c) = e {
8145 *c
8146 } else {
8147 unreachable!("action only triggered for Cast expressions")
8148 };
8149 Ok(Expression::Cast(Box::new(Cast {
8150 to: DataType::Timestamp {
8151 precision: None,
8152 timezone: false,
8153 },
8154 ..c
8155 })))
8156 }
8157
8158 Action::CastTimestamptzToFunc => {
8159 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
8160 let c = if let Expression::Cast(c) = e {
8161 *c
8162 } else {
8163 unreachable!("action only triggered for Cast expressions")
8164 };
8165 Ok(Expression::Function(Box::new(Function::new(
8166 "TIMESTAMP".to_string(),
8167 vec![c.this],
8168 ))))
8169 }
8170
8171 Action::ToDateToCast => {
8172 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
8173 if let Expression::Function(f) = e {
8174 let arg = f.args.into_iter().next().unwrap();
8175 Ok(Expression::Cast(Box::new(Cast {
8176 this: arg,
8177 to: DataType::Date,
8178 double_colon_syntax: false,
8179 trailing_comments: vec![],
8180 format: None,
8181 default: None,
8182 inferred_type: None,
8183 })))
8184 } else {
8185 Ok(e)
8186 }
8187 }
            Action::DateTruncWrapCast => {
                // Wrap the result of a date/timestamp truncation in a CAST back
                // to the input's declared cast type, so the truncated value keeps
                // the type the query originally asked for.
                //
                // Handle both Expression::DateTrunc/TimestampTrunc and
                // Expression::Function("DATE_TRUNC", [unit, expr])
                //
                // For TIME inputs the value is first promoted by adding it to
                // DATE '1970-01-01' before truncating, then cast back to the
                // TIME type — presumably because the target's DATE_TRUNC does
                // not accept a bare TIME; TODO confirm against the target
                // dialect's docs.
                match e {
                    Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
                        // Only act when the operand is an explicit CAST; the
                        // cast's target type is what we re-wrap with below.
                        let input_type = match &d.this {
                            Expression::Cast(c) => Some(c.to.clone()),
                            _ => None,
                        };
                        if let Some(cast_type) = input_type {
                            let is_time = matches!(cast_type, DataType::Time { .. });
                            if is_time {
                                // DATE '1970-01-01' anchor for the TIME value.
                                let date_expr = Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(
                                        crate::expressions::Literal::String(
                                            "1970-01-01".to_string(),
                                        ),
                                    ),
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                // DATE '1970-01-01' + <time expr>
                                let add_expr =
                                    Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
                                let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
                                    this: add_expr,
                                    unit: d.unit,
                                }));
                                // Cast the truncated timestamp back to TIME.
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: inner,
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            } else {
                                // Non-TIME: simply wrap the truncation in a CAST
                                // back to the input's cast type.
                                let inner = Expression::DateTrunc(Box::new(*d));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: inner,
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                        } else {
                            // Operand is not a CAST: nothing to re-wrap with.
                            Ok(Expression::DateTrunc(d))
                        }
                    }
                    Expression::Function(f) if f.args.len() == 2 => {
                        // Function-based DATE_TRUNC(unit, expr)
                        let input_type = match &f.args[1] {
                            Expression::Cast(c) => Some(c.to.clone()),
                            _ => None,
                        };
                        if let Some(cast_type) = input_type {
                            let is_time = matches!(cast_type, DataType::Time { .. });
                            if is_time {
                                // Same DATE '1970-01-01' promotion as the
                                // node-based branch above.
                                let date_expr = Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(
                                        crate::expressions::Literal::String(
                                            "1970-01-01".to_string(),
                                        ),
                                    ),
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                let mut args = f.args;
                                let unit_arg = args.remove(0);
                                let time_expr = args.remove(0);
                                let add_expr = Expression::Add(Box::new(BinaryOp::new(
                                    date_expr, time_expr,
                                )));
                                let inner = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![unit_arg, add_expr],
                                )));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: inner,
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            } else {
                                // Wrap the function in CAST
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: Expression::Function(f),
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                        } else {
                            Ok(Expression::Function(f))
                        }
                    }
                    // Anything else (including DATE_TRUNC with an unexpected
                    // arity) passes through untouched.
                    other => Ok(other),
                }
            }
8304
8305 Action::RegexpReplaceSnowflakeToDuckDB => {
8306 // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
8307 if let Expression::Function(f) = e {
8308 let mut args = f.args;
8309 let subject = args.remove(0);
8310 let pattern = args.remove(0);
8311 let replacement = args.remove(0);
8312 Ok(Expression::Function(Box::new(Function::new(
8313 "REGEXP_REPLACE".to_string(),
8314 vec![
8315 subject,
8316 pattern,
8317 replacement,
8318 Expression::Literal(crate::expressions::Literal::String(
8319 "g".to_string(),
8320 )),
8321 ],
8322 ))))
8323 } else {
8324 Ok(e)
8325 }
8326 }
8327
            Action::RegexpReplacePositionSnowflakeToDuckDB => {
                // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
                // pos=1, occ=1 -> REGEXP_REPLACE(s, p, r) (single replace, no 'g')
                // pos>1, occ=0 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r, 'g')
                // pos>1, occ=1 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r)
                // pos=1, occ=0 -> REGEXP_REPLACE(s, p, r, 'g') (replace all)
                //
                // NOTE(review): the five unconditional remove(0) calls assume
                // the action is only registered for the exact 5-argument form;
                // a shorter argument list would panic — confirm at the trigger
                // site.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    let subject = args.remove(0);
                    let pattern = args.remove(0);
                    let replacement = args.remove(0);
                    let position = args.remove(0);
                    let occurrence = args.remove(0);

                    // Position/occurrence are only special-cased when they are
                    // the literal numbers "1"/"0"; any other expression falls
                    // into the general pos>1 branch below.
                    let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
                    let is_occ_0 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "0");
                    let is_occ_1 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "1");

                    if is_pos_1 && is_occ_1 {
                        // REGEXP_REPLACE(s, p, r) - single replace, no flags
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_REPLACE".to_string(),
                            vec![subject, pattern, replacement],
                        ))))
                    } else if is_pos_1 && is_occ_0 {
                        // REGEXP_REPLACE(s, p, r, 'g') - global replace
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_REPLACE".to_string(),
                            vec![
                                subject,
                                pattern,
                                replacement,
                                Expression::Literal(Literal::String("g".to_string())),
                            ],
                        ))))
                    } else {
                        // pos>1: SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r[, 'g'])
                        // Pre-compute pos-1 when position is a numeric literal
                        let pos_minus_1 = if let Expression::Literal(Literal::Number(ref n)) = position {
                            if let Ok(val) = n.parse::<i64>() {
                                Expression::number(val - 1)
                            } else {
                                // Non-integer numeric literal: fall back to a
                                // runtime subtraction in the generated SQL.
                                Expression::Sub(Box::new(BinaryOp::new(
                                    position.clone(),
                                    Expression::number(1),
                                )))
                            }
                        } else {
                            Expression::Sub(Box::new(BinaryOp::new(
                                position.clone(),
                                Expression::number(1),
                            )))
                        };
                        // Untouched prefix before the start position.
                        let prefix = Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![subject.clone(), Expression::number(1), pos_minus_1],
                        )));
                        // Tail starting at pos, on which the replace runs.
                        let suffix_subject = Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![subject, position],
                        )));
                        let mut replace_args = vec![suffix_subject, pattern, replacement];
                        if is_occ_0 {
                            // occ=0 means replace all -> DuckDB 'g' flag.
                            replace_args.push(Expression::Literal(Literal::String(
                                "g".to_string(),
                            )));
                        }
                        let replace_expr = Expression::Function(Box::new(Function::new(
                            "REGEXP_REPLACE".to_string(),
                            replace_args,
                        )));
                        // prefix || replaced-tail
                        Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
                            this: Box::new(prefix),
                            expression: Box::new(replace_expr),
                            safe: None,
                        })))
                    }
                } else {
                    Ok(e)
                }
            }
8409
            Action::RegexpSubstrSnowflakeToDuckDB => {
                // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
                //
                // Dispatches on argument count; each arity maps to a different
                // DuckDB construction. Snowflake's signature is
                // REGEXP_SUBSTR(s, p[, pos[, occ[, flags[, group]]]]).
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    let arg_count = args.len();
                    match arg_count {
                        // REGEXP_SUBSTR(s, p) -> REGEXP_EXTRACT(s, p)
                        0..=2 => {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT".to_string(),
                                args,
                            ))))
                        }
                        // REGEXP_SUBSTR(s, p, pos) -> REGEXP_EXTRACT(NULLIF(SUBSTRING(s, pos), ''), p)
                        3 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let position = args.remove(0);
                            let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
                            if is_pos_1 {
                                // pos=1 is the default; drop it entirely.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![subject, pattern],
                                ))))
                            } else {
                                // NULLIF keeps an empty tail from matching as
                                // '' — preserves Snowflake's NULL-on-no-match.
                                let substring_expr = Expression::Function(Box::new(
                                    Function::new(
                                        "SUBSTRING".to_string(),
                                        vec![subject, position],
                                    ),
                                ));
                                let nullif_expr = Expression::Function(Box::new(
                                    Function::new(
                                        "NULLIF".to_string(),
                                        vec![
                                            substring_expr,
                                            Expression::Literal(Literal::String(
                                                String::new(),
                                            )),
                                        ],
                                    ),
                                ));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![nullif_expr, pattern],
                                ))))
                            }
                        }
                        // REGEXP_SUBSTR(s, p, pos, occ) -> depends on pos and occ
                        4 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let position = args.remove(0);
                            let occurrence = args.remove(0);
                            let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
                            let is_occ_1 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "1");

                            // Apply the position offset first (same NULLIF
                            // wrapping as the 3-arg case).
                            let effective_subject = if is_pos_1 {
                                subject
                            } else {
                                let substring_expr = Expression::Function(Box::new(
                                    Function::new(
                                        "SUBSTRING".to_string(),
                                        vec![subject, position],
                                    ),
                                ));
                                Expression::Function(Box::new(Function::new(
                                    "NULLIF".to_string(),
                                    vec![
                                        substring_expr,
                                        Expression::Literal(Literal::String(String::new())),
                                    ],
                                )))
                            };

                            if is_occ_1 {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![effective_subject, pattern],
                                ))))
                            } else {
                                // ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, p), occ)
                                let extract_all = Expression::Function(Box::new(
                                    Function::new(
                                        "REGEXP_EXTRACT_ALL".to_string(),
                                        vec![effective_subject, pattern],
                                    ),
                                ));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "ARRAY_EXTRACT".to_string(),
                                    vec![extract_all, occurrence],
                                ))))
                            }
                        }
                        // REGEXP_SUBSTR(s, p, 1, 1, 'e') -> REGEXP_EXTRACT(s, p)
                        // NOTE(review): pos/occ/flags are discarded here even if
                        // they are not the defaults — confirm the action is only
                        // registered for default-valued 5-arg calls.
                        5 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let _position = args.remove(0);
                            let _occurrence = args.remove(0);
                            let _flags = args.remove(0);
                            // Strip 'e' flag, convert to REGEXP_EXTRACT
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT".to_string(),
                                vec![subject, pattern],
                            ))))
                        }
                        // REGEXP_SUBSTR(s, p, 1, 1, 'e', group) -> REGEXP_EXTRACT(s, p[, group])
                        _ => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let _position = args.remove(0);
                            let _occurrence = args.remove(0);
                            let _flags = args.remove(0);
                            let group = args.remove(0);
                            let is_group_0 = matches!(&group, Expression::Literal(Literal::Number(n)) if n == "0");
                            if is_group_0 {
                                // Strip group=0 (default)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![subject, pattern],
                                ))))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![subject, pattern, group],
                                ))))
                            }
                        }
                    }
                } else {
                    Ok(e)
                }
            }
8544
8545 Action::RegexpSubstrSnowflakeIdentity => {
8546 // Snowflake→Snowflake: REGEXP_SUBSTR/REGEXP_SUBSTR_ALL with 6 args
8547 // Strip trailing group=0
8548 if let Expression::Function(f) = e {
8549 let func_name = f.name.clone();
8550 let mut args = f.args;
8551 if args.len() == 6 {
8552 let is_group_0 = matches!(&args[5], Expression::Literal(Literal::Number(n)) if n == "0");
8553 if is_group_0 {
8554 args.truncate(5);
8555 }
8556 }
8557 Ok(Expression::Function(Box::new(Function::new(
8558 func_name,
8559 args,
8560 ))))
8561 } else {
8562 Ok(e)
8563 }
8564 }
8565
            Action::RegexpSubstrAllSnowflakeToDuckDB => {
                // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
                //
                // Mirrors the REGEXP_SUBSTR mapping above, dispatching on
                // argument count, but without the NULLIF wrapping (an empty
                // result list is already the correct "no match" value).
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    let arg_count = args.len();
                    match arg_count {
                        // REGEXP_SUBSTR_ALL(s, p) -> REGEXP_EXTRACT_ALL(s, p)
                        0..=2 => {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                args,
                            ))))
                        }
                        // REGEXP_SUBSTR_ALL(s, p, pos) -> REGEXP_EXTRACT_ALL(SUBSTRING(s, pos), p)
                        3 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let position = args.remove(0);
                            let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
                            if is_pos_1 {
                                // pos=1 is the default; drop it.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![subject, pattern],
                                ))))
                            } else {
                                let substring_expr = Expression::Function(Box::new(
                                    Function::new(
                                        "SUBSTRING".to_string(),
                                        vec![subject, position],
                                    ),
                                ));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![substring_expr, pattern],
                                ))))
                            }
                        }
                        // REGEXP_SUBSTR_ALL(s, p, 1, occ) -> REGEXP_EXTRACT_ALL(s, p)[occ:]
                        4 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let position = args.remove(0);
                            let occurrence = args.remove(0);
                            let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
                            let is_occ_1 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "1");

                            let effective_subject = if is_pos_1 {
                                subject
                            } else {
                                Expression::Function(Box::new(Function::new(
                                    "SUBSTRING".to_string(),
                                    vec![subject, position],
                                )))
                            };

                            if is_occ_1 {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![effective_subject, pattern],
                                ))))
                            } else {
                                // REGEXP_EXTRACT_ALL(s, p)[occ:]
                                // Slice off the first occ-1 matches so the list
                                // starts at the requested occurrence.
                                let extract_all = Expression::Function(Box::new(
                                    Function::new(
                                        "REGEXP_EXTRACT_ALL".to_string(),
                                        vec![effective_subject, pattern],
                                    ),
                                ));
                                Ok(Expression::ArraySlice(Box::new(
                                    crate::expressions::ArraySlice {
                                        this: extract_all,
                                        start: Some(occurrence),
                                        end: None,
                                    },
                                )))
                            }
                        }
                        // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e') -> REGEXP_EXTRACT_ALL(s, p)
                        // NOTE(review): pos/occ/flags are dropped even when not
                        // the defaults — confirm the trigger site restricts this
                        // arm to default-valued calls.
                        5 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let _position = args.remove(0);
                            let _occurrence = args.remove(0);
                            let _flags = args.remove(0);
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![subject, pattern],
                            ))))
                        }
                        // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e', 0) -> REGEXP_EXTRACT_ALL(s, p)
                        _ => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let _position = args.remove(0);
                            let _occurrence = args.remove(0);
                            let _flags = args.remove(0);
                            let group = args.remove(0);
                            let is_group_0 = matches!(&group, Expression::Literal(Literal::Number(n)) if n == "0");
                            if is_group_0 {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![subject, pattern],
                                ))))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT_ALL".to_string(),
                                    vec![subject, pattern, group],
                                ))))
                            }
                        }
                    }
                } else {
                    Ok(e)
                }
            }
8681
            Action::RegexpCountSnowflakeToDuckDB => {
                // Snowflake REGEXP_COUNT(s, p[, pos[, flags]]) ->
                // DuckDB: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
                //
                // The empty-pattern guard exists because the branches below
                // would otherwise count one match per character position.
                // NOTE(review): the two leading remove(0) calls assume at least
                // two arguments — confirm the trigger site guarantees that.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    let arg_count = args.len();
                    let subject = args.remove(0);
                    let pattern = args.remove(0);

                    // Handle position arg
                    let effective_subject = if arg_count >= 3 {
                        let position = args.remove(0);
                        Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![subject, position],
                        )))
                    } else {
                        subject
                    };

                    // Handle flags arg -> embed as (?flags) prefix in pattern
                    // NOTE(review): a non-string-literal flags argument is
                    // silently dropped here — confirm that is intended.
                    let effective_pattern = if arg_count >= 4 {
                        let flags = args.remove(0);
                        match &flags {
                            Expression::Literal(Literal::String(f_str)) if !f_str.is_empty() => {
                                // Always use concatenation: '(?flags)' || pattern
                                let prefix = Expression::Literal(Literal::String(
                                    format!("(?{})", f_str),
                                ));
                                Expression::DPipe(Box::new(crate::expressions::DPipe {
                                    this: Box::new(prefix),
                                    expression: Box::new(pattern.clone()),
                                    safe: None,
                                }))
                            }
                            _ => pattern.clone(),
                        }
                    } else {
                        pattern.clone()
                    };

                    // Build: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
                    let extract_all = Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![effective_subject, effective_pattern.clone()],
                    )));
                    let length_expr = Expression::Length(Box::new(
                        crate::expressions::UnaryFunc {
                            this: extract_all,
                            original_name: None,
                            inferred_type: None,
                        },
                    ));
                    let condition = Expression::Eq(Box::new(BinaryOp::new(
                        effective_pattern,
                        Expression::Literal(Literal::String(String::new())),
                    )));
                    Ok(Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![(condition, Expression::number(0))],
                        else_: Some(length_expr),
                        comments: vec![],
                        inferred_type: None,
                    })))
                } else {
                    Ok(e)
                }
            }
8750
            Action::RegexpInstrSnowflakeToDuckDB => {
                // Snowflake REGEXP_INSTR(s, p[, pos[, occ[, option[, flags[, group]]]]]) ->
                // DuckDB: CASE WHEN s IS NULL OR p IS NULL [OR ...] THEN NULL
                //              WHEN p = '' THEN 0
                //              WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
                //              ELSE 1 + COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(eff_s, eff_p)[1:occ], x -> LENGTH(x))), 0)
                //                     + COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(eff_s, eff_p)[1:occ - 1], x -> LENGTH(x))), 0)
                //                     + pos_offset
                //              END
                //
                // The ELSE arm reconstructs the 1-based character position of
                // the occ-th match by summing the lengths of the non-matching
                // segments before it (STRING_SPLIT_REGEX slice) and of the
                // earlier matches themselves (REGEXP_EXTRACT_ALL slice).
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    // NOTE(review): assumes at least one argument; a zero-arg
                    // call would panic here — confirm at the trigger site.
                    let subject = args.remove(0);
                    let pattern = if !args.is_empty() { args.remove(0) } else {
                        Expression::Literal(Literal::String(String::new()))
                    };

                    // Collect all original args for NULL checks
                    let position = if !args.is_empty() { Some(args.remove(0)) } else { None };
                    let occurrence = if !args.is_empty() { Some(args.remove(0)) } else { None };
                    let option = if !args.is_empty() { Some(args.remove(0)) } else { None };
                    let flags = if !args.is_empty() { Some(args.remove(0)) } else { None };
                    // Group argument is accepted but unused by this mapping.
                    let _group = if !args.is_empty() { Some(args.remove(0)) } else { None };

                    // Treat a missing position the same as the default pos=1.
                    let is_pos_1 = position.as_ref().map_or(true, |p| matches!(p, Expression::Literal(Literal::Number(n)) if n == "1"));
                    let occurrence_expr = occurrence.clone().unwrap_or(Expression::number(1));

                    // Build NULL check: subject IS NULL OR pattern IS NULL [OR pos IS NULL ...]
                    let mut null_checks: Vec<Expression> = vec![
                        Expression::Is(Box::new(BinaryOp::new(
                            subject.clone(),
                            Expression::Null(Null),
                        ))),
                        Expression::Is(Box::new(BinaryOp::new(
                            pattern.clone(),
                            Expression::Null(Null),
                        ))),
                    ];
                    // Add NULL checks for all provided optional args
                    for opt_arg in [&position, &occurrence, &option, &flags].iter() {
                        if let Some(arg) = opt_arg {
                            null_checks.push(Expression::Is(Box::new(BinaryOp::new(
                                (*arg).clone(),
                                Expression::Null(Null),
                            ))));
                        }
                    }
                    // Chain with OR
                    // At least two checks are always present, so reduce() and
                    // the unwrap() below cannot fail.
                    let null_condition = null_checks.into_iter().reduce(|a, b| {
                        Expression::Or(Box::new(BinaryOp::new(a, b)))
                    }).unwrap();

                    // Effective subject (apply position offset)
                    let effective_subject = if is_pos_1 {
                        subject.clone()
                    } else {
                        let pos = position.clone().unwrap_or(Expression::number(1));
                        Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![subject.clone(), pos],
                        )))
                    };

                    // Effective pattern (apply flags if present)
                    // NOTE(review): non-literal flags are silently ignored, as
                    // in the REGEXP_COUNT mapping — confirm intended.
                    let effective_pattern = if let Some(ref fl) = flags {
                        if let Expression::Literal(Literal::String(f_str)) = fl {
                            if !f_str.is_empty() {
                                let prefix = Expression::Literal(Literal::String(
                                    format!("(?{})", f_str),
                                ));
                                Expression::DPipe(Box::new(crate::expressions::DPipe {
                                    this: Box::new(prefix),
                                    expression: Box::new(pattern.clone()),
                                    safe: None,
                                }))
                            } else {
                                pattern.clone()
                            }
                        } else {
                            pattern.clone()
                        }
                    } else {
                        pattern.clone()
                    };

                    // WHEN pattern = '' THEN 0
                    let empty_pattern_check = Expression::Eq(Box::new(BinaryOp::new(
                        effective_pattern.clone(),
                        Expression::Literal(Literal::String(String::new())),
                    )));

                    // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
                    let match_count_check = Expression::Lt(Box::new(BinaryOp::new(
                        Expression::Length(Box::new(crate::expressions::UnaryFunc {
                            this: Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![effective_subject.clone(), effective_pattern.clone()],
                            ))),
                            original_name: None,
                            inferred_type: None,
                        })),
                        occurrence_expr.clone(),
                    )));

                    // Helper: build LENGTH lambda for LIST_TRANSFORM
                    let make_len_lambda = || Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                        parameters: vec![crate::expressions::Identifier::new("x")],
                        body: Expression::Length(Box::new(crate::expressions::UnaryFunc {
                            this: Expression::Identifier(crate::expressions::Identifier::new("x")),
                            original_name: None,
                            inferred_type: None,
                        })),
                        colon: false,
                        parameter_types: vec![],
                    }));

                    // COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(s, p)[1:occ], x -> LENGTH(x))), 0)
                    // Total length of the non-matching text before the occ-th match.
                    let split_sliced = Expression::ArraySlice(Box::new(
                        crate::expressions::ArraySlice {
                            this: Expression::Function(Box::new(Function::new(
                                "STRING_SPLIT_REGEX".to_string(),
                                vec![effective_subject.clone(), effective_pattern.clone()],
                            ))),
                            start: Some(Expression::number(1)),
                            end: Some(occurrence_expr.clone()),
                        },
                    ));
                    let split_sum = Expression::Function(Box::new(Function::new(
                        "COALESCE".to_string(),
                        vec![
                            Expression::Function(Box::new(Function::new(
                                "LIST_SUM".to_string(),
                                vec![Expression::Function(Box::new(Function::new(
                                    "LIST_TRANSFORM".to_string(),
                                    vec![split_sliced, make_len_lambda()],
                                )))],
                            ))),
                            Expression::number(0),
                        ],
                    )));

                    // COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(s, p)[1:occ - 1], x -> LENGTH(x))), 0)
                    // Total length of the matches before the occ-th one.
                    let extract_sliced = Expression::ArraySlice(Box::new(
                        crate::expressions::ArraySlice {
                            this: Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![effective_subject.clone(), effective_pattern.clone()],
                            ))),
                            start: Some(Expression::number(1)),
                            end: Some(Expression::Sub(Box::new(BinaryOp::new(
                                occurrence_expr.clone(),
                                Expression::number(1),
                            )))),
                        },
                    ));
                    let extract_sum = Expression::Function(Box::new(Function::new(
                        "COALESCE".to_string(),
                        vec![
                            Expression::Function(Box::new(Function::new(
                                "LIST_SUM".to_string(),
                                vec![Expression::Function(Box::new(Function::new(
                                    "LIST_TRANSFORM".to_string(),
                                    vec![extract_sliced, make_len_lambda()],
                                )))],
                            ))),
                            Expression::number(0),
                        ],
                    )));

                    // Position offset: pos - 1 when pos > 1, else 0
                    let pos_offset: Expression = if !is_pos_1 {
                        let pos = position.clone().unwrap_or(Expression::number(1));
                        Expression::Sub(Box::new(BinaryOp::new(
                            pos,
                            Expression::number(1),
                        )))
                    } else {
                        Expression::number(0)
                    };

                    // ELSE: 1 + split_sum + extract_sum + pos_offset
                    let else_expr = Expression::Add(Box::new(BinaryOp::new(
                        Expression::Add(Box::new(BinaryOp::new(
                            Expression::Add(Box::new(BinaryOp::new(
                                Expression::number(1),
                                split_sum,
                            ))),
                            extract_sum,
                        ))),
                        pos_offset,
                    )));

                    Ok(Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![
                            (null_condition, Expression::Null(Null)),
                            (empty_pattern_check, Expression::number(0)),
                            (match_count_check, Expression::number(0)),
                        ],
                        else_: Some(else_expr),
                        comments: vec![],
                        inferred_type: None,
                    })))
                } else {
                    Ok(e)
                }
            }
8957
8958 Action::RlikeSnowflakeToDuckDB => {
8959 // Snowflake RLIKE(a, b[, flags]) -> DuckDB REGEXP_MATCHES(a, '^(' || (b) || ')$'[, flags])
8960 // Snowflake RLIKE does full-string match; DuckDB REGEXP_MATCHES does partial match
8961 // So we anchor the pattern with ^ and $
8962 // Can come as Expression::RegexpLike (from Snowflake transform_expr) or
8963 // Expression::Function("RLIKE", args) (if not transformed yet)
8964 let (subject, pattern, flags) = match e {
8965 Expression::RegexpLike(ref rl) => {
8966 (rl.this.clone(), rl.pattern.clone(), rl.flags.clone())
8967 }
8968 Expression::Function(ref f) if f.args.len() >= 2 => {
8969 let s = f.args[0].clone();
8970 let p = f.args[1].clone();
8971 let fl = f.args.get(2).cloned();
8972 (s, p, fl)
8973 }
8974 _ => return Ok(e),
8975 };
8976
8977 // Build anchored pattern: '^(' || (pattern) || ')$'
8978 let prefix = Expression::Literal(Literal::String("^(".to_string()));
8979 let suffix = Expression::Literal(Literal::String(")$".to_string()));
8980 let paren_pattern = Expression::Paren(Box::new(Paren {
8981 this: pattern,
8982 trailing_comments: vec![],
8983 }));
8984 let left_concat = Expression::DPipe(Box::new(
8985 crate::expressions::DPipe {
8986 this: Box::new(prefix),
8987 expression: Box::new(paren_pattern),
8988 safe: None,
8989 },
8990 ));
8991 let anchored = Expression::DPipe(Box::new(
8992 crate::expressions::DPipe {
8993 this: Box::new(left_concat),
8994 expression: Box::new(suffix),
8995 safe: None,
8996 },
8997 ));
8998
8999 let mut result_args = vec![subject, anchored];
9000 if let Some(fl) = flags {
9001 result_args.push(fl);
9002 }
9003 Ok(Expression::Function(Box::new(Function::new(
9004 "REGEXP_MATCHES".to_string(),
9005 result_args,
9006 ))))
9007 }
9008
            Action::RegexpExtractAllToSnowflake => {
                // BigQuery REGEXP_EXTRACT_ALL(s, p) -> Snowflake REGEXP_SUBSTR_ALL(s, p)
                // With capture group: REGEXP_SUBSTR_ALL(s, p, 1, 1, 'c', 1)
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    if args.len() >= 2 {
                        let str_expr = args.remove(0);
                        let pattern = args.remove(0);

                        // Heuristic: a literal pattern containing both '(' and
                        // ')' is assumed to have a capture group.
                        // NOTE(review): this also fires on escaped \( \) and on
                        // non-capturing (?:...) groups — confirm acceptable.
                        let has_groups = match &pattern {
                            Expression::Literal(Literal::String(s)) => {
                                s.contains('(') && s.contains(')')
                            }
                            _ => false,
                        };

                        if has_groups {
                            // pos=1, occ=1, 'c' flag, extract group 1.
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_SUBSTR_ALL".to_string(),
                                vec![
                                    str_expr,
                                    pattern,
                                    Expression::number(1),
                                    Expression::number(1),
                                    Expression::Literal(Literal::String("c".to_string())),
                                    Expression::number(1),
                                ],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_SUBSTR_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    } else {
                        // Fewer than two args: rename only, keep args as-is.
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_SUBSTR_ALL".to_string(),
                            args,
                        ))))
                    }
                } else {
                    Ok(e)
                }
            }
9053
9054 Action::SetToVariable => {
9055 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
9056 if let Expression::SetStatement(mut s) = e {
9057 for item in &mut s.items {
9058 if item.kind.is_none() {
9059 // Check if name already has VARIABLE prefix (from DuckDB source parsing)
9060 let already_variable = match &item.name {
9061 Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
9062 _ => false,
9063 };
9064 if already_variable {
9065 // Extract the actual name and set kind
9066 if let Expression::Identifier(ref mut id) = item.name {
9067 let actual_name = id.name["VARIABLE ".len()..].to_string();
9068 id.name = actual_name;
9069 }
9070 }
9071 item.kind = Some("VARIABLE".to_string());
9072 }
9073 }
9074 Ok(Expression::SetStatement(s))
9075 } else {
9076 Ok(e)
9077 }
9078 }
9079
9080 Action::ConvertTimezoneToExpr => {
9081 // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
9082 // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
9083 if let Expression::Function(f) = e {
9084 if f.args.len() == 2 {
9085 let mut args = f.args;
9086 let target_tz = args.remove(0);
9087 let timestamp = args.remove(0);
9088 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
9089 source_tz: None,
9090 target_tz: Some(Box::new(target_tz)),
9091 timestamp: Some(Box::new(timestamp)),
9092 options: vec![],
9093 })))
9094 } else if f.args.len() == 3 {
9095 let mut args = f.args;
9096 let source_tz = args.remove(0);
9097 let target_tz = args.remove(0);
9098 let timestamp = args.remove(0);
9099 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
9100 source_tz: Some(Box::new(source_tz)),
9101 target_tz: Some(Box::new(target_tz)),
9102 timestamp: Some(Box::new(timestamp)),
9103 options: vec![],
9104 })))
9105 } else {
9106 Ok(Expression::Function(f))
9107 }
9108 } else {
9109 Ok(e)
9110 }
9111 }
9112
9113 Action::BigQueryCastType => {
9114 // Convert BigQuery types to standard SQL types
9115 if let Expression::DataType(dt) = e {
9116 match dt {
9117 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
9118 Ok(Expression::DataType(DataType::BigInt { length: None }))
9119 }
9120 DataType::Custom { ref name }
9121 if name.eq_ignore_ascii_case("FLOAT64") =>
9122 {
9123 Ok(Expression::DataType(DataType::Double {
9124 precision: None,
9125 scale: None,
9126 }))
9127 }
9128 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
9129 Ok(Expression::DataType(DataType::Boolean))
9130 }
9131 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
9132 Ok(Expression::DataType(DataType::VarBinary { length: None }))
9133 }
9134 DataType::Custom { ref name }
9135 if name.eq_ignore_ascii_case("NUMERIC") =>
9136 {
9137 // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
9138 // default precision (18, 3) being added to bare DECIMAL
9139 if matches!(target, DialectType::DuckDB) {
9140 Ok(Expression::DataType(DataType::Custom {
9141 name: "DECIMAL".to_string(),
9142 }))
9143 } else {
9144 Ok(Expression::DataType(DataType::Decimal {
9145 precision: None,
9146 scale: None,
9147 }))
9148 }
9149 }
9150 DataType::Custom { ref name }
9151 if name.eq_ignore_ascii_case("STRING") =>
9152 {
9153 Ok(Expression::DataType(DataType::String { length: None }))
9154 }
9155 DataType::Custom { ref name }
9156 if name.eq_ignore_ascii_case("DATETIME") =>
9157 {
9158 Ok(Expression::DataType(DataType::Timestamp {
9159 precision: None,
9160 timezone: false,
9161 }))
9162 }
9163 _ => Ok(Expression::DataType(dt)),
9164 }
9165 } else {
9166 Ok(e)
9167 }
9168 }
9169
9170 Action::BigQuerySafeDivide => {
9171 // Convert SafeDivide expression to IF/CASE form for most targets
9172 if let Expression::SafeDivide(sd) = e {
9173 let x = *sd.this;
9174 let y = *sd.expression;
9175 // Wrap x and y in parens if they're complex expressions
9176 let y_ref = match &y {
9177 Expression::Column(_)
9178 | Expression::Literal(_)
9179 | Expression::Identifier(_) => y.clone(),
9180 _ => Expression::Paren(Box::new(Paren {
9181 this: y.clone(),
9182 trailing_comments: vec![],
9183 })),
9184 };
9185 let x_ref = match &x {
9186 Expression::Column(_)
9187 | Expression::Literal(_)
9188 | Expression::Identifier(_) => x.clone(),
9189 _ => Expression::Paren(Box::new(Paren {
9190 this: x.clone(),
9191 trailing_comments: vec![],
9192 })),
9193 };
9194 let condition = Expression::Neq(Box::new(BinaryOp::new(
9195 y_ref.clone(),
9196 Expression::number(0),
9197 )));
9198 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
9199
9200 if matches!(target, DialectType::Presto | DialectType::Trino) {
9201 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
9202 let cast_x = Expression::Cast(Box::new(Cast {
9203 this: match &x {
9204 Expression::Column(_)
9205 | Expression::Literal(_)
9206 | Expression::Identifier(_) => x,
9207 _ => Expression::Paren(Box::new(Paren {
9208 this: x,
9209 trailing_comments: vec![],
9210 })),
9211 },
9212 to: DataType::Double {
9213 precision: None,
9214 scale: None,
9215 },
9216 trailing_comments: vec![],
9217 double_colon_syntax: false,
9218 format: None,
9219 default: None,
9220 inferred_type: None,
9221 }));
9222 let cast_div = Expression::Div(Box::new(BinaryOp::new(
9223 cast_x,
9224 match &y {
9225 Expression::Column(_)
9226 | Expression::Literal(_)
9227 | Expression::Identifier(_) => y,
9228 _ => Expression::Paren(Box::new(Paren {
9229 this: y,
9230 trailing_comments: vec![],
9231 })),
9232 },
9233 )));
9234 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9235 condition,
9236 true_value: cast_div,
9237 false_value: Some(Expression::Null(Null)),
9238 original_name: None,
9239 inferred_type: None,
9240 })))
9241 } else if matches!(target, DialectType::PostgreSQL) {
9242 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
9243 let cast_x = Expression::Cast(Box::new(Cast {
9244 this: match &x {
9245 Expression::Column(_)
9246 | Expression::Literal(_)
9247 | Expression::Identifier(_) => x,
9248 _ => Expression::Paren(Box::new(Paren {
9249 this: x,
9250 trailing_comments: vec![],
9251 })),
9252 },
9253 to: DataType::Custom {
9254 name: "DOUBLE PRECISION".to_string(),
9255 },
9256 trailing_comments: vec![],
9257 double_colon_syntax: false,
9258 format: None,
9259 default: None,
9260 inferred_type: None,
9261 }));
9262 let y_paren = match &y {
9263 Expression::Column(_)
9264 | Expression::Literal(_)
9265 | Expression::Identifier(_) => y,
9266 _ => Expression::Paren(Box::new(Paren {
9267 this: y,
9268 trailing_comments: vec![],
9269 })),
9270 };
9271 let cast_div =
9272 Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
9273 Ok(Expression::Case(Box::new(Case {
9274 operand: None,
9275 whens: vec![(condition, cast_div)],
9276 else_: Some(Expression::Null(Null)),
9277 comments: Vec::new(),
9278 inferred_type: None,
9279 })))
9280 } else if matches!(target, DialectType::DuckDB) {
9281 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
9282 Ok(Expression::Case(Box::new(Case {
9283 operand: None,
9284 whens: vec![(condition, div_expr)],
9285 else_: Some(Expression::Null(Null)),
9286 comments: Vec::new(),
9287 inferred_type: None,
9288 })))
9289 } else if matches!(target, DialectType::Snowflake) {
9290 // Snowflake: IFF(y <> 0, x / y, NULL)
9291 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9292 condition,
9293 true_value: div_expr,
9294 false_value: Some(Expression::Null(Null)),
9295 original_name: Some("IFF".to_string()),
9296 inferred_type: None,
9297 })))
9298 } else {
9299 // All others: IF(y <> 0, x / y, NULL)
9300 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9301 condition,
9302 true_value: div_expr,
9303 false_value: Some(Expression::Null(Null)),
9304 original_name: None,
9305 inferred_type: None,
9306 })))
9307 }
9308 } else {
9309 Ok(e)
9310 }
9311 }
9312
            Action::BigQueryLastDayStripUnit => {
                // BigQuery allows LAST_DAY(date, unit); everywhere else MONTH is
                // the implicit (and only) unit, so the unit is dropped and the
                // call is rewritten per target dialect.
                if let Expression::LastDay(mut ld) = e {
                    ld.unit = None; // Strip the unit (MONTH is default)
                    match target {
                        DialectType::PostgreSQL => {
                            // PostgreSQL has no LAST_DAY; emulate with interval arithmetic:
                            // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
                            let date_trunc = Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![
                                    Expression::Literal(crate::expressions::Literal::String(
                                        "MONTH".to_string(),
                                    )),
                                    ld.this.clone(),
                                ],
                            )));
                            // Start of next month: truncated date + 1 month.
                            let plus_month =
                                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                                    date_trunc,
                                    Expression::Interval(Box::new(
                                        crate::expressions::Interval {
                                            this: Some(Expression::Literal(
                                                crate::expressions::Literal::String(
                                                    "1 MONTH".to_string(),
                                                ),
                                            )),
                                            unit: None,
                                        },
                                    )),
                                )));
                            // Step back one day to land on the last day of the month.
                            let minus_day =
                                Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                                    plus_month,
                                    Expression::Interval(Box::new(
                                        crate::expressions::Interval {
                                            this: Some(Expression::Literal(
                                                crate::expressions::Literal::String(
                                                    "1 DAY".to_string(),
                                                ),
                                            )),
                                            unit: None,
                                        },
                                    )),
                                )));
                            // The interval arithmetic yields a timestamp-ish value;
                            // cast back to DATE to match LAST_DAY's return type.
                            Ok(Expression::Cast(Box::new(Cast {
                                this: minus_day,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::Presto => {
                            // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
                            Ok(Expression::Function(Box::new(Function::new(
                                "LAST_DAY_OF_MONTH".to_string(),
                                vec![ld.this],
                            ))))
                        }
                        DialectType::ClickHouse => {
                            // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
                            // Need to wrap the DATE type in Nullable
                            // Only an existing CAST argument is rewritten; a bare
                            // column/expression argument passes through unchanged.
                            let nullable_date = match ld.this {
                                Expression::Cast(mut c) => {
                                    c.to = DataType::Nullable {
                                        inner: Box::new(DataType::Date),
                                    };
                                    Expression::Cast(c)
                                }
                                other => other,
                            };
                            ld.this = nullable_date;
                            Ok(Expression::LastDay(ld))
                        }
                        // All other targets keep LAST_DAY (now unit-free) as-is.
                        _ => Ok(Expression::LastDay(ld)),
                    }
                } else {
                    // Not a LastDay node: action does not apply.
                    Ok(e)
                }
            }
9394
            Action::BigQueryCastFormat => {
                // Rewrites BigQuery's `CAST(x AS type FORMAT 'fmt')` syntax.
                // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
                // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
                // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
                //
                // Pull out the operand, target type, FORMAT expression, and the
                // safe-cast flag. Casts without a FORMAT clause pass through.
                let (this, to, format_expr, is_safe) = match e {
                    Expression::Cast(ref c) if c.format.is_some() => (
                        c.this.clone(),
                        c.to.clone(),
                        c.format.as_ref().unwrap().as_ref().clone(),
                        false,
                    ),
                    Expression::SafeCast(ref c) if c.format.is_some() => (
                        c.this.clone(),
                        c.to.clone(),
                        c.format.as_ref().unwrap().as_ref().clone(),
                        true,
                    ),
                    _ => return Ok(e),
                };
                // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
                if matches!(target, DialectType::BigQuery) {
                    match &to {
                        DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
                            // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
                            return Ok(e);
                        }
                        _ => {}
                    }
                }
                // Extract timezone from format if AT TIME ZONE is present
                let (actual_format_expr, timezone) = match &format_expr {
                    Expression::AtTimeZone(ref atz) => {
                        (atz.this.clone(), Some(atz.zone.clone()))
                    }
                    _ => (format_expr.clone(), None),
                };
                // Convert the SQL FORMAT picture (e.g. 'MM/DD/YYYY') into a
                // strftime-style pattern usable by PARSE_* / STRPTIME.
                let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
                match target {
                    DialectType::BigQuery => {
                        // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
                        // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
                        let func_name = match &to {
                            DataType::Date => "PARSE_DATE",
                            DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
                            DataType::Time { .. } => "PARSE_TIMESTAMP",
                            _ => "PARSE_TIMESTAMP",
                        };
                        let mut func_args = vec![strftime_fmt, this];
                        if let Some(tz) = timezone {
                            // PARSE_TIMESTAMP takes the timezone as a trailing argument.
                            func_args.push(tz);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            func_args,
                        ))))
                    }
                    DialectType::DuckDB => {
                        // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
                        // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
                        let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
                        // TRY_STRPTIME returns NULL on parse failure, mirroring SAFE_CAST.
                        let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
                        let parse_call = Expression::Function(Box::new(Function::new(
                            parse_fn_name.to_string(),
                            vec![this, duck_fmt],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: parse_call,
                            to,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    // Other targets keep the original CAST ... FORMAT node untouched.
                    _ => Ok(e),
                }
            }
9473
            Action::BigQueryFunctionNormalize => {
                // Delegates all BigQuery-specific function renaming/rewriting to
                // the dedicated helper; see normalize_bigquery_function.
                Self::normalize_bigquery_function(e, source, target)
            }
9477
            Action::BigQueryToHexBare => {
                // Not used anymore - handled directly in normalize_bigquery_function
                // Kept as an explicit no-op so the Action enum stays exhaustive.
                Ok(e)
            }
9482
            Action::BigQueryToHexLower => {
                // Simplifies LOWER(...) wrappers introduced around HEX/TO_HEX
                // during hex-casing translation between dialects.
                if let Expression::Lower(uf) = e {
                    match uf.this {
                        // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
                        Expression::Function(f)
                            if matches!(target, DialectType::BigQuery)
                                && f.name == "TO_HEX" =>
                        {
                            Ok(Expression::Function(f))
                        }
                        // LOWER(LOWER(HEX/TO_HEX(x))) patterns
                        Expression::Lower(inner_uf) => {
                            if matches!(target, DialectType::BigQuery) {
                                // BQ->BQ: extract TO_HEX
                                if let Expression::Function(f) = inner_uf.this {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TO_HEX".to_string(),
                                        f.args,
                                    ))))
                                } else {
                                    Ok(Expression::Lower(inner_uf))
                                }
                            } else {
                                // Flatten: LOWER(LOWER(x)) -> LOWER(x)
                                Ok(Expression::Lower(inner_uf))
                            }
                        }
                        // Any other inner expression: rebuild the LOWER wrapper.
                        // NOTE(review): the rebuilt UnaryFunc sets original_name to
                        // None, dropping any name the outer LOWER carried — confirm
                        // this is intentional.
                        other => {
                            Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
                                this: other,
                                original_name: None,
                                inferred_type: None,
                            })))
                        }
                    }
                } else {
                    // Not a Lower node: action does not apply.
                    Ok(e)
                }
            }
9522
            Action::BigQueryToHexUpper => {
                // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
                // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
                if let Expression::Upper(uf) = e {
                    if let Expression::Lower(inner_uf) = uf.this {
                        // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
                        if matches!(target, DialectType::BigQuery) {
                            // Restore TO_HEX name in inner function
                            if let Expression::Function(f) = inner_uf.this {
                                let restored = Expression::Function(Box::new(Function::new(
                                    "TO_HEX".to_string(),
                                    f.args,
                                )));
                                Ok(Expression::Upper(Box::new(
                                    crate::expressions::UnaryFunc::new(restored),
                                )))
                            } else {
                                // No function inside: keep UPPER around the inner value.
                                Ok(Expression::Upper(inner_uf))
                            }
                        } else {
                            // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
                            Ok(inner_uf.this)
                        }
                    } else {
                        // Not an UPPER(LOWER(...)) chain: leave untouched.
                        Ok(Expression::Upper(uf))
                    }
                } else {
                    // Not an Upper node: action does not apply.
                    Ok(e)
                }
            }
9553
            Action::BigQueryAnyValueHaving => {
                // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
                // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
                // having_max is Some((expr, is_max)) when a HAVING MAX/MIN clause
                // was parsed; plain ANY_VALUE(x) passes through unchanged.
                if let Expression::AnyValue(agg) = e {
                    if let Some((having_expr, is_max)) = agg.having_max {
                        let func_name = if is_max {
                            "ARG_MAX_NULL"
                        } else {
                            "ARG_MIN_NULL"
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![agg.this, *having_expr],
                        ))))
                    } else {
                        Ok(Expression::AnyValue(agg))
                    }
                } else {
                    // Not an AnyValue node: action does not apply.
                    Ok(e)
                }
            }
9575
9576 Action::BigQueryApproxQuantiles => {
9577 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
9578 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
9579 if let Expression::AggregateFunction(agg) = e {
9580 if agg.args.len() >= 2 {
9581 let x_expr = agg.args[0].clone();
9582 let n_expr = &agg.args[1];
9583
9584 // Extract the numeric value from n_expr
9585 let n = match n_expr {
9586 Expression::Literal(crate::expressions::Literal::Number(s)) => {
9587 s.parse::<usize>().unwrap_or(2)
9588 }
9589 _ => 2,
9590 };
9591
9592 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
9593 let mut quantiles = Vec::new();
9594 for i in 0..=n {
9595 let q = i as f64 / n as f64;
9596 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
9597 if q == 0.0 {
9598 quantiles.push(Expression::number(0));
9599 } else if q == 1.0 {
9600 quantiles.push(Expression::number(1));
9601 } else {
9602 quantiles.push(Expression::Literal(
9603 crate::expressions::Literal::Number(format!("{}", q)),
9604 ));
9605 }
9606 }
9607
9608 let array_expr =
9609 Expression::Array(Box::new(crate::expressions::Array {
9610 expressions: quantiles,
9611 }));
9612
9613 // Preserve DISTINCT modifier
9614 let mut new_func = Function::new(
9615 "APPROX_QUANTILE".to_string(),
9616 vec![x_expr, array_expr],
9617 );
9618 new_func.distinct = agg.distinct;
9619 Ok(Expression::Function(Box::new(new_func)))
9620 } else {
9621 Ok(Expression::AggregateFunction(agg))
9622 }
9623 } else {
9624 Ok(e)
9625 }
9626 }
9627
9628 Action::GenericFunctionNormalize => {
9629 // Helper closure to convert ARBITRARY to target-specific function
9630 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
9631 let name = match target {
9632 DialectType::ClickHouse => "any",
9633 DialectType::TSQL | DialectType::SQLite => "MAX",
9634 DialectType::Hive => "FIRST",
9635 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
9636 "ARBITRARY"
9637 }
9638 _ => "ANY_VALUE",
9639 };
9640 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
9641 }
9642
9643 if let Expression::Function(f) = e {
9644 let name = f.name.to_ascii_uppercase();
9645 match name.as_str() {
9646 "ARBITRARY" if f.args.len() == 1 => {
9647 let arg = f.args.into_iter().next().unwrap();
9648 Ok(convert_arbitrary(arg, target))
9649 }
9650 "TO_NUMBER" if f.args.len() == 1 => {
9651 let arg = f.args.into_iter().next().unwrap();
9652 match target {
9653 DialectType::Oracle | DialectType::Snowflake => {
9654 Ok(Expression::Function(Box::new(Function::new(
9655 "TO_NUMBER".to_string(),
9656 vec![arg],
9657 ))))
9658 }
9659 _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
9660 this: arg,
9661 to: crate::expressions::DataType::Double {
9662 precision: None,
9663 scale: None,
9664 },
9665 double_colon_syntax: false,
9666 trailing_comments: Vec::new(),
9667 format: None,
9668 default: None,
9669 inferred_type: None,
9670 }))),
9671 }
9672 }
9673 "AGGREGATE" if f.args.len() >= 3 => match target {
9674 DialectType::DuckDB
9675 | DialectType::Hive
9676 | DialectType::Presto
9677 | DialectType::Trino => Ok(Expression::Function(Box::new(
9678 Function::new("REDUCE".to_string(), f.args),
9679 ))),
9680 _ => Ok(Expression::Function(f)),
9681 },
9682 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep for DuckDB
9683 "REGEXP_MATCHES" if f.args.len() >= 2 => {
9684 if matches!(target, DialectType::DuckDB) {
9685 Ok(Expression::Function(f))
9686 } else {
9687 let mut args = f.args;
9688 let this = args.remove(0);
9689 let pattern = args.remove(0);
9690 let flags = if args.is_empty() {
9691 None
9692 } else {
9693 Some(args.remove(0))
9694 };
9695 Ok(Expression::RegexpLike(Box::new(
9696 crate::expressions::RegexpFunc {
9697 this,
9698 pattern,
9699 flags,
9700 },
9701 )))
9702 }
9703 }
9704 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
9705 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
9706 if matches!(target, DialectType::DuckDB) {
9707 Ok(Expression::Function(f))
9708 } else {
9709 let mut args = f.args;
9710 let this = args.remove(0);
9711 let pattern = args.remove(0);
9712 let flags = if args.is_empty() {
9713 None
9714 } else {
9715 Some(args.remove(0))
9716 };
9717 Ok(Expression::RegexpLike(Box::new(
9718 crate::expressions::RegexpFunc {
9719 this,
9720 pattern,
9721 flags,
9722 },
9723 )))
9724 }
9725 }
9726 // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
9727 "STRUCT_EXTRACT" if f.args.len() == 2 => {
9728 let mut args = f.args;
9729 let this = args.remove(0);
9730 let field_expr = args.remove(0);
9731 // Extract string literal to get field name
9732 let field_name = match &field_expr {
9733 Expression::Literal(crate::expressions::Literal::String(s)) => {
9734 s.clone()
9735 }
9736 Expression::Identifier(id) => id.name.clone(),
9737 _ => {
9738 return Ok(Expression::Function(Box::new(Function::new(
9739 "STRUCT_EXTRACT".to_string(),
9740 vec![this, field_expr],
9741 ))))
9742 }
9743 };
9744 Ok(Expression::StructExtract(Box::new(
9745 crate::expressions::StructExtractFunc {
9746 this,
9747 field: crate::expressions::Identifier::new(field_name),
9748 },
9749 )))
9750 }
9751 // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
9752 "LIST_FILTER" if f.args.len() == 2 => {
9753 let name = match target {
9754 DialectType::DuckDB => "LIST_FILTER",
9755 _ => "FILTER",
9756 };
9757 Ok(Expression::Function(Box::new(Function::new(
9758 name.to_string(),
9759 f.args,
9760 ))))
9761 }
9762 // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
9763 "LIST_TRANSFORM" if f.args.len() == 2 => {
9764 let name = match target {
9765 DialectType::DuckDB => "LIST_TRANSFORM",
9766 _ => "TRANSFORM",
9767 };
9768 Ok(Expression::Function(Box::new(Function::new(
9769 name.to_string(),
9770 f.args,
9771 ))))
9772 }
9773 // LIST_SORT(x) -> SORT_ARRAY(x) / ARRAY_SORT(x)
9774 "LIST_SORT" if f.args.len() >= 1 => {
9775 let name = match target {
9776 DialectType::DuckDB
9777 | DialectType::Presto
9778 | DialectType::Trino => "ARRAY_SORT",
9779 _ => "SORT_ARRAY",
9780 };
9781 Ok(Expression::Function(Box::new(Function::new(
9782 name.to_string(),
9783 f.args,
9784 ))))
9785 }
9786 // LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
9787 "LIST_REVERSE_SORT" if f.args.len() >= 1 => {
9788 match target {
9789 DialectType::DuckDB => Ok(Expression::Function(Box::new(
9790 Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
9791 ))),
9792 DialectType::Spark
9793 | DialectType::Databricks
9794 | DialectType::Hive => {
9795 let mut args = f.args;
9796 args.push(Expression::Identifier(
9797 crate::expressions::Identifier::new("FALSE"),
9798 ));
9799 Ok(Expression::Function(Box::new(Function::new(
9800 "SORT_ARRAY".to_string(),
9801 args,
9802 ))))
9803 }
9804 DialectType::Presto
9805 | DialectType::Trino
9806 | DialectType::Athena => {
9807 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
9808 let arr = f.args.into_iter().next().unwrap();
9809 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
9810 parameters: vec![
9811 crate::expressions::Identifier::new("a"),
9812 crate::expressions::Identifier::new("b"),
9813 ],
9814 body: Expression::Case(Box::new(Case {
9815 operand: None,
9816 whens: vec![
9817 (
9818 Expression::Lt(Box::new(BinaryOp::new(
9819 Expression::Identifier(crate::expressions::Identifier::new("a")),
9820 Expression::Identifier(crate::expressions::Identifier::new("b")),
9821 ))),
9822 Expression::number(1),
9823 ),
9824 (
9825 Expression::Gt(Box::new(BinaryOp::new(
9826 Expression::Identifier(crate::expressions::Identifier::new("a")),
9827 Expression::Identifier(crate::expressions::Identifier::new("b")),
9828 ))),
9829 Expression::Literal(Literal::Number("-1".to_string())),
9830 ),
9831 ],
9832 else_: Some(Expression::number(0)),
9833 comments: Vec::new(),
9834 inferred_type: None,
9835 })),
9836 colon: false,
9837 parameter_types: Vec::new(),
9838 }));
9839 Ok(Expression::Function(Box::new(Function::new(
9840 "ARRAY_SORT".to_string(),
9841 vec![arr, lambda],
9842 ))))
9843 }
9844 _ => Ok(Expression::Function(Box::new(Function::new(
9845 "LIST_REVERSE_SORT".to_string(),
9846 f.args,
9847 )))),
9848 }
9849 }
                        // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
                        // NOTE(review): this target-name mapping is duplicated in the
                        // two-argument arm below — keep the two tables in sync.
                        "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
                            let mut args = f.args;
                            // Make the implicit ',' separator explicit so targets
                            // that require two arguments still work.
                            args.push(Expression::string(","));
                            let name = match target {
                                DialectType::DuckDB => "STR_SPLIT",
                                DialectType::Presto | DialectType::Trino => "SPLIT",
                                DialectType::Spark
                                | DialectType::Databricks
                                | DialectType::Hive => "SPLIT",
                                DialectType::PostgreSQL => "STRING_TO_ARRAY",
                                DialectType::Redshift => "SPLIT_TO_ARRAY",
                                _ => "SPLIT",
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                args,
                            ))))
                        }
                        // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
                        "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
                            let name = match target {
                                DialectType::DuckDB => "STR_SPLIT",
                                DialectType::Presto | DialectType::Trino => "SPLIT",
                                DialectType::Spark
                                | DialectType::Databricks
                                | DialectType::Hive => "SPLIT",
                                DialectType::PostgreSQL => "STRING_TO_ARRAY",
                                DialectType::Redshift => "SPLIT_TO_ARRAY",
                                _ => "SPLIT",
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                f.args,
                            ))))
                        }
9886 // STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
9887 "STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
9888 let name = match target {
9889 DialectType::DuckDB => "STR_SPLIT",
9890 DialectType::Presto | DialectType::Trino => "SPLIT",
9891 DialectType::Spark
9892 | DialectType::Databricks
9893 | DialectType::Hive => "SPLIT",
9894 DialectType::Doris | DialectType::StarRocks => {
9895 "SPLIT_BY_STRING"
9896 }
9897 DialectType::PostgreSQL | DialectType::Redshift => {
9898 "STRING_TO_ARRAY"
9899 }
9900 _ => "SPLIT",
9901 };
9902 // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
9903 if matches!(
9904 target,
9905 DialectType::Spark
9906 | DialectType::Databricks
9907 | DialectType::Hive
9908 ) {
9909 let mut args = f.args;
9910 let x = args.remove(0);
9911 let sep = args.remove(0);
9912 // Wrap separator in CONCAT('\\Q', sep, '\\E')
9913 let escaped_sep =
9914 Expression::Function(Box::new(Function::new(
9915 "CONCAT".to_string(),
9916 vec![
9917 Expression::string("\\Q"),
9918 sep,
9919 Expression::string("\\E"),
9920 ],
9921 )));
9922 Ok(Expression::Function(Box::new(Function::new(
9923 name.to_string(),
9924 vec![x, escaped_sep],
9925 ))))
9926 } else {
9927 Ok(Expression::Function(Box::new(Function::new(
9928 name.to_string(),
9929 f.args,
9930 ))))
9931 }
9932 }
9933 // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
9934 "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
9935 let name = match target {
9936 DialectType::DuckDB => "STR_SPLIT_REGEX",
9937 DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
9938 DialectType::Spark
9939 | DialectType::Databricks
9940 | DialectType::Hive => "SPLIT",
9941 _ => "REGEXP_SPLIT",
9942 };
9943 Ok(Expression::Function(Box::new(Function::new(
9944 name.to_string(),
9945 f.args,
9946 ))))
9947 }
9948 // SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark
9949 "SPLIT"
9950 if f.args.len() == 2
9951 && matches!(
9952 source,
9953 DialectType::Presto
9954 | DialectType::Trino
9955 | DialectType::Athena
9956 | DialectType::StarRocks
9957 | DialectType::Doris
9958 )
9959 && matches!(
9960 target,
9961 DialectType::Spark
9962 | DialectType::Databricks
9963 | DialectType::Hive
9964 ) =>
9965 {
9966 // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex
9967 let mut args = f.args;
9968 let x = args.remove(0);
9969 let sep = args.remove(0);
9970 let escaped_sep = Expression::Function(Box::new(Function::new(
9971 "CONCAT".to_string(),
9972 vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
9973 )));
9974 Ok(Expression::Function(Box::new(Function::new(
9975 "SPLIT".to_string(),
9976 vec![x, escaped_sep],
9977 ))))
9978 }
9979 // SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
9980 // For ClickHouse target, preserve original name to maintain camelCase
9981 "SUBSTRINGINDEX" => {
9982 let name = if matches!(target, DialectType::ClickHouse) {
9983 f.name.clone()
9984 } else {
9985 "SUBSTRING_INDEX".to_string()
9986 };
9987 Ok(Expression::Function(Box::new(Function::new(name, f.args))))
9988 }
                        // ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
                        "ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
                            // Get the array argument (first arg, drop dimension args)
                            let mut args = f.args;
                            let arr = if args.is_empty() {
                                // Zero-argument call: nothing to normalize, re-emit.
                                // NOTE(review): `name` here is the uppercased outer
                                // name, so the original casing is lost — confirm
                                // that is acceptable for case-sensitive targets.
                                return Ok(Expression::Function(Box::new(Function::new(
                                    name.to_string(),
                                    args,
                                ))));
                            } else {
                                args.remove(0)
                            };
                            // Shadowed: from here on, `name` is the target dialect's
                            // length function, not the source function name.
                            let name =
                                match target {
                                    DialectType::Spark
                                    | DialectType::Databricks
                                    | DialectType::Hive => "SIZE",
                                    DialectType::Presto | DialectType::Trino => "CARDINALITY",
                                    DialectType::BigQuery => "ARRAY_LENGTH",
                                    DialectType::DuckDB => {
                                        // DuckDB: use ARRAY_LENGTH with all args
                                        let mut all_args = vec![arr];
                                        all_args.extend(args);
                                        return Ok(Expression::Function(Box::new(
                                            Function::new("ARRAY_LENGTH".to_string(), all_args),
                                        )));
                                    }
                                    DialectType::PostgreSQL | DialectType::Redshift => {
                                        // Keep ARRAY_LENGTH with dimension arg
                                        let mut all_args = vec![arr];
                                        all_args.extend(args);
                                        return Ok(Expression::Function(Box::new(
                                            Function::new("ARRAY_LENGTH".to_string(), all_args),
                                        )));
                                    }
                                    DialectType::ClickHouse => "LENGTH",
                                    _ => "ARRAY_LENGTH",
                                };
                            // Non-early-return targets take only the array argument;
                            // any extra (dimension) arguments are dropped here.
                            Ok(Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                vec![arr],
                            ))))
                        }
10032 // UNICODE(x) -> target-specific codepoint function
10033 "UNICODE" if f.args.len() == 1 => {
10034 match target {
10035 DialectType::SQLite | DialectType::DuckDB => {
10036 Ok(Expression::Function(Box::new(Function::new(
10037 "UNICODE".to_string(),
10038 f.args,
10039 ))))
10040 }
10041 DialectType::Oracle => {
10042 // ASCII(UNISTR(x))
10043 let inner = Expression::Function(Box::new(Function::new(
10044 "UNISTR".to_string(),
10045 f.args,
10046 )));
10047 Ok(Expression::Function(Box::new(Function::new(
10048 "ASCII".to_string(),
10049 vec![inner],
10050 ))))
10051 }
10052 DialectType::MySQL => {
10053 // ORD(CONVERT(x USING utf32))
10054 let arg = f.args.into_iter().next().unwrap();
10055 let convert_expr = Expression::ConvertToCharset(Box::new(
10056 crate::expressions::ConvertToCharset {
10057 this: Box::new(arg),
10058 dest: Some(Box::new(Expression::Identifier(
10059 crate::expressions::Identifier::new("utf32"),
10060 ))),
10061 source: None,
10062 },
10063 ));
10064 Ok(Expression::Function(Box::new(Function::new(
10065 "ORD".to_string(),
10066 vec![convert_expr],
10067 ))))
10068 }
10069 _ => Ok(Expression::Function(Box::new(Function::new(
10070 "ASCII".to_string(),
10071 f.args,
10072 )))),
10073 }
10074 }
                        // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
                        "XOR" if f.args.len() >= 2 => {
                            match target {
                                DialectType::ClickHouse => {
                                    // ClickHouse: keep as xor() function with lowercase name
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "xor".to_string(),
                                        f.args,
                                    ))))
                                }
                                DialectType::Presto | DialectType::Trino => {
                                    if f.args.len() == 2 {
                                        Ok(Expression::Function(Box::new(Function::new(
                                            "BITWISE_XOR".to_string(),
                                            f.args,
                                        ))))
                                    } else {
                                        // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
                                        // Fold the remaining args pairwise, left to right.
                                        let mut args = f.args;
                                        let first = args.remove(0);
                                        let second = args.remove(0);
                                        let mut result =
                                            Expression::Function(Box::new(Function::new(
                                                "BITWISE_XOR".to_string(),
                                                vec![first, second],
                                            )));
                                        for arg in args {
                                            result =
                                                Expression::Function(Box::new(Function::new(
                                                    "BITWISE_XOR".to_string(),
                                                    vec![result, arg],
                                                )));
                                        }
                                        Ok(result)
                                    }
                                }
                                DialectType::MySQL
                                | DialectType::SingleStore
                                | DialectType::Doris
                                | DialectType::StarRocks => {
                                    // Convert XOR(a, b, c) -> Expression::Xor with expressions list
                                    // (presumably rendered as infix `a XOR b XOR c`
                                    // by the generator — confirm there)
                                    let args = f.args;
                                    Ok(Expression::Xor(Box::new(crate::expressions::Xor {
                                        this: None,
                                        expression: None,
                                        expressions: args,
                                    })))
                                }
                                DialectType::PostgreSQL | DialectType::Redshift => {
                                    // PostgreSQL: a # b (hash operator for XOR)
                                    // Left-fold into nested BitwiseXor nodes.
                                    let mut args = f.args;
                                    let first = args.remove(0);
                                    let second = args.remove(0);
                                    let mut result = Expression::BitwiseXor(Box::new(
                                        BinaryOp::new(first, second),
                                    ));
                                    for arg in args {
                                        result = Expression::BitwiseXor(Box::new(
                                            BinaryOp::new(result, arg),
                                        ));
                                    }
                                    Ok(result)
                                }
                                DialectType::DuckDB => {
                                    // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "XOR".to_string(),
                                        f.args,
                                    ))))
                                }
                                DialectType::BigQuery => {
                                    // BigQuery: a ^ b (caret operator for XOR)
                                    // Same left-fold as the PostgreSQL branch.
                                    let mut args = f.args;
                                    let first = args.remove(0);
                                    let second = args.remove(0);
                                    let mut result = Expression::BitwiseXor(Box::new(
                                        BinaryOp::new(first, second),
                                    ));
                                    for arg in args {
                                        result = Expression::BitwiseXor(Box::new(
                                            BinaryOp::new(result, arg),
                                        ));
                                    }
                                    Ok(result)
                                }
                                // All remaining targets: pass XOR(...) through unchanged.
                                _ => Ok(Expression::Function(Box::new(Function::new(
                                    "XOR".to_string(),
                                    f.args,
                                )))),
                            }
                        }
10166 // ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
10167 "ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
10168 match target {
10169 DialectType::Spark
10170 | DialectType::Databricks
10171 | DialectType::Hive => {
10172 let mut args = f.args;
10173 args.push(Expression::Identifier(
10174 crate::expressions::Identifier::new("FALSE"),
10175 ));
10176 Ok(Expression::Function(Box::new(Function::new(
10177 "SORT_ARRAY".to_string(),
10178 args,
10179 ))))
10180 }
10181 DialectType::Presto
10182 | DialectType::Trino
10183 | DialectType::Athena => {
10184 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
10185 let arr = f.args.into_iter().next().unwrap();
10186 let lambda = Expression::Lambda(Box::new(
10187 crate::expressions::LambdaExpr {
10188 parameters: vec![
10189 Identifier::new("a"),
10190 Identifier::new("b"),
10191 ],
10192 colon: false,
10193 parameter_types: Vec::new(),
10194 body: Expression::Case(Box::new(Case {
10195 operand: None,
10196 whens: vec![
10197 (
10198 Expression::Lt(Box::new(
10199 BinaryOp::new(
10200 Expression::Identifier(
10201 Identifier::new("a"),
10202 ),
10203 Expression::Identifier(
10204 Identifier::new("b"),
10205 ),
10206 ),
10207 )),
10208 Expression::number(1),
10209 ),
10210 (
10211 Expression::Gt(Box::new(
10212 BinaryOp::new(
10213 Expression::Identifier(
10214 Identifier::new("a"),
10215 ),
10216 Expression::Identifier(
10217 Identifier::new("b"),
10218 ),
10219 ),
10220 )),
10221 Expression::Neg(Box::new(
10222 crate::expressions::UnaryOp {
10223 this: Expression::number(1),
10224 inferred_type: None,
10225 },
10226 )),
10227 ),
10228 ],
10229 else_: Some(Expression::number(0)),
10230 comments: Vec::new(),
10231 inferred_type: None,
10232 })),
10233 },
10234 ));
10235 Ok(Expression::Function(Box::new(Function::new(
10236 "ARRAY_SORT".to_string(),
10237 vec![arr, lambda],
10238 ))))
10239 }
10240 _ => Ok(Expression::Function(Box::new(Function::new(
10241 "ARRAY_REVERSE_SORT".to_string(),
10242 f.args,
10243 )))),
10244 }
10245 }
10246 // ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
10247 "ENCODE" if f.args.len() == 1 => match target {
10248 DialectType::Spark
10249 | DialectType::Databricks
10250 | DialectType::Hive => {
10251 let mut args = f.args;
10252 args.push(Expression::string("utf-8"));
10253 Ok(Expression::Function(Box::new(Function::new(
10254 "ENCODE".to_string(),
10255 args,
10256 ))))
10257 }
10258 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
10259 Ok(Expression::Function(Box::new(Function::new(
10260 "TO_UTF8".to_string(),
10261 f.args,
10262 ))))
10263 }
10264 _ => Ok(Expression::Function(Box::new(Function::new(
10265 "ENCODE".to_string(),
10266 f.args,
10267 )))),
10268 },
10269 // DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
10270 "DECODE" if f.args.len() == 1 => match target {
10271 DialectType::Spark
10272 | DialectType::Databricks
10273 | DialectType::Hive => {
10274 let mut args = f.args;
10275 args.push(Expression::string("utf-8"));
10276 Ok(Expression::Function(Box::new(Function::new(
10277 "DECODE".to_string(),
10278 args,
10279 ))))
10280 }
10281 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
10282 Ok(Expression::Function(Box::new(Function::new(
10283 "FROM_UTF8".to_string(),
10284 f.args,
10285 ))))
10286 }
10287 _ => Ok(Expression::Function(Box::new(Function::new(
10288 "DECODE".to_string(),
10289 f.args,
10290 )))),
10291 },
10292 // QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive
10293 "QUANTILE" if f.args.len() == 2 => {
10294 let name = match target {
10295 DialectType::Spark
10296 | DialectType::Databricks
10297 | DialectType::Hive => "PERCENTILE",
10298 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
10299 DialectType::BigQuery => "PERCENTILE_CONT",
10300 _ => "QUANTILE",
10301 };
10302 Ok(Expression::Function(Box::new(Function::new(
10303 name.to_string(),
10304 f.args,
10305 ))))
10306 }
// QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
"QUANTILE_CONT" if f.args.len() == 2 => {
    // Destructure the two positional args: the measured column and the quantile.
    let mut args = f.args;
    let column = args.remove(0);
    let quantile = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // DuckDB supports the two-argument form natively.
            Ok(Expression::Function(Box::new(Function::new(
                "QUANTILE_CONT".to_string(),
                vec![column, quantile],
            ))))
        }
        DialectType::PostgreSQL
        | DialectType::Redshift
        | DialectType::Snowflake => {
            // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
            // The column appears twice: once inside PercentileFunc (cloned)
            // and once as the WITHIN GROUP ordering key.
            let inner = Expression::PercentileCont(Box::new(
                crate::expressions::PercentileFunc {
                    this: column.clone(),
                    percentile: quantile,
                    order_by: None,
                    filter: None,
                },
            ));
            Ok(Expression::WithinGroup(Box::new(
                crate::expressions::WithinGroup {
                    this: inner,
                    order_by: vec![crate::expressions::Ordered {
                        this: column,
                        desc: false,
                        nulls_first: None,
                        explicit_asc: false,
                        with_fill: None,
                    }],
                },
            )))
        }
        // Other targets: keep the two-argument form unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "QUANTILE_CONT".to_string(),
            vec![column, quantile],
        )))),
    }
}
// QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
// Mirrors the QUANTILE_CONT arm above, but emits the discrete percentile variant.
"QUANTILE_DISC" if f.args.len() == 2 => {
    let mut args = f.args;
    let column = args.remove(0);
    let quantile = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // DuckDB supports the two-argument form natively.
            Ok(Expression::Function(Box::new(Function::new(
                "QUANTILE_DISC".to_string(),
                vec![column, quantile],
            ))))
        }
        DialectType::PostgreSQL
        | DialectType::Redshift
        | DialectType::Snowflake => {
            // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
            let inner = Expression::PercentileDisc(Box::new(
                crate::expressions::PercentileFunc {
                    this: column.clone(),
                    percentile: quantile,
                    order_by: None,
                    filter: None,
                },
            ));
            Ok(Expression::WithinGroup(Box::new(
                crate::expressions::WithinGroup {
                    this: inner,
                    order_by: vec![crate::expressions::Ordered {
                        this: column,
                        desc: false,
                        nulls_first: None,
                        explicit_asc: false,
                        with_fill: None,
                    }],
                },
            )))
        }
        // Other targets: keep the two-argument form unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "QUANTILE_DISC".to_string(),
            vec![column, quantile],
        )))),
    }
}
// PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
"PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
    // Rename only; args (including any optional accuracy parameter) pass through.
    let name = match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => "APPROX_PERCENTILE",
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "PERCENTILE_APPROX",
        DialectType::DuckDB => "APPROX_QUANTILE",
        // NOTE(review): Postgres/Redshift PERCENTILE_CONT is exact and uses
        // WITHIN GROUP syntax — confirm downstream generation handles this.
        DialectType::PostgreSQL | DialectType::Redshift => {
            "PERCENTILE_CONT"
        }
        // Unknown target: keep whichever spelling the source used.
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
"EPOCH" if f.args.len() == 1 => {
    // DuckDB-style EPOCH(ts): map to each target's seconds-since-epoch function.
    let name = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "UNIX_TIMESTAMP",
        DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
        _ => "EPOCH",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// EPOCH_MS(x) -> target-specific epoch milliseconds conversion
"EPOCH_MS" if f.args.len() == 1 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Spark has a direct milliseconds constructor.
            Ok(Expression::Function(Box::new(Function::new(
                "TIMESTAMP_MILLIS".to_string(),
                f.args,
            ))))
        }
        DialectType::Hive => {
            // Hive: FROM_UNIXTIME(x / 1000)
            // `unwrap` is safe: the arm guard guarantees exactly one argument.
            // NOTE(review): x / 1000 follows the target dialect's division
            // semantics; sub-second precision is dropped either way.
            let arg = f.args.into_iter().next().unwrap();
            let div_expr = Expression::Div(Box::new(
                crate::expressions::BinaryOp::new(
                    arg,
                    Expression::number(1000),
                ),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_UNIXTIME".to_string(),
                vec![div_expr],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            // Presto/Trino: FROM_UNIXTIME(x / 1000), same shape as Hive above.
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_UNIXTIME".to_string(),
                vec![Expression::Div(Box::new(
                    crate::expressions::BinaryOp::new(
                        f.args.into_iter().next().unwrap(),
                        Expression::number(1000),
                    ),
                ))],
            ))))
        }
        // Other targets: keep EPOCH_MS(x) unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "EPOCH_MS".to_string(),
            f.args,
        )))),
    }
}
// HASHBYTES('algorithm', x) -> target-specific hash function
"HASHBYTES" if f.args.len() == 2 => {
    // Keep HASHBYTES as-is for TSQL target
    // (early return from the enclosing transform function).
    if matches!(target, DialectType::TSQL) {
        return Ok(Expression::Function(f));
    }
    // The algorithm must be a string literal we can inspect at transpile
    // time; any other expression leaves the call untouched.
    let algo_expr = &f.args[0];
    let algo = match algo_expr {
        Expression::Literal(crate::expressions::Literal::String(s)) => {
            s.to_ascii_uppercase()
        }
        _ => return Ok(Expression::Function(f)),
    };
    // `unwrap` is safe: the arm guard guarantees exactly two arguments.
    let data_arg = f.args.into_iter().nth(1).unwrap();
    match algo.as_str() {
        "SHA1" => {
            // NOTE(review): the Hive arm and the fallback both yield "SHA1";
            // only Spark/Databricks actually differ ("SHA").
            let name = match target {
                DialectType::Spark | DialectType::Databricks => "SHA",
                DialectType::Hive => "SHA1",
                _ => "SHA1",
            };
            Ok(Expression::Function(Box::new(Function::new(
                name.to_string(),
                vec![data_arg],
            ))))
        }
        "SHA2_256" => {
            // TSQL's SHA2_256 -> generic SHA2(x, 256).
            Ok(Expression::Function(Box::new(Function::new(
                "SHA2".to_string(),
                vec![data_arg, Expression::number(256)],
            ))))
        }
        "SHA2_512" => {
            // TSQL's SHA2_512 -> generic SHA2(x, 512).
            Ok(Expression::Function(Box::new(Function::new(
                "SHA2".to_string(),
                vec![data_arg, Expression::number(512)],
            ))))
        }
        "MD5" => Ok(Expression::Function(Box::new(Function::new(
            "MD5".to_string(),
            vec![data_arg],
        )))),
        // Unknown algorithm: re-emit HASHBYTES. Note the algorithm literal
        // is re-emitted in its uppercased form.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "HASHBYTES".to_string(),
            vec![Expression::string(&algo), data_arg],
        )))),
    }
}
// JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
"JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
    // _TEXT variant returns a scalar string; the plain variant returns JSON.
    let is_text = name == "JSON_EXTRACT_PATH_TEXT";
    let mut args = f.args;
    let json_expr = args.remove(0);
    // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
    let mut json_path = "$".to_string();
    for a in &args {
        match a {
            Expression::Literal(
                crate::expressions::Literal::String(s),
            ) => {
                // Numeric string keys become array indices: [0]
                // NOTE(review): an empty-string key also satisfies
                // `all(is_ascii_digit)` and would emit `[]` — confirm
                // callers never pass empty keys.
                if s.chars().all(|c| c.is_ascii_digit()) {
                    json_path.push('[');
                    json_path.push_str(s);
                    json_path.push(']');
                } else {
                    json_path.push('.');
                    json_path.push_str(s);
                }
            }
            _ => {
                // Non-literal key: emit a `.?` placeholder since the key
                // can't be resolved at transpile time.
                json_path.push_str(".?");
            }
        }
    }
    match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            // Spark/Hive: single-path GET_JSON_OBJECT(json, '$.k1.k2').
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            // Presto/Trino: SCALAR variant returns text, plain returns JSON.
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::BigQuery | DialectType::MySQL => {
            // Same naming scheme as Presto/Trino.
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::PostgreSQL | DialectType::Materialize => {
            // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
            // (variadic key form, not a single path string).
            let func_name = if is_text {
                "JSON_EXTRACT_PATH_TEXT"
            } else {
                "JSON_EXTRACT_PATH"
            };
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                new_args,
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
            // (modeled as JsonExtract/JsonExtractScalar with arrow syntax).
            if is_text {
                Ok(Expression::JsonExtractScalar(Box::new(
                    crate::expressions::JsonExtractFunc {
                        this: json_expr,
                        path: Expression::string(&json_path),
                        returning: None,
                        arrow_syntax: true,
                        hash_arrow_syntax: false,
                        wrapper_option: None,
                        quotes_option: None,
                        on_scalar_string: false,
                        on_error: None,
                    },
                )))
            } else {
                Ok(Expression::JsonExtract(Box::new(
                    crate::expressions::JsonExtractFunc {
                        this: json_expr,
                        path: Expression::string(&json_path),
                        returning: None,
                        arrow_syntax: true,
                        hash_arrow_syntax: false,
                        wrapper_option: None,
                        quotes_option: None,
                        on_scalar_string: false,
                        on_error: None,
                    },
                )))
            }
        }
        DialectType::Redshift => {
            // Keep as JSON_EXTRACT_PATH_TEXT for Redshift
            // (Redshift only has the _TEXT variant; variadic key form).
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXTRACT_PATH_TEXT".to_string(),
                new_args,
            ))))
        }
        DialectType::TSQL => {
            // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
            // JSON_QUERY handles objects/arrays, JSON_VALUE handles scalars;
            // ISNULL picks whichever succeeded.
            let jq = Expression::Function(Box::new(Function::new(
                "JSON_QUERY".to_string(),
                vec![json_expr.clone(), Expression::string(&json_path)],
            )));
            let jv = Expression::Function(Box::new(Function::new(
                "JSON_VALUE".to_string(),
                vec![json_expr, Expression::string(&json_path)],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ISNULL".to_string(),
                vec![jq, jv],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse takes the keys variadically, not as a path string.
            let func_name = if is_text {
                "JSONExtractString"
            } else {
                "JSONExtractRaw"
            };
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                new_args,
            ))))
        }
        // Default: Presto-style single-path extraction.
        _ => {
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
    }
}
// APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
"APPROX_DISTINCT" if f.args.len() >= 1 => {
    let name = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive
        | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
        _ => "APPROX_DISTINCT",
    };
    let mut args = f.args;
    // Hive doesn't support the accuracy parameter
    // (Presto's optional second arg), so drop everything after the column.
    if name == "APPROX_COUNT_DISTINCT"
        && matches!(target, DialectType::Hive)
    {
        args.truncate(1);
    }
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        args,
    ))))
}
// REGEXP_EXTRACT(x, pattern) - normalize default group index
// Dialects disagree on which capture group a 2-arg REGEXP_EXTRACT returns:
// Presto/Trino/DuckDB default to group 0 (whole match), Hive/Spark default
// to group 1. When the defaults differ we append the SOURCE's default
// explicitly so the result is unchanged under the target's semantics.
"REGEXP_EXTRACT" if f.args.len() == 2 => {
    // Determine source default group index
    let source_default = match source {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::DuckDB => 0,
        _ => 1, // Hive/Spark/Databricks default = 1
    };
    // Determine target default group index
    let target_default = match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::DuckDB
        | DialectType::BigQuery => 0,
        DialectType::Snowflake => {
            // Snowflake uses REGEXP_SUBSTR
            // (early return from the enclosing transform function; the
            // group-index normalization below does not apply).
            return Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_SUBSTR".to_string(),
                f.args,
            ))));
        }
        _ => 1, // Hive/Spark/Databricks default = 1
    };
    if source_default != target_default {
        // Make the source's implicit group index explicit.
        let mut args = f.args;
        args.push(Expression::number(source_default));
        Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT".to_string(),
            args,
        ))))
    } else {
        // Defaults agree: pass through untouched.
        Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT".to_string(),
            f.args,
        ))))
    }
}
// RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
"RLIKE" if f.args.len() == 2 => {
    let mut args = f.args;
    let str_expr = args.remove(0);
    let pattern = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // REGEXP_MATCHES(str, pattern)
            Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_MATCHES".to_string(),
                vec![str_expr, pattern],
            ))))
        }
        _ => {
            // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
            Ok(Expression::RegexpLike(Box::new(
                crate::expressions::RegexpFunc {
                    this: str_expr,
                    pattern,
                    flags: None,
                },
            )))
        }
    }
}
// EOMONTH(date[, month_offset]) -> target-specific
// TSQL's EOMONTH returns the last day of the month of `date`, optionally
// shifted by `month_offset` months. Each target branch applies the offset
// with that dialect's date-arithmetic idiom, then takes the last day.
"EOMONTH" if f.args.len() >= 1 => {
    let mut args = f.args;
    let date_arg = args.remove(0);
    let month_offset = if !args.is_empty() {
        Some(args.remove(0))
    } else {
        None
    };

    // Helper: wrap date in CAST to DATE
    let cast_to_date = |e: Expression| -> Expression {
        Expression::Cast(Box::new(Cast {
            this: e,
            to: DataType::Date,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }))
    };

    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![
                        // DATEADD's unit is a bare keyword, hence Identifier.
                        Expression::Identifier(Identifier::new(
                            "MONTH",
                        )),
                        offset,
                        date,
                    ],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "EOMONTH".to_string(),
                vec![date],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
            // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
            // The double CAST mirrors TSQL's lenient string-to-date coercion.
            let cast_ts = Expression::Cast(Box::new(Cast {
                this: date_arg,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let date = cast_to_date(cast_ts);
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    // Presto's DATE_ADD unit is a string literal.
                    vec![Expression::string("MONTH"), offset, date],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY_OF_MONTH".to_string(),
                vec![date],
            ))))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                // The offset is folded into the interval literal text,
                // e.g. INTERVAL '3 MONTH'.
                let interval_str = format!(
                    "{} MONTH",
                    Self::expr_to_string_static(&offset)
                );
                Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(
                        date,
                        Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::string(
                                    &interval_str,
                                )),
                                unit: None,
                            },
                        )),
                    ),
                ))
            } else {
                date
            };
            // First day of the (possibly shifted) month...
            let truncated =
                Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    vec![Expression::string("MONTH"), date],
                )));
            // ...plus one month...
            let plus_month = Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(
                    truncated,
                    Expression::Interval(Box::new(
                        crate::expressions::Interval {
                            this: Some(Expression::string("1 MONTH")),
                            unit: None,
                        },
                    )),
                ),
            ));
            // ...minus one day = last day of the month.
            let minus_day = Expression::Sub(Box::new(
                crate::expressions::BinaryOp::new(
                    plus_month,
                    Expression::Interval(Box::new(
                        crate::expressions::Interval {
                            this: Some(Expression::string("1 DAY")),
                            unit: None,
                        },
                    )),
                ),
            ));
            // Final CAST back to DATE (interval arithmetic yields timestamp).
            Ok(Expression::Cast(Box::new(Cast {
                this: minus_day,
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::DuckDB => {
            // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                // Wrap negative numbers in parentheses for DuckDB INTERVAL
                let interval_val =
                    if matches!(&offset, Expression::Neg(_)) {
                        Expression::Paren(Box::new(
                            crate::expressions::Paren {
                                this: offset,
                                trailing_comments: Vec::new(),
                            },
                        ))
                    } else {
                        offset
                    };
                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                    date,
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(interval_val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        }),
                    })),
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::Snowflake | DialectType::Redshift => {
            // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
            // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
            let date = if matches!(target, DialectType::Snowflake) {
                Expression::Function(Box::new(Function::new(
                    "TO_DATE".to_string(),
                    vec![date_arg],
                )))
            } else {
                cast_to_date(date_arg)
            };
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(
                            "MONTH",
                        )),
                        offset,
                        date,
                    ],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Spark: LAST_DAY(TO_DATE(date))
            // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
            let date = Expression::Function(Box::new(Function::new(
                "TO_DATE".to_string(),
                vec![date_arg],
            )));
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date, offset],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::MySQL => {
            // MySQL: LAST_DAY(DATE(date)) - no offset
            // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
            let date = if let Some(offset) = month_offset {
                let iu = crate::expressions::IntervalUnit::Month;
                Expression::DateAdd(Box::new(
                    crate::expressions::DateAddFunc {
                        this: date_arg,
                        interval: offset,
                        unit: iu,
                    },
                ))
            } else {
                Expression::Function(Box::new(Function::new(
                    "DATE".to_string(),
                    vec![date_arg],
                )))
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: LAST_DAY(CAST(date AS DATE))
            // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(offset),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: crate::expressions::IntervalUnit::Month,
                        use_plural: false,
                    }),
                }));
                Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![date, interval],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
            // Nullable wrapper avoids errors on unparseable inputs.
            let date = Expression::Cast(Box::new(Cast {
                this: date_arg,
                to: DataType::Nullable {
                    inner: Box::new(DataType::Date),
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(
                            "MONTH",
                        )),
                        offset,
                        date,
                    ],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::Hive => {
            // Hive: LAST_DAY(date)
            // With offset: LAST_DAY(ADD_MONTHS(date, offset)); no cast needed.
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date_arg, offset],
                )))
            } else {
                date_arg
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        _ => {
            // Default: LAST_DAY(date), with a generic DATEADD for the offset.
            let date = if let Some(offset) = month_offset {
                let unit =
                    Expression::Identifier(Identifier::new("MONTH"));
                Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![unit, offset, date_arg],
                )))
            } else {
                date_arg
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
    }
}
// LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
// BigQuery is excluded as a source — presumably because BigQuery's LAST_DAY
// accepts an optional date-part argument handled elsewhere (TODO confirm).
// Only the first argument is kept; any extra args are dropped.
"LAST_DAY" | "LAST_DAY_OF_MONTH"
    if !matches!(source, DialectType::BigQuery)
        && f.args.len() >= 1 =>
{
    // `unwrap` is safe: the guard requires at least one argument.
    let first_arg = f.args.into_iter().next().unwrap();
    match target {
        DialectType::TSQL | DialectType::Fabric => {
            Ok(Expression::Function(Box::new(Function::new(
                "EOMONTH".to_string(),
                vec![first_arg],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY_OF_MONTH".to_string(),
                vec![first_arg],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "LAST_DAY".to_string(),
            vec![first_arg],
        )))),
    }
}
// MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
"MAP"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
        ) =>
{
    // Clones are needed because `f.args` may still be moved whole into the
    // Spark branch below, while the Hive/Snowflake branches inspect elements.
    let keys_arg = f.args[0].clone();
    let vals_arg = f.args[1].clone();

    // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions.
    // Returns None when the argument is not a literal array constructor
    // (e.g. a column reference), in which case no interleaving is possible.
    fn extract_array_elements(
        expr: &Expression,
    ) -> Option<&Vec<Expression>> {
        match expr {
            Expression::Array(arr) => Some(&arr.expressions),
            Expression::ArrayFunc(arr) => Some(&arr.expressions),
            Expression::Function(f)
                if f.name.eq_ignore_ascii_case("ARRAY") =>
            {
                Some(&f.args)
            }
            _ => None,
        }
    }

    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
            Ok(Expression::Function(Box::new(Function::new(
                "MAP_FROM_ARRAYS".to_string(),
                f.args,
            ))))
        }
        DialectType::Hive => {
            // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
            if let (Some(keys), Some(vals)) = (
                extract_array_elements(&keys_arg),
                extract_array_elements(&vals_arg),
            ) {
                if keys.len() == vals.len() {
                    // Interleave k1, v1, k2, v2, ...
                    let mut interleaved = Vec::new();
                    for (k, v) in keys.iter().zip(vals.iter()) {
                        interleaved.push(k.clone());
                        interleaved.push(v.clone());
                    }
                    Ok(Expression::Function(Box::new(Function::new(
                        "MAP".to_string(),
                        interleaved,
                    ))))
                } else {
                    // Mismatched lengths: leave the call untouched.
                    Ok(Expression::Function(Box::new(Function::new(
                        "MAP".to_string(),
                        f.args,
                    ))))
                }
            } else {
                // Non-literal arrays: leave the call untouched.
                Ok(Expression::Function(Box::new(Function::new(
                    "MAP".to_string(),
                    f.args,
                ))))
            }
        }
        DialectType::Snowflake => {
            // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
            if let (Some(keys), Some(vals)) = (
                extract_array_elements(&keys_arg),
                extract_array_elements(&vals_arg),
            ) {
                if keys.len() == vals.len() {
                    let mut interleaved = Vec::new();
                    for (k, v) in keys.iter().zip(vals.iter()) {
                        interleaved.push(k.clone());
                        interleaved.push(v.clone());
                    }
                    Ok(Expression::Function(Box::new(Function::new(
                        "OBJECT_CONSTRUCT".to_string(),
                        interleaved,
                    ))))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "MAP".to_string(),
                        f.args,
                    ))))
                }
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "MAP".to_string(),
                    f.args,
                ))))
            }
        }
        // Other targets: pass the original call through untouched.
        _ => Ok(Expression::Function(f)),
    }
}
// MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
// Presto-family has no zero-argument MAP(); an empty map is spelled as
// MAP over two empty array literals.
"MAP"
    if f.args.is_empty()
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
        )
        && matches!(
            target,
            DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
        ) =>
{
    let empty_keys =
        Expression::Array(Box::new(crate::expressions::Array {
            expressions: vec![],
        }));
    let empty_vals =
        Expression::Array(Box::new(crate::expressions::Array {
            expressions: vec![],
        }));
    Ok(Expression::Function(Box::new(Function::new(
        "MAP".to_string(),
        vec![empty_keys, empty_vals],
    ))))
}
// MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
// The guard requires an even, non-zero argument count so the args can be
// split into alternating key/value pairs.
"MAP"
    if f.args.len() >= 2
        && f.args.len() % 2 == 0
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
                | DialectType::ClickHouse
        ) =>
{
    let args = f.args;
    match target {
        DialectType::DuckDB => {
            // MAP([k1, k2], [v1, v2])
            // Even positions are keys, odd positions are values.
            let mut keys = Vec::new();
            let mut vals = Vec::new();
            for (i, arg) in args.into_iter().enumerate() {
                if i % 2 == 0 {
                    keys.push(arg);
                } else {
                    vals.push(arg);
                }
            }
            let keys_arr = Expression::Array(Box::new(
                crate::expressions::Array { expressions: keys },
            ));
            let vals_arr = Expression::Array(Box::new(
                crate::expressions::Array { expressions: vals },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "MAP".to_string(),
                vec![keys_arr, vals_arr],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            // MAP(ARRAY[k1, k2], ARRAY[v1, v2]) — same split as DuckDB above;
            // the two targets differ only in how Array renders downstream.
            let mut keys = Vec::new();
            let mut vals = Vec::new();
            for (i, arg) in args.into_iter().enumerate() {
                if i % 2 == 0 {
                    keys.push(arg);
                } else {
                    vals.push(arg);
                }
            }
            let keys_arr = Expression::Array(Box::new(
                crate::expressions::Array { expressions: keys },
            ));
            let vals_arr = Expression::Array(Box::new(
                crate::expressions::Array { expressions: vals },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "MAP".to_string(),
                vec![keys_arr, vals_arr],
            ))))
        }
        // Snowflake takes interleaved pairs directly.
        DialectType::Snowflake => Ok(Expression::Function(Box::new(
            Function::new("OBJECT_CONSTRUCT".to_string(), args),
        ))),
        // ClickHouse's map() is lowercase and takes interleaved pairs.
        DialectType::ClickHouse => Ok(Expression::Function(Box::new(
            Function::new("map".to_string(), args),
        ))),
        _ => Ok(Expression::Function(Box::new(Function::new(
            "MAP".to_string(),
            args,
        )))),
    }
}
// COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
"COLLECT_LIST" if f.args.len() >= 1 => {
    // Only Spark/Hive keep the COLLECT_LIST spelling; everything else
    // uses the (near-)standard ARRAY_AGG.
    let name = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "COLLECT_LIST",
        DialectType::DuckDB
        | DialectType::PostgreSQL
        | DialectType::Redshift
        | DialectType::Snowflake
        | DialectType::BigQuery => "ARRAY_AGG",
        DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
        _ => "ARRAY_AGG",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// COLLECT_SET(x) -> target-specific distinct array aggregation
// NOTE(review): the ARRAY_AGG fallback does not add a DISTINCT qualifier,
// so duplicates may survive on fallback targets — confirm intended.
"COLLECT_SET" if f.args.len() >= 1 => {
    let name = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "COLLECT_SET",
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => "SET_AGG",
        DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
        _ => "ARRAY_AGG",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// ISNAN(x) / IS_NAN(x) - normalize
// Pure rename between the two spellings; args pass through unchanged.
"ISNAN" | "IS_NAN" => {
    let name = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "ISNAN",
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => "IS_NAN",
        DialectType::BigQuery
        | DialectType::PostgreSQL
        | DialectType::Redshift => "IS_NAN",
        DialectType::ClickHouse => "IS_NAN",
        _ => "ISNAN",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// SPLIT_PART(str, delim, index) -> target-specific
// Currently every branch emits SPLIT_PART unchanged; the branches are kept
// separate to document support status per dialect and as extension points.
"SPLIT_PART" if f.args.len() == 3 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Keep as SPLIT_PART (Spark 3.4+)
            Ok(Expression::Function(Box::new(Function::new(
                "SPLIT_PART".to_string(),
                f.args,
            ))))
        }
        // These dialects support SPLIT_PART natively.
        DialectType::DuckDB
        | DialectType::PostgreSQL
        | DialectType::Snowflake
        | DialectType::Redshift
        | DialectType::Trino
        | DialectType::Presto => Ok(Expression::Function(Box::new(
            Function::new("SPLIT_PART".to_string(), f.args),
        ))),
        DialectType::Hive => {
            // SPLIT(str, delim)[index]
            // Complex conversion, just keep as-is for now
            Ok(Expression::Function(Box::new(Function::new(
                "SPLIT_PART".to_string(),
                f.args,
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "SPLIT_PART".to_string(),
            f.args,
        )))),
    }
}
// JSON_EXTRACT(json, path) -> target-specific JSON extraction
// Unlike the JSON_EXTRACT_PATH arm above, the path here is already a single
// JSON-path expression (e.g. '$.a.b'), not a list of keys.
"JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
    let is_scalar = name == "JSON_EXTRACT_SCALAR";
    match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            let mut args = f.args;
            // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
            // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
            if let Some(Expression::Function(inner)) = args.first() {
                if inner.name.eq_ignore_ascii_case("TRY")
                    && inner.args.len() == 1
                {
                    let mut inner_args = inner.args.clone();
                    args[0] = inner_args.remove(0);
                }
            }
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                args,
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // json -> path syntax (arrow operator rather than a function call).
            let mut args = f.args;
            let json_expr = args.remove(0);
            let path = args.remove(0);
            Ok(Expression::JsonExtract(Box::new(
                crate::expressions::JsonExtractFunc {
                    this: json_expr,
                    path,
                    returning: None,
                    arrow_syntax: true,
                    hash_arrow_syntax: false,
                    wrapper_option: None,
                    quotes_option: None,
                    on_scalar_string: false,
                    on_error: None,
                },
            )))
        }
        DialectType::TSQL => {
            // TSQL: JSON_VALUE for scalars, JSON_QUERY for objects/arrays.
            let func_name = if is_scalar {
                "JSON_VALUE"
            } else {
                "JSON_QUERY"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                f.args,
            ))))
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            // NOTE(review): JSON_EXTRACT_PATH takes individual keys, but the
            // second arg here is a '$.a.b' path string — confirm a later pass
            // or the generator converts it.
            let func_name = if is_scalar {
                "JSON_EXTRACT_PATH_TEXT"
            } else {
                "JSON_EXTRACT_PATH"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                f.args,
            ))))
        }
        // Other targets: keep the source spelling.
        _ => Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            f.args,
        )))),
    }
}
11470 // MySQL JSON_SEARCH(json_doc, mode, search[, escape_char[, path]]) -> DuckDB json_tree-based lookup
11471 "JSON_SEARCH"
11472 if matches!(target, DialectType::DuckDB)
11473 && (3..=5).contains(&f.args.len()) =>
11474 {
11475 let args = &f.args;
11476
11477 // Only rewrite deterministic modes and NULL/no escape-char variant.
11478 let mode = match &args[1] {
11479 Expression::Literal(crate::expressions::Literal::String(s)) => {
11480 s.to_ascii_lowercase()
11481 }
11482 _ => return Ok(Expression::Function(f)),
11483 };
11484 if mode != "one" && mode != "all" {
11485 return Ok(Expression::Function(f));
11486 }
11487 if args.len() >= 4 && !matches!(&args[3], Expression::Null(_)) {
11488 return Ok(Expression::Function(f));
11489 }
11490
11491 let json_doc_sql = match Generator::sql(&args[0]) {
11492 Ok(sql) => sql,
11493 Err(_) => return Ok(Expression::Function(f)),
11494 };
11495 let search_sql = match Generator::sql(&args[2]) {
11496 Ok(sql) => sql,
11497 Err(_) => return Ok(Expression::Function(f)),
11498 };
11499 let path_sql = if args.len() == 5 {
11500 match Generator::sql(&args[4]) {
11501 Ok(sql) => sql,
11502 Err(_) => return Ok(Expression::Function(f)),
11503 }
11504 } else {
11505 "'$'".to_string()
11506 };
11507
11508 let rewrite_sql = if mode == "all" {
11509 format!(
11510 "(SELECT TO_JSON(LIST(__jt.fullkey)) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}))",
11511 json_doc_sql, path_sql, search_sql
11512 )
11513 } else {
11514 format!(
11515 "(SELECT TO_JSON(__jt.fullkey) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}) ORDER BY __jt.id LIMIT 1)",
11516 json_doc_sql, path_sql, search_sql
11517 )
11518 };
11519
11520 Ok(Expression::Raw(crate::expressions::Raw {
11521 sql: rewrite_sql,
11522 }))
11523 }
11524 // SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
11525 // BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
11526 "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
11527 if f.args.len() >= 2
11528 && matches!(source, DialectType::SingleStore) =>
11529 {
11530 let is_bson = name == "BSON_EXTRACT_BSON";
11531 let mut args = f.args;
11532 let json_expr = args.remove(0);
11533
11534 // Build JSONPath from remaining arguments
11535 let mut path = String::from("$");
11536 for arg in &args {
11537 if let Expression::Literal(
11538 crate::expressions::Literal::String(s),
11539 ) = arg
11540 {
11541 // Check if it's a numeric string (array index)
11542 if s.parse::<i64>().is_ok() {
11543 path.push('[');
11544 path.push_str(s);
11545 path.push(']');
11546 } else {
11547 path.push('.');
11548 path.push_str(s);
11549 }
11550 }
11551 }
11552
11553 let target_func = if is_bson {
11554 "JSONB_EXTRACT"
11555 } else {
11556 "JSON_EXTRACT"
11557 };
11558 Ok(Expression::Function(Box::new(Function::new(
11559 target_func.to_string(),
11560 vec![json_expr, Expression::string(&path)],
11561 ))))
11562 }
11563 // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
11564 "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
11565 Ok(Expression::Function(Box::new(Function {
11566 name: "arraySum".to_string(),
11567 args: f.args,
11568 distinct: f.distinct,
11569 trailing_comments: f.trailing_comments,
11570 use_bracket_syntax: f.use_bracket_syntax,
11571 no_parens: f.no_parens,
11572 quoted: f.quoted,
11573 span: None,
11574 inferred_type: None,
11575 })))
11576 }
11577 // TSQL JSON_QUERY/JSON_VALUE -> target-specific
11578 // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
11579 // and is handled by JsonQueryValueConvert action. This handles the case where
11580 // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
11581 "JSON_QUERY" | "JSON_VALUE"
11582 if f.args.len() == 2
11583 && matches!(
11584 source,
11585 DialectType::TSQL | DialectType::Fabric
11586 ) =>
11587 {
11588 match target {
11589 DialectType::Spark
11590 | DialectType::Databricks
11591 | DialectType::Hive => Ok(Expression::Function(Box::new(
11592 Function::new("GET_JSON_OBJECT".to_string(), f.args),
11593 ))),
11594 _ => Ok(Expression::Function(Box::new(Function::new(
11595 name.to_string(),
11596 f.args,
11597 )))),
11598 }
11599 }
11600 // UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
11601 "UNIX_TIMESTAMP" if f.args.len() == 1 => {
11602 let arg = f.args.into_iter().next().unwrap();
11603 let is_hive_source = matches!(
11604 source,
11605 DialectType::Hive
11606 | DialectType::Spark
11607 | DialectType::Databricks
11608 );
11609 match target {
11610 DialectType::DuckDB if is_hive_source => {
11611 // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
11612 let strptime =
11613 Expression::Function(Box::new(Function::new(
11614 "STRPTIME".to_string(),
11615 vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
11616 )));
11617 Ok(Expression::Function(Box::new(Function::new(
11618 "EPOCH".to_string(),
11619 vec![strptime],
11620 ))))
11621 }
11622 DialectType::Presto | DialectType::Trino if is_hive_source => {
11623 // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
11624 let cast_varchar =
11625 Expression::Cast(Box::new(crate::expressions::Cast {
11626 this: arg.clone(),
11627 to: DataType::VarChar {
11628 length: None,
11629 parenthesized_length: false,
11630 },
11631 trailing_comments: vec![],
11632 double_colon_syntax: false,
11633 format: None,
11634 default: None,
11635 inferred_type: None,
11636 }));
11637 let date_parse =
11638 Expression::Function(Box::new(Function::new(
11639 "DATE_PARSE".to_string(),
11640 vec![
11641 cast_varchar,
11642 Expression::string("%Y-%m-%d %T"),
11643 ],
11644 )));
11645 let try_expr = Expression::Function(Box::new(
11646 Function::new("TRY".to_string(), vec![date_parse]),
11647 ));
11648 let date_format =
11649 Expression::Function(Box::new(Function::new(
11650 "DATE_FORMAT".to_string(),
11651 vec![arg, Expression::string("%Y-%m-%d %T")],
11652 )));
11653 let parse_datetime =
11654 Expression::Function(Box::new(Function::new(
11655 "PARSE_DATETIME".to_string(),
11656 vec![
11657 date_format,
11658 Expression::string("yyyy-MM-dd HH:mm:ss"),
11659 ],
11660 )));
11661 let coalesce =
11662 Expression::Function(Box::new(Function::new(
11663 "COALESCE".to_string(),
11664 vec![try_expr, parse_datetime],
11665 )));
11666 Ok(Expression::Function(Box::new(Function::new(
11667 "TO_UNIXTIME".to_string(),
11668 vec![coalesce],
11669 ))))
11670 }
11671 DialectType::Presto | DialectType::Trino => {
11672 Ok(Expression::Function(Box::new(Function::new(
11673 "TO_UNIXTIME".to_string(),
11674 vec![arg],
11675 ))))
11676 }
11677 _ => Ok(Expression::Function(Box::new(Function::new(
11678 "UNIX_TIMESTAMP".to_string(),
11679 vec![arg],
11680 )))),
11681 }
11682 }
11683 // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
11684 "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
11685 DialectType::Spark
11686 | DialectType::Databricks
11687 | DialectType::Hive => Ok(Expression::Function(Box::new(
11688 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
11689 ))),
11690 _ => Ok(Expression::Function(Box::new(Function::new(
11691 "TO_UNIX_TIMESTAMP".to_string(),
11692 f.args,
11693 )))),
11694 },
11695 // CURDATE() -> CURRENT_DATE
11696 "CURDATE" => {
11697 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
11698 }
11699 // CURTIME() -> CURRENT_TIME
11700 "CURTIME" => {
11701 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
11702 precision: None,
11703 }))
11704 }
11705 // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive (drop lambda)
11706 "ARRAY_SORT" if f.args.len() >= 1 => {
11707 match target {
11708 DialectType::Hive => {
11709 let mut args = f.args;
11710 args.truncate(1); // Drop lambda comparator
11711 Ok(Expression::Function(Box::new(Function::new(
11712 "SORT_ARRAY".to_string(),
11713 args,
11714 ))))
11715 }
11716 _ => Ok(Expression::Function(f)),
11717 }
11718 }
11719 // SORT_ARRAY(x) -> ARRAY_SORT(x) for non-Hive/Spark
11720 "SORT_ARRAY" if f.args.len() == 1 => match target {
11721 DialectType::Hive
11722 | DialectType::Spark
11723 | DialectType::Databricks => Ok(Expression::Function(f)),
11724 _ => Ok(Expression::Function(Box::new(Function::new(
11725 "ARRAY_SORT".to_string(),
11726 f.args,
11727 )))),
11728 },
11729 // SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
11730 "SORT_ARRAY" if f.args.len() == 2 => {
11731 let is_desc =
11732 matches!(&f.args[1], Expression::Boolean(b) if !b.value);
11733 if is_desc {
11734 match target {
11735 DialectType::DuckDB => {
11736 Ok(Expression::Function(Box::new(Function::new(
11737 "ARRAY_REVERSE_SORT".to_string(),
11738 vec![f.args.into_iter().next().unwrap()],
11739 ))))
11740 }
11741 DialectType::Presto | DialectType::Trino => {
11742 let arr_arg = f.args.into_iter().next().unwrap();
11743 let a =
11744 Expression::Column(Box::new(crate::expressions::Column {
11745 name: crate::expressions::Identifier::new("a"),
11746 table: None,
11747 join_mark: false,
11748 trailing_comments: Vec::new(),
11749 span: None,
11750 inferred_type: None,
11751 }));
11752 let b =
11753 Expression::Column(Box::new(crate::expressions::Column {
11754 name: crate::expressions::Identifier::new("b"),
11755 table: None,
11756 join_mark: false,
11757 trailing_comments: Vec::new(),
11758 span: None,
11759 inferred_type: None,
11760 }));
11761 let case_expr = Expression::Case(Box::new(
11762 crate::expressions::Case {
11763 operand: None,
11764 whens: vec![
11765 (
11766 Expression::Lt(Box::new(
11767 BinaryOp::new(a.clone(), b.clone()),
11768 )),
11769 Expression::Literal(Literal::Number(
11770 "1".to_string(),
11771 )),
11772 ),
11773 (
11774 Expression::Gt(Box::new(
11775 BinaryOp::new(a.clone(), b.clone()),
11776 )),
11777 Expression::Literal(Literal::Number(
11778 "-1".to_string(),
11779 )),
11780 ),
11781 ],
11782 else_: Some(Expression::Literal(
11783 Literal::Number("0".to_string()),
11784 )),
11785 comments: Vec::new(),
11786 inferred_type: None,
11787 },
11788 ));
11789 let lambda = Expression::Lambda(Box::new(
11790 crate::expressions::LambdaExpr {
11791 parameters: vec![
11792 crate::expressions::Identifier::new("a"),
11793 crate::expressions::Identifier::new("b"),
11794 ],
11795 body: case_expr,
11796 colon: false,
11797 parameter_types: Vec::new(),
11798 },
11799 ));
11800 Ok(Expression::Function(Box::new(Function::new(
11801 "ARRAY_SORT".to_string(),
11802 vec![arr_arg, lambda],
11803 ))))
11804 }
11805 _ => Ok(Expression::Function(f)),
11806 }
11807 } else {
11808 // SORT_ARRAY(x, TRUE) -> ARRAY_SORT(x)
11809 match target {
11810 DialectType::Hive => Ok(Expression::Function(f)),
11811 _ => Ok(Expression::Function(Box::new(Function::new(
11812 "ARRAY_SORT".to_string(),
11813 vec![f.args.into_iter().next().unwrap()],
11814 )))),
11815 }
11816 }
11817 }
11818 // LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
11819 "LEFT" if f.args.len() == 2 => {
11820 match target {
11821 DialectType::Hive
11822 | DialectType::Presto
11823 | DialectType::Trino
11824 | DialectType::Athena => {
11825 let x = f.args[0].clone();
11826 let n = f.args[1].clone();
11827 Ok(Expression::Function(Box::new(Function::new(
11828 "SUBSTRING".to_string(),
11829 vec![x, Expression::number(1), n],
11830 ))))
11831 }
11832 DialectType::Spark | DialectType::Databricks
11833 if matches!(
11834 source,
11835 DialectType::TSQL | DialectType::Fabric
11836 ) =>
11837 {
11838 // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
11839 let x = f.args[0].clone();
11840 let n = f.args[1].clone();
11841 let cast_x = Expression::Cast(Box::new(Cast {
11842 this: x,
11843 to: DataType::VarChar {
11844 length: None,
11845 parenthesized_length: false,
11846 },
11847 double_colon_syntax: false,
11848 trailing_comments: Vec::new(),
11849 format: None,
11850 default: None,
11851 inferred_type: None,
11852 }));
11853 Ok(Expression::Function(Box::new(Function::new(
11854 "LEFT".to_string(),
11855 vec![cast_x, n],
11856 ))))
11857 }
11858 _ => Ok(Expression::Function(f)),
11859 }
11860 }
11861 "RIGHT" if f.args.len() == 2 => {
11862 match target {
11863 DialectType::Hive
11864 | DialectType::Presto
11865 | DialectType::Trino
11866 | DialectType::Athena => {
11867 let x = f.args[0].clone();
11868 let n = f.args[1].clone();
11869 // SUBSTRING(x, LENGTH(x) - (n - 1))
11870 let len_x = Expression::Function(Box::new(Function::new(
11871 "LENGTH".to_string(),
11872 vec![x.clone()],
11873 )));
11874 let n_minus_1 = Expression::Sub(Box::new(
11875 crate::expressions::BinaryOp::new(
11876 n,
11877 Expression::number(1),
11878 ),
11879 ));
11880 let n_minus_1_paren = Expression::Paren(Box::new(
11881 crate::expressions::Paren {
11882 this: n_minus_1,
11883 trailing_comments: Vec::new(),
11884 },
11885 ));
11886 let offset = Expression::Sub(Box::new(
11887 crate::expressions::BinaryOp::new(
11888 len_x,
11889 n_minus_1_paren,
11890 ),
11891 ));
11892 Ok(Expression::Function(Box::new(Function::new(
11893 "SUBSTRING".to_string(),
11894 vec![x, offset],
11895 ))))
11896 }
11897 DialectType::Spark | DialectType::Databricks
11898 if matches!(
11899 source,
11900 DialectType::TSQL | DialectType::Fabric
11901 ) =>
11902 {
11903 // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
11904 let x = f.args[0].clone();
11905 let n = f.args[1].clone();
11906 let cast_x = Expression::Cast(Box::new(Cast {
11907 this: x,
11908 to: DataType::VarChar {
11909 length: None,
11910 parenthesized_length: false,
11911 },
11912 double_colon_syntax: false,
11913 trailing_comments: Vec::new(),
11914 format: None,
11915 default: None,
11916 inferred_type: None,
11917 }));
11918 Ok(Expression::Function(Box::new(Function::new(
11919 "RIGHT".to_string(),
11920 vec![cast_x, n],
11921 ))))
11922 }
11923 _ => Ok(Expression::Function(f)),
11924 }
11925 }
11926 // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
11927 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
11928 DialectType::Snowflake => Ok(Expression::Function(Box::new(
11929 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
11930 ))),
11931 DialectType::Spark | DialectType::Databricks => {
11932 Ok(Expression::Function(Box::new(Function::new(
11933 "MAP_FROM_ARRAYS".to_string(),
11934 f.args,
11935 ))))
11936 }
11937 _ => Ok(Expression::Function(Box::new(Function::new(
11938 "MAP".to_string(),
11939 f.args,
11940 )))),
11941 },
11942 // LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
11943 // SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
11944 "LIKE" if f.args.len() >= 2 => {
11945 let (this, pattern) = if matches!(source, DialectType::SQLite) {
11946 // SQLite: LIKE(pattern, string) -> string LIKE pattern
11947 (f.args[1].clone(), f.args[0].clone())
11948 } else {
11949 // Standard: LIKE(string, pattern) -> string LIKE pattern
11950 (f.args[0].clone(), f.args[1].clone())
11951 };
11952 let escape = if f.args.len() >= 3 {
11953 Some(f.args[2].clone())
11954 } else {
11955 None
11956 };
11957 Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
11958 left: this,
11959 right: pattern,
11960 escape,
11961 quantifier: None,
11962 inferred_type: None,
11963 })))
11964 }
11965 // ILIKE(foo, 'pat') -> foo ILIKE 'pat'
11966 "ILIKE" if f.args.len() >= 2 => {
11967 let this = f.args[0].clone();
11968 let pattern = f.args[1].clone();
11969 let escape = if f.args.len() >= 3 {
11970 Some(f.args[2].clone())
11971 } else {
11972 None
11973 };
11974 Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
11975 left: this,
11976 right: pattern,
11977 escape,
11978 quantifier: None,
11979 inferred_type: None,
11980 })))
11981 }
11982 // CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
11983 "CHAR" if f.args.len() == 1 => match target {
11984 DialectType::MySQL
11985 | DialectType::SingleStore
11986 | DialectType::TSQL => Ok(Expression::Function(f)),
11987 _ => Ok(Expression::Function(Box::new(Function::new(
11988 "CHR".to_string(),
11989 f.args,
11990 )))),
11991 },
11992 // CONCAT(a, b) -> a || b for PostgreSQL
11993 "CONCAT"
11994 if f.args.len() == 2
11995 && matches!(target, DialectType::PostgreSQL)
11996 && matches!(
11997 source,
11998 DialectType::ClickHouse | DialectType::MySQL
11999 ) =>
12000 {
12001 let mut args = f.args;
12002 let right = args.pop().unwrap();
12003 let left = args.pop().unwrap();
12004 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
12005 this: Box::new(left),
12006 expression: Box::new(right),
12007 safe: None,
12008 })))
12009 }
12010 // ARRAY_TO_STRING(arr, delim) -> target-specific
12011 "ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
12012 DialectType::Presto | DialectType::Trino => {
12013 Ok(Expression::Function(Box::new(Function::new(
12014 "ARRAY_JOIN".to_string(),
12015 f.args,
12016 ))))
12017 }
12018 DialectType::TSQL => Ok(Expression::Function(Box::new(
12019 Function::new("STRING_AGG".to_string(), f.args),
12020 ))),
12021 _ => Ok(Expression::Function(f)),
12022 },
12023 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
12024 "ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
12025 DialectType::Spark
12026 | DialectType::Databricks
12027 | DialectType::Hive => Ok(Expression::Function(Box::new(
12028 Function::new("CONCAT".to_string(), f.args),
12029 ))),
12030 DialectType::Snowflake => Ok(Expression::Function(Box::new(
12031 Function::new("ARRAY_CAT".to_string(), f.args),
12032 ))),
12033 DialectType::Redshift => Ok(Expression::Function(Box::new(
12034 Function::new("ARRAY_CONCAT".to_string(), f.args),
12035 ))),
12036 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
12037 Function::new("ARRAY_CAT".to_string(), f.args),
12038 ))),
12039 DialectType::DuckDB => Ok(Expression::Function(Box::new(
12040 Function::new("LIST_CONCAT".to_string(), f.args),
12041 ))),
12042 DialectType::Presto | DialectType::Trino => {
12043 Ok(Expression::Function(Box::new(Function::new(
12044 "CONCAT".to_string(),
12045 f.args,
12046 ))))
12047 }
12048 DialectType::BigQuery => Ok(Expression::Function(Box::new(
12049 Function::new("ARRAY_CONCAT".to_string(), f.args),
12050 ))),
12051 _ => Ok(Expression::Function(f)),
12052 },
12053 // ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
12054 "HAS" if f.args.len() == 2 => match target {
12055 DialectType::Spark
12056 | DialectType::Databricks
12057 | DialectType::Hive => Ok(Expression::Function(Box::new(
12058 Function::new("ARRAY_CONTAINS".to_string(), f.args),
12059 ))),
12060 DialectType::Presto | DialectType::Trino => {
12061 Ok(Expression::Function(Box::new(Function::new(
12062 "CONTAINS".to_string(),
12063 f.args,
12064 ))))
12065 }
12066 _ => Ok(Expression::Function(f)),
12067 },
12068 // NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
12069 "NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
12070 Function::new("COALESCE".to_string(), f.args),
12071 ))),
12072 // ISNULL(x) in MySQL -> (x IS NULL)
12073 "ISNULL"
12074 if f.args.len() == 1
12075 && matches!(source, DialectType::MySQL)
12076 && matches!(target, DialectType::MySQL) =>
12077 {
12078 let arg = f.args.into_iter().next().unwrap();
12079 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
12080 this: Expression::IsNull(Box::new(
12081 crate::expressions::IsNull {
12082 this: arg,
12083 not: false,
12084 postfix_form: false,
12085 },
12086 )),
12087 trailing_comments: Vec::new(),
12088 })))
12089 }
12090 // MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL -> MySQL
12091 "MONTHNAME"
12092 if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
12093 {
12094 let arg = f.args.into_iter().next().unwrap();
12095 Ok(Expression::Function(Box::new(Function::new(
12096 "DATE_FORMAT".to_string(),
12097 vec![arg, Expression::string("%M")],
12098 ))))
12099 }
12100 // ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
12101 "SPLITBYSTRING" if f.args.len() == 2 => {
12102 let sep = f.args[0].clone();
12103 let str_arg = f.args[1].clone();
12104 match target {
12105 DialectType::DuckDB => Ok(Expression::Function(Box::new(
12106 Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
12107 ))),
12108 DialectType::Doris => {
12109 Ok(Expression::Function(Box::new(Function::new(
12110 "SPLIT_BY_STRING".to_string(),
12111 vec![str_arg, sep],
12112 ))))
12113 }
12114 DialectType::Hive
12115 | DialectType::Spark
12116 | DialectType::Databricks => {
12117 // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
12118 let escaped =
12119 Expression::Function(Box::new(Function::new(
12120 "CONCAT".to_string(),
12121 vec![
12122 Expression::string("\\Q"),
12123 sep,
12124 Expression::string("\\E"),
12125 ],
12126 )));
12127 Ok(Expression::Function(Box::new(Function::new(
12128 "SPLIT".to_string(),
12129 vec![str_arg, escaped],
12130 ))))
12131 }
12132 _ => Ok(Expression::Function(f)),
12133 }
12134 }
12135 // ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
12136 "SPLITBYREGEXP" if f.args.len() == 2 => {
12137 let sep = f.args[0].clone();
12138 let str_arg = f.args[1].clone();
12139 match target {
12140 DialectType::DuckDB => {
12141 Ok(Expression::Function(Box::new(Function::new(
12142 "STR_SPLIT_REGEX".to_string(),
12143 vec![str_arg, sep],
12144 ))))
12145 }
12146 DialectType::Hive
12147 | DialectType::Spark
12148 | DialectType::Databricks => {
12149 Ok(Expression::Function(Box::new(Function::new(
12150 "SPLIT".to_string(),
12151 vec![str_arg, sep],
12152 ))))
12153 }
12154 _ => Ok(Expression::Function(f)),
12155 }
12156 }
12157 // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
12158 "TOMONDAY" => {
12159 if f.args.len() == 1 {
12160 let arg = f.args.into_iter().next().unwrap();
12161 match target {
12162 DialectType::Doris => {
12163 Ok(Expression::Function(Box::new(Function::new(
12164 "DATE_TRUNC".to_string(),
12165 vec![arg, Expression::string("WEEK")],
12166 ))))
12167 }
12168 _ => Ok(Expression::Function(Box::new(Function::new(
12169 "DATE_TRUNC".to_string(),
12170 vec![Expression::string("WEEK"), arg],
12171 )))),
12172 }
12173 } else {
12174 Ok(Expression::Function(f))
12175 }
12176 }
12177 // COLLECT_LIST with FILTER(WHERE x IS NOT NULL) for targets that need it
12178 "COLLECT_LIST" if f.args.len() == 1 => match target {
12179 DialectType::Spark
12180 | DialectType::Databricks
12181 | DialectType::Hive => Ok(Expression::Function(f)),
12182 _ => Ok(Expression::Function(Box::new(Function::new(
12183 "ARRAY_AGG".to_string(),
12184 f.args,
12185 )))),
12186 },
12187 // TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
12188 "TO_CHAR"
12189 if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
12190 {
12191 let arg = f.args.into_iter().next().unwrap();
12192 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
12193 this: arg,
12194 to: DataType::Custom {
12195 name: "STRING".to_string(),
12196 },
12197 double_colon_syntax: false,
12198 trailing_comments: Vec::new(),
12199 format: None,
12200 default: None,
12201 inferred_type: None,
12202 })))
12203 }
12204 // DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL
12205 "DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
12206 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
12207 Function::new("RANDOM".to_string(), vec![]),
12208 ))),
12209 _ => Ok(Expression::Function(f)),
12210 },
12211 // ClickHouse formatDateTime -> target-specific
12212 "FORMATDATETIME" if f.args.len() >= 2 => match target {
12213 DialectType::MySQL => Ok(Expression::Function(Box::new(
12214 Function::new("DATE_FORMAT".to_string(), f.args),
12215 ))),
12216 _ => Ok(Expression::Function(f)),
12217 },
12218 // REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
12219 "REPLICATE" if f.args.len() == 2 => match target {
12220 DialectType::TSQL => Ok(Expression::Function(f)),
12221 _ => Ok(Expression::Function(Box::new(Function::new(
12222 "REPEAT".to_string(),
12223 f.args,
12224 )))),
12225 },
12226 // LEN(x) -> LENGTH(x) for non-TSQL targets
12227 // No CAST needed when arg is already a string literal
12228 "LEN" if f.args.len() == 1 => {
12229 match target {
12230 DialectType::TSQL => Ok(Expression::Function(f)),
12231 DialectType::Spark | DialectType::Databricks => {
12232 let arg = f.args.into_iter().next().unwrap();
12233 // Don't wrap string literals with CAST - they're already strings
12234 let is_string = matches!(
12235 &arg,
12236 Expression::Literal(
12237 crate::expressions::Literal::String(_)
12238 )
12239 );
12240 let final_arg = if is_string {
12241 arg
12242 } else {
12243 Expression::Cast(Box::new(Cast {
12244 this: arg,
12245 to: DataType::VarChar {
12246 length: None,
12247 parenthesized_length: false,
12248 },
12249 double_colon_syntax: false,
12250 trailing_comments: Vec::new(),
12251 format: None,
12252 default: None,
12253 inferred_type: None,
12254 }))
12255 };
12256 Ok(Expression::Function(Box::new(Function::new(
12257 "LENGTH".to_string(),
12258 vec![final_arg],
12259 ))))
12260 }
12261 _ => {
12262 let arg = f.args.into_iter().next().unwrap();
12263 Ok(Expression::Function(Box::new(Function::new(
12264 "LENGTH".to_string(),
12265 vec![arg],
12266 ))))
12267 }
12268 }
12269 }
12270 // COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
12271 "COUNT_BIG" if f.args.len() == 1 => match target {
12272 DialectType::TSQL => Ok(Expression::Function(f)),
12273 _ => Ok(Expression::Function(Box::new(Function::new(
12274 "COUNT".to_string(),
12275 f.args,
12276 )))),
12277 },
12278 // DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
12279 "DATEFROMPARTS" if f.args.len() == 3 => match target {
12280 DialectType::TSQL => Ok(Expression::Function(f)),
12281 _ => Ok(Expression::Function(Box::new(Function::new(
12282 "MAKE_DATE".to_string(),
12283 f.args,
12284 )))),
12285 },
12286 // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
12287 "REGEXP_LIKE" if f.args.len() >= 2 => {
12288 let str_expr = f.args[0].clone();
12289 let pattern = f.args[1].clone();
12290 let flags = if f.args.len() >= 3 {
12291 Some(f.args[2].clone())
12292 } else {
12293 None
12294 };
12295 match target {
12296 DialectType::DuckDB => {
12297 let mut new_args = vec![str_expr, pattern];
12298 if let Some(fl) = flags {
12299 new_args.push(fl);
12300 }
12301 Ok(Expression::Function(Box::new(Function::new(
12302 "REGEXP_MATCHES".to_string(),
12303 new_args,
12304 ))))
12305 }
12306 _ => Ok(Expression::RegexpLike(Box::new(
12307 crate::expressions::RegexpFunc {
12308 this: str_expr,
12309 pattern,
12310 flags,
12311 },
12312 ))),
12313 }
12314 }
12315 // ClickHouse arrayJoin -> UNNEST for PostgreSQL
12316 "ARRAYJOIN" if f.args.len() == 1 => match target {
12317 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
12318 Function::new("UNNEST".to_string(), f.args),
12319 ))),
12320 _ => Ok(Expression::Function(f)),
12321 },
12322 // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
12323 "DATETIMEFROMPARTS" if f.args.len() == 7 => {
12324 match target {
12325 DialectType::TSQL => Ok(Expression::Function(f)),
12326 DialectType::DuckDB => {
12327 // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
12328 let mut args = f.args;
12329 let ms = args.pop().unwrap();
12330 let s = args.pop().unwrap();
12331 // s + (ms / 1000.0)
12332 let ms_frac = Expression::Div(Box::new(BinaryOp::new(
12333 ms,
12334 Expression::Literal(
12335 crate::expressions::Literal::Number(
12336 "1000.0".to_string(),
12337 ),
12338 ),
12339 )));
12340 let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
12341 s,
12342 Expression::Paren(Box::new(Paren {
12343 this: ms_frac,
12344 trailing_comments: vec![],
12345 })),
12346 )));
12347 args.push(s_with_ms);
12348 Ok(Expression::Function(Box::new(Function::new(
12349 "MAKE_TIMESTAMP".to_string(),
12350 args,
12351 ))))
12352 }
12353 DialectType::Snowflake => {
12354 // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
12355 let mut args = f.args;
12356 let ms = args.pop().unwrap();
12357 // ms * 1000000
12358 let ns = Expression::Mul(Box::new(BinaryOp::new(
12359 ms,
12360 Expression::number(1000000),
12361 )));
12362 args.push(ns);
12363 Ok(Expression::Function(Box::new(Function::new(
12364 "TIMESTAMP_FROM_PARTS".to_string(),
12365 args,
12366 ))))
12367 }
12368 _ => {
12369 // Default: keep function name for other targets
12370 Ok(Expression::Function(Box::new(Function::new(
12371 "DATETIMEFROMPARTS".to_string(),
12372 f.args,
12373 ))))
12374 }
12375 }
12376 }
12377 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
12378 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
12379 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
12380 let is_try = name == "TRY_CONVERT";
12381 let type_expr = f.args[0].clone();
12382 let value_expr = f.args[1].clone();
12383 let style = if f.args.len() >= 3 {
12384 Some(&f.args[2])
12385 } else {
12386 None
12387 };
12388
12389 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
12390 if matches!(target, DialectType::TSQL) {
12391 let normalized_type = match &type_expr {
12392 Expression::DataType(dt) => {
12393 let new_dt = match dt {
12394 DataType::Int { .. } => DataType::Custom {
12395 name: "INTEGER".to_string(),
12396 },
12397 _ => dt.clone(),
12398 };
12399 Expression::DataType(new_dt)
12400 }
12401 Expression::Identifier(id) => {
12402 if id.name.eq_ignore_ascii_case("INT") {
12403 Expression::Identifier(
12404 crate::expressions::Identifier::new("INTEGER"),
12405 )
12406 } else {
12407 let upper = id.name.to_ascii_uppercase();
12408 Expression::Identifier(
12409 crate::expressions::Identifier::new(upper),
12410 )
12411 }
12412 }
12413 Expression::Column(col) => {
12414 if col.name.name.eq_ignore_ascii_case("INT") {
12415 Expression::Identifier(
12416 crate::expressions::Identifier::new("INTEGER"),
12417 )
12418 } else {
12419 let upper = col.name.name.to_ascii_uppercase();
12420 Expression::Identifier(
12421 crate::expressions::Identifier::new(upper),
12422 )
12423 }
12424 }
12425 _ => type_expr.clone(),
12426 };
12427 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
12428 let mut new_args = vec![normalized_type, value_expr];
12429 if let Some(s) = style {
12430 new_args.push(s.clone());
12431 }
12432 return Ok(Expression::Function(Box::new(Function::new(
12433 func_name.to_string(),
12434 new_args,
12435 ))));
12436 }
12437
12438 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
12439 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
12440 match e {
12441 Expression::DataType(dt) => {
12442 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
12443 match dt {
12444 DataType::Custom { name }
12445 if name.starts_with("NVARCHAR(")
12446 || name.starts_with("NCHAR(") =>
12447 {
12448 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
12449 let inner = &name[name.find('(').unwrap() + 1
12450 ..name.len() - 1];
12451 if inner.eq_ignore_ascii_case("MAX") {
12452 Some(DataType::Text)
12453 } else if let Ok(len) = inner.parse::<u32>() {
12454 if name.starts_with("NCHAR") {
12455 Some(DataType::Char {
12456 length: Some(len),
12457 })
12458 } else {
12459 Some(DataType::VarChar {
12460 length: Some(len),
12461 parenthesized_length: false,
12462 })
12463 }
12464 } else {
12465 Some(dt.clone())
12466 }
12467 }
12468 DataType::Custom { name } if name == "NVARCHAR" => {
12469 Some(DataType::VarChar {
12470 length: None,
12471 parenthesized_length: false,
12472 })
12473 }
12474 DataType::Custom { name } if name == "NCHAR" => {
12475 Some(DataType::Char { length: None })
12476 }
12477 DataType::Custom { name }
12478 if name == "NVARCHAR(MAX)"
12479 || name == "VARCHAR(MAX)" =>
12480 {
12481 Some(DataType::Text)
12482 }
12483 _ => Some(dt.clone()),
12484 }
12485 }
12486 Expression::Identifier(id) => {
12487 let name = id.name.to_ascii_uppercase();
12488 match name.as_str() {
12489 "INT" | "INTEGER" => Some(DataType::Int {
12490 length: None,
12491 integer_spelling: false,
12492 }),
12493 "BIGINT" => Some(DataType::BigInt { length: None }),
12494 "SMALLINT" => {
12495 Some(DataType::SmallInt { length: None })
12496 }
12497 "TINYINT" => {
12498 Some(DataType::TinyInt { length: None })
12499 }
12500 "FLOAT" => Some(DataType::Float {
12501 precision: None,
12502 scale: None,
12503 real_spelling: false,
12504 }),
12505 "REAL" => Some(DataType::Float {
12506 precision: None,
12507 scale: None,
12508 real_spelling: true,
12509 }),
12510 "DATETIME" | "DATETIME2" => {
12511 Some(DataType::Timestamp {
12512 timezone: false,
12513 precision: None,
12514 })
12515 }
12516 "DATE" => Some(DataType::Date),
12517 "BIT" => Some(DataType::Boolean),
12518 "TEXT" => Some(DataType::Text),
12519 "NUMERIC" => Some(DataType::Decimal {
12520 precision: None,
12521 scale: None,
12522 }),
12523 "MONEY" => Some(DataType::Decimal {
12524 precision: Some(15),
12525 scale: Some(4),
12526 }),
12527 "SMALLMONEY" => Some(DataType::Decimal {
12528 precision: Some(6),
12529 scale: Some(4),
12530 }),
12531 "VARCHAR" => Some(DataType::VarChar {
12532 length: None,
12533 parenthesized_length: false,
12534 }),
12535 "NVARCHAR" => Some(DataType::VarChar {
12536 length: None,
12537 parenthesized_length: false,
12538 }),
12539 "CHAR" => Some(DataType::Char { length: None }),
12540 "NCHAR" => Some(DataType::Char { length: None }),
12541 _ => Some(DataType::Custom { name }),
12542 }
12543 }
12544 Expression::Column(col) => {
12545 let name = col.name.name.to_ascii_uppercase();
12546 match name.as_str() {
12547 "INT" | "INTEGER" => Some(DataType::Int {
12548 length: None,
12549 integer_spelling: false,
12550 }),
12551 "BIGINT" => Some(DataType::BigInt { length: None }),
12552 "FLOAT" => Some(DataType::Float {
12553 precision: None,
12554 scale: None,
12555 real_spelling: false,
12556 }),
12557 "DATETIME" | "DATETIME2" => {
12558 Some(DataType::Timestamp {
12559 timezone: false,
12560 precision: None,
12561 })
12562 }
12563 "DATE" => Some(DataType::Date),
12564 "NUMERIC" => Some(DataType::Decimal {
12565 precision: None,
12566 scale: None,
12567 }),
12568 "VARCHAR" => Some(DataType::VarChar {
12569 length: None,
12570 parenthesized_length: false,
12571 }),
12572 "NVARCHAR" => Some(DataType::VarChar {
12573 length: None,
12574 parenthesized_length: false,
12575 }),
12576 "CHAR" => Some(DataType::Char { length: None }),
12577 "NCHAR" => Some(DataType::Char { length: None }),
12578 _ => Some(DataType::Custom { name }),
12579 }
12580 }
12581 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
12582 Expression::Function(f) => {
12583 let fname = f.name.to_ascii_uppercase();
12584 match fname.as_str() {
12585 "VARCHAR" | "NVARCHAR" => {
12586 let len = f.args.first().and_then(|a| {
12587 if let Expression::Literal(
12588 crate::expressions::Literal::Number(n),
12589 ) = a
12590 {
12591 n.parse::<u32>().ok()
12592 } else if let Expression::Identifier(id) = a
12593 {
12594 if id.name.eq_ignore_ascii_case("MAX") {
12595 None
12596 } else {
12597 None
12598 }
12599 } else {
12600 None
12601 }
12602 });
12603 // Check for VARCHAR(MAX) -> TEXT
12604 let is_max = f.args.first().map_or(false, |a| {
12605 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
12606 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
12607 });
12608 if is_max {
12609 Some(DataType::Text)
12610 } else {
12611 Some(DataType::VarChar {
12612 length: len,
12613 parenthesized_length: false,
12614 })
12615 }
12616 }
12617 "NCHAR" | "CHAR" => {
12618 let len = f.args.first().and_then(|a| {
12619 if let Expression::Literal(
12620 crate::expressions::Literal::Number(n),
12621 ) = a
12622 {
12623 n.parse::<u32>().ok()
12624 } else {
12625 None
12626 }
12627 });
12628 Some(DataType::Char { length: len })
12629 }
12630 "NUMERIC" | "DECIMAL" => {
12631 let precision = f.args.first().and_then(|a| {
12632 if let Expression::Literal(
12633 crate::expressions::Literal::Number(n),
12634 ) = a
12635 {
12636 n.parse::<u32>().ok()
12637 } else {
12638 None
12639 }
12640 });
12641 let scale = f.args.get(1).and_then(|a| {
12642 if let Expression::Literal(
12643 crate::expressions::Literal::Number(n),
12644 ) = a
12645 {
12646 n.parse::<u32>().ok()
12647 } else {
12648 None
12649 }
12650 });
12651 Some(DataType::Decimal { precision, scale })
12652 }
12653 _ => None,
12654 }
12655 }
12656 _ => None,
12657 }
12658 }
12659
12660 if let Some(mut dt) = expr_to_datatype(&type_expr) {
12661 // For TSQL source: VARCHAR/CHAR without length defaults to 30
12662 let is_tsql_source =
12663 matches!(source, DialectType::TSQL | DialectType::Fabric);
12664 if is_tsql_source {
12665 match &dt {
12666 DataType::VarChar { length: None, .. } => {
12667 dt = DataType::VarChar {
12668 length: Some(30),
12669 parenthesized_length: false,
12670 };
12671 }
12672 DataType::Char { length: None } => {
12673 dt = DataType::Char { length: Some(30) };
12674 }
12675 _ => {}
12676 }
12677 }
12678
12679 // Determine if this is a string type
12680 let is_string_type = matches!(
12681 dt,
12682 DataType::VarChar { .. }
12683 | DataType::Char { .. }
12684 | DataType::Text
12685 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
12686 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
12687 || name.starts_with("VARCHAR(") || name == "VARCHAR"
12688 || name == "STRING");
12689
12690 // Determine if this is a date/time type
12691 let is_datetime_type = matches!(
12692 dt,
12693 DataType::Timestamp { .. } | DataType::Date
12694 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
12695 || name == "DATETIME2" || name == "SMALLDATETIME");
12696
12697 // Check for date conversion with style
12698 if style.is_some() {
12699 let style_num = style.and_then(|s| {
12700 if let Expression::Literal(
12701 crate::expressions::Literal::Number(n),
12702 ) = s
12703 {
12704 n.parse::<u32>().ok()
12705 } else {
12706 None
12707 }
12708 });
12709
12710 // TSQL CONVERT date styles (Java format)
12711 let format_str = style_num.and_then(|n| match n {
12712 101 => Some("MM/dd/yyyy"),
12713 102 => Some("yyyy.MM.dd"),
12714 103 => Some("dd/MM/yyyy"),
12715 104 => Some("dd.MM.yyyy"),
12716 105 => Some("dd-MM-yyyy"),
12717 108 => Some("HH:mm:ss"),
12718 110 => Some("MM-dd-yyyy"),
12719 112 => Some("yyyyMMdd"),
12720 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
12721 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
12722 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
12723 _ => None,
12724 });
12725
12726 // Non-string, non-datetime types with style: just CAST, ignore the style
12727 if !is_string_type && !is_datetime_type {
12728 let cast_expr = if is_try {
12729 Expression::TryCast(Box::new(
12730 crate::expressions::Cast {
12731 this: value_expr,
12732 to: dt,
12733 trailing_comments: Vec::new(),
12734 double_colon_syntax: false,
12735 format: None,
12736 default: None,
12737 inferred_type: None,
12738 },
12739 ))
12740 } else {
12741 Expression::Cast(Box::new(
12742 crate::expressions::Cast {
12743 this: value_expr,
12744 to: dt,
12745 trailing_comments: Vec::new(),
12746 double_colon_syntax: false,
12747 format: None,
12748 default: None,
12749 inferred_type: None,
12750 },
12751 ))
12752 };
12753 return Ok(cast_expr);
12754 }
12755
12756 if let Some(java_fmt) = format_str {
12757 let c_fmt = java_fmt
12758 .replace("yyyy", "%Y")
12759 .replace("MM", "%m")
12760 .replace("dd", "%d")
12761 .replace("HH", "%H")
12762 .replace("mm", "%M")
12763 .replace("ss", "%S")
12764 .replace("SSSSSS", "%f")
12765 .replace("SSS", "%f")
12766 .replace("'T'", "T");
12767
12768 // For datetime target types: style is the INPUT format for parsing strings -> dates
12769 if is_datetime_type {
12770 match target {
12771 DialectType::DuckDB => {
12772 return Ok(Expression::Function(Box::new(
12773 Function::new(
12774 "STRPTIME".to_string(),
12775 vec![
12776 value_expr,
12777 Expression::string(&c_fmt),
12778 ],
12779 ),
12780 )));
12781 }
12782 DialectType::Spark
12783 | DialectType::Databricks => {
12784 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
12785 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
12786 let func_name =
12787 if matches!(dt, DataType::Date) {
12788 "TO_DATE"
12789 } else {
12790 "TO_TIMESTAMP"
12791 };
12792 return Ok(Expression::Function(Box::new(
12793 Function::new(
12794 func_name.to_string(),
12795 vec![
12796 value_expr,
12797 Expression::string(java_fmt),
12798 ],
12799 ),
12800 )));
12801 }
12802 DialectType::Hive => {
12803 return Ok(Expression::Function(Box::new(
12804 Function::new(
12805 "TO_TIMESTAMP".to_string(),
12806 vec![
12807 value_expr,
12808 Expression::string(java_fmt),
12809 ],
12810 ),
12811 )));
12812 }
12813 _ => {
12814 return Ok(Expression::Cast(Box::new(
12815 crate::expressions::Cast {
12816 this: value_expr,
12817 to: dt,
12818 trailing_comments: Vec::new(),
12819 double_colon_syntax: false,
12820 format: None,
12821 default: None,
12822 inferred_type: None,
12823 },
12824 )));
12825 }
12826 }
12827 }
12828
12829 // For string target types: style is the OUTPUT format for dates -> strings
12830 match target {
12831 DialectType::DuckDB => Ok(Expression::Function(
12832 Box::new(Function::new(
12833 "STRPTIME".to_string(),
12834 vec![
12835 value_expr,
12836 Expression::string(&c_fmt),
12837 ],
12838 )),
12839 )),
12840 DialectType::Spark | DialectType::Databricks => {
12841 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
12842 // Determine the target string type
12843 let string_dt = match &dt {
12844 DataType::VarChar {
12845 length: Some(l),
12846 ..
12847 } => DataType::VarChar {
12848 length: Some(*l),
12849 parenthesized_length: false,
12850 },
12851 DataType::Text => DataType::Custom {
12852 name: "STRING".to_string(),
12853 },
12854 _ => DataType::Custom {
12855 name: "STRING".to_string(),
12856 },
12857 };
12858 let date_format_expr = Expression::Function(
12859 Box::new(Function::new(
12860 "DATE_FORMAT".to_string(),
12861 vec![
12862 value_expr,
12863 Expression::string(java_fmt),
12864 ],
12865 )),
12866 );
12867 let cast_expr = if is_try {
12868 Expression::TryCast(Box::new(
12869 crate::expressions::Cast {
12870 this: date_format_expr,
12871 to: string_dt,
12872 trailing_comments: Vec::new(),
12873 double_colon_syntax: false,
12874 format: None,
12875 default: None,
12876 inferred_type: None,
12877 },
12878 ))
12879 } else {
12880 Expression::Cast(Box::new(
12881 crate::expressions::Cast {
12882 this: date_format_expr,
12883 to: string_dt,
12884 trailing_comments: Vec::new(),
12885 double_colon_syntax: false,
12886 format: None,
12887 default: None,
12888 inferred_type: None,
12889 },
12890 ))
12891 };
12892 Ok(cast_expr)
12893 }
12894 DialectType::MySQL | DialectType::SingleStore => {
12895 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
12896 let mysql_fmt = java_fmt
12897 .replace("yyyy", "%Y")
12898 .replace("MM", "%m")
12899 .replace("dd", "%d")
12900 .replace("HH:mm:ss.SSSSSS", "%T")
12901 .replace("HH:mm:ss", "%T")
12902 .replace("HH", "%H")
12903 .replace("mm", "%i")
12904 .replace("ss", "%S");
12905 let date_format_expr = Expression::Function(
12906 Box::new(Function::new(
12907 "DATE_FORMAT".to_string(),
12908 vec![
12909 value_expr,
12910 Expression::string(&mysql_fmt),
12911 ],
12912 )),
12913 );
12914 // MySQL uses CHAR for string casts
12915 let mysql_dt = match &dt {
12916 DataType::VarChar { length, .. } => {
12917 DataType::Char { length: *length }
12918 }
12919 _ => dt,
12920 };
12921 Ok(Expression::Cast(Box::new(
12922 crate::expressions::Cast {
12923 this: date_format_expr,
12924 to: mysql_dt,
12925 trailing_comments: Vec::new(),
12926 double_colon_syntax: false,
12927 format: None,
12928 default: None,
12929 inferred_type: None,
12930 },
12931 )))
12932 }
12933 DialectType::Hive => {
12934 let func_name = "TO_TIMESTAMP";
12935 Ok(Expression::Function(Box::new(
12936 Function::new(
12937 func_name.to_string(),
12938 vec![
12939 value_expr,
12940 Expression::string(java_fmt),
12941 ],
12942 ),
12943 )))
12944 }
12945 _ => Ok(Expression::Cast(Box::new(
12946 crate::expressions::Cast {
12947 this: value_expr,
12948 to: dt,
12949 trailing_comments: Vec::new(),
12950 double_colon_syntax: false,
12951 format: None,
12952 default: None,
12953 inferred_type: None,
12954 },
12955 ))),
12956 }
12957 } else {
12958 // Unknown style, just CAST
12959 let cast_expr = if is_try {
12960 Expression::TryCast(Box::new(
12961 crate::expressions::Cast {
12962 this: value_expr,
12963 to: dt,
12964 trailing_comments: Vec::new(),
12965 double_colon_syntax: false,
12966 format: None,
12967 default: None,
12968 inferred_type: None,
12969 },
12970 ))
12971 } else {
12972 Expression::Cast(Box::new(
12973 crate::expressions::Cast {
12974 this: value_expr,
12975 to: dt,
12976 trailing_comments: Vec::new(),
12977 double_colon_syntax: false,
12978 format: None,
12979 default: None,
12980 inferred_type: None,
12981 },
12982 ))
12983 };
12984 Ok(cast_expr)
12985 }
12986 } else {
12987 // No style - simple CAST
12988 let final_dt = if matches!(
12989 target,
12990 DialectType::MySQL | DialectType::SingleStore
12991 ) {
12992 match &dt {
12993 DataType::Int { .. }
12994 | DataType::BigInt { .. }
12995 | DataType::SmallInt { .. }
12996 | DataType::TinyInt { .. } => DataType::Custom {
12997 name: "SIGNED".to_string(),
12998 },
12999 DataType::VarChar { length, .. } => {
13000 DataType::Char { length: *length }
13001 }
13002 _ => dt,
13003 }
13004 } else {
13005 dt
13006 };
13007 let cast_expr = if is_try {
13008 Expression::TryCast(Box::new(
13009 crate::expressions::Cast {
13010 this: value_expr,
13011 to: final_dt,
13012 trailing_comments: Vec::new(),
13013 double_colon_syntax: false,
13014 format: None,
13015 default: None,
13016 inferred_type: None,
13017 },
13018 ))
13019 } else {
13020 Expression::Cast(Box::new(crate::expressions::Cast {
13021 this: value_expr,
13022 to: final_dt,
13023 trailing_comments: Vec::new(),
13024 double_colon_syntax: false,
13025 format: None,
13026 default: None,
13027 inferred_type: None,
13028 }))
13029 };
13030 Ok(cast_expr)
13031 }
13032 } else {
13033 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
13034 Ok(Expression::Function(f))
13035 }
13036 }
13037 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
13038 "STRFTIME" if f.args.len() == 2 => {
13039 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
13040 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
13041 // SQLite: args[0] = format, args[1] = value
13042 (f.args[1].clone(), &f.args[0])
13043 } else {
13044 // DuckDB and others: args[0] = value, args[1] = format
13045 (f.args[0].clone(), &f.args[1])
13046 };
13047
13048 // Helper to convert C-style format to Java-style
13049 fn c_to_java_format(fmt: &str) -> String {
13050 fmt.replace("%Y", "yyyy")
13051 .replace("%m", "MM")
13052 .replace("%d", "dd")
13053 .replace("%H", "HH")
13054 .replace("%M", "mm")
13055 .replace("%S", "ss")
13056 .replace("%f", "SSSSSS")
13057 .replace("%y", "yy")
13058 .replace("%-m", "M")
13059 .replace("%-d", "d")
13060 .replace("%-H", "H")
13061 .replace("%-I", "h")
13062 .replace("%I", "hh")
13063 .replace("%p", "a")
13064 .replace("%j", "DDD")
13065 .replace("%a", "EEE")
13066 .replace("%b", "MMM")
13067 .replace("%F", "yyyy-MM-dd")
13068 .replace("%T", "HH:mm:ss")
13069 }
13070
13071 // Helper: recursively convert format strings within expressions (handles CONCAT)
13072 fn convert_fmt_expr(
13073 expr: &Expression,
13074 converter: &dyn Fn(&str) -> String,
13075 ) -> Expression {
13076 match expr {
13077 Expression::Literal(
13078 crate::expressions::Literal::String(s),
13079 ) => Expression::string(&converter(s)),
13080 Expression::Function(func)
13081 if func.name.eq_ignore_ascii_case("CONCAT") =>
13082 {
13083 let new_args: Vec<Expression> = func
13084 .args
13085 .iter()
13086 .map(|a| convert_fmt_expr(a, converter))
13087 .collect();
13088 Expression::Function(Box::new(Function::new(
13089 "CONCAT".to_string(),
13090 new_args,
13091 )))
13092 }
13093 other => other.clone(),
13094 }
13095 }
13096
13097 match target {
13098 DialectType::DuckDB => {
13099 if matches!(source, DialectType::SQLite) {
13100 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
13101 let cast_val = Expression::Cast(Box::new(Cast {
13102 this: val,
13103 to: crate::expressions::DataType::Timestamp {
13104 precision: None,
13105 timezone: false,
13106 },
13107 trailing_comments: Vec::new(),
13108 double_colon_syntax: false,
13109 format: None,
13110 default: None,
13111 inferred_type: None,
13112 }));
13113 Ok(Expression::Function(Box::new(Function::new(
13114 "STRFTIME".to_string(),
13115 vec![cast_val, fmt_expr.clone()],
13116 ))))
13117 } else {
13118 Ok(Expression::Function(f))
13119 }
13120 }
13121 DialectType::Spark
13122 | DialectType::Databricks
13123 | DialectType::Hive => {
13124 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
13125 let converted_fmt =
13126 convert_fmt_expr(fmt_expr, &c_to_java_format);
13127 Ok(Expression::Function(Box::new(Function::new(
13128 "DATE_FORMAT".to_string(),
13129 vec![val, converted_fmt],
13130 ))))
13131 }
13132 DialectType::TSQL | DialectType::Fabric => {
13133 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
13134 let converted_fmt =
13135 convert_fmt_expr(fmt_expr, &c_to_java_format);
13136 Ok(Expression::Function(Box::new(Function::new(
13137 "FORMAT".to_string(),
13138 vec![val, converted_fmt],
13139 ))))
13140 }
13141 DialectType::Presto
13142 | DialectType::Trino
13143 | DialectType::Athena => {
13144 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
13145 if let Expression::Literal(
13146 crate::expressions::Literal::String(s),
13147 ) = fmt_expr
13148 {
13149 let presto_fmt = duckdb_to_presto_format(s);
13150 Ok(Expression::Function(Box::new(Function::new(
13151 "DATE_FORMAT".to_string(),
13152 vec![val, Expression::string(&presto_fmt)],
13153 ))))
13154 } else {
13155 Ok(Expression::Function(Box::new(Function::new(
13156 "DATE_FORMAT".to_string(),
13157 vec![val, fmt_expr.clone()],
13158 ))))
13159 }
13160 }
13161 DialectType::BigQuery => {
13162 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
13163 if let Expression::Literal(
13164 crate::expressions::Literal::String(s),
13165 ) = fmt_expr
13166 {
13167 let bq_fmt = duckdb_to_bigquery_format(s);
13168 Ok(Expression::Function(Box::new(Function::new(
13169 "FORMAT_DATE".to_string(),
13170 vec![Expression::string(&bq_fmt), val],
13171 ))))
13172 } else {
13173 Ok(Expression::Function(Box::new(Function::new(
13174 "FORMAT_DATE".to_string(),
13175 vec![fmt_expr.clone(), val],
13176 ))))
13177 }
13178 }
13179 DialectType::PostgreSQL | DialectType::Redshift => {
13180 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
13181 if let Expression::Literal(
13182 crate::expressions::Literal::String(s),
13183 ) = fmt_expr
13184 {
13185 let pg_fmt = s
13186 .replace("%Y", "YYYY")
13187 .replace("%m", "MM")
13188 .replace("%d", "DD")
13189 .replace("%H", "HH24")
13190 .replace("%M", "MI")
13191 .replace("%S", "SS")
13192 .replace("%y", "YY")
13193 .replace("%-m", "FMMM")
13194 .replace("%-d", "FMDD")
13195 .replace("%-H", "FMHH24")
13196 .replace("%-I", "FMHH12")
13197 .replace("%p", "AM")
13198 .replace("%F", "YYYY-MM-DD")
13199 .replace("%T", "HH24:MI:SS");
13200 Ok(Expression::Function(Box::new(Function::new(
13201 "TO_CHAR".to_string(),
13202 vec![val, Expression::string(&pg_fmt)],
13203 ))))
13204 } else {
13205 Ok(Expression::Function(Box::new(Function::new(
13206 "TO_CHAR".to_string(),
13207 vec![val, fmt_expr.clone()],
13208 ))))
13209 }
13210 }
13211 _ => Ok(Expression::Function(f)),
13212 }
13213 }
13214 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
13215 "STRPTIME" if f.args.len() == 2 => {
13216 let val = f.args[0].clone();
13217 let fmt_expr = &f.args[1];
13218
13219 fn c_to_java_format_parse(fmt: &str) -> String {
13220 fmt.replace("%Y", "yyyy")
13221 .replace("%m", "MM")
13222 .replace("%d", "dd")
13223 .replace("%H", "HH")
13224 .replace("%M", "mm")
13225 .replace("%S", "ss")
13226 .replace("%f", "SSSSSS")
13227 .replace("%y", "yy")
13228 .replace("%-m", "M")
13229 .replace("%-d", "d")
13230 .replace("%-H", "H")
13231 .replace("%-I", "h")
13232 .replace("%I", "hh")
13233 .replace("%p", "a")
13234 .replace("%F", "yyyy-MM-dd")
13235 .replace("%T", "HH:mm:ss")
13236 }
13237
13238 match target {
13239 DialectType::DuckDB => Ok(Expression::Function(f)),
13240 DialectType::Spark | DialectType::Databricks => {
13241 // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
13242 if let Expression::Literal(
13243 crate::expressions::Literal::String(s),
13244 ) = fmt_expr
13245 {
13246 let java_fmt = c_to_java_format_parse(s);
13247 Ok(Expression::Function(Box::new(Function::new(
13248 "TO_TIMESTAMP".to_string(),
13249 vec![val, Expression::string(&java_fmt)],
13250 ))))
13251 } else {
13252 Ok(Expression::Function(Box::new(Function::new(
13253 "TO_TIMESTAMP".to_string(),
13254 vec![val, fmt_expr.clone()],
13255 ))))
13256 }
13257 }
13258 DialectType::Hive => {
13259 // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
13260 if let Expression::Literal(
13261 crate::expressions::Literal::String(s),
13262 ) = fmt_expr
13263 {
13264 let java_fmt = c_to_java_format_parse(s);
13265 let unix_ts =
13266 Expression::Function(Box::new(Function::new(
13267 "UNIX_TIMESTAMP".to_string(),
13268 vec![val, Expression::string(&java_fmt)],
13269 )));
13270 let from_unix =
13271 Expression::Function(Box::new(Function::new(
13272 "FROM_UNIXTIME".to_string(),
13273 vec![unix_ts],
13274 )));
13275 Ok(Expression::Cast(Box::new(
13276 crate::expressions::Cast {
13277 this: from_unix,
13278 to: DataType::Timestamp {
13279 timezone: false,
13280 precision: None,
13281 },
13282 trailing_comments: Vec::new(),
13283 double_colon_syntax: false,
13284 format: None,
13285 default: None,
13286 inferred_type: None,
13287 },
13288 )))
13289 } else {
13290 Ok(Expression::Function(f))
13291 }
13292 }
13293 DialectType::Presto
13294 | DialectType::Trino
13295 | DialectType::Athena => {
13296 // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
13297 if let Expression::Literal(
13298 crate::expressions::Literal::String(s),
13299 ) = fmt_expr
13300 {
13301 let presto_fmt = duckdb_to_presto_format(s);
13302 Ok(Expression::Function(Box::new(Function::new(
13303 "DATE_PARSE".to_string(),
13304 vec![val, Expression::string(&presto_fmt)],
13305 ))))
13306 } else {
13307 Ok(Expression::Function(Box::new(Function::new(
13308 "DATE_PARSE".to_string(),
13309 vec![val, fmt_expr.clone()],
13310 ))))
13311 }
13312 }
13313 DialectType::BigQuery => {
13314 // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
13315 if let Expression::Literal(
13316 crate::expressions::Literal::String(s),
13317 ) = fmt_expr
13318 {
13319 let bq_fmt = duckdb_to_bigquery_format(s);
13320 Ok(Expression::Function(Box::new(Function::new(
13321 "PARSE_TIMESTAMP".to_string(),
13322 vec![Expression::string(&bq_fmt), val],
13323 ))))
13324 } else {
13325 Ok(Expression::Function(Box::new(Function::new(
13326 "PARSE_TIMESTAMP".to_string(),
13327 vec![fmt_expr.clone(), val],
13328 ))))
13329 }
13330 }
13331 _ => Ok(Expression::Function(f)),
13332 }
13333 }
13334 // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
13335 "DATE_FORMAT"
13336 if f.args.len() >= 2
13337 && matches!(
13338 source,
13339 DialectType::Presto
13340 | DialectType::Trino
13341 | DialectType::Athena
13342 ) =>
13343 {
13344 let val = f.args[0].clone();
13345 let fmt_expr = &f.args[1];
13346
13347 match target {
13348 DialectType::Presto
13349 | DialectType::Trino
13350 | DialectType::Athena => {
13351 // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
13352 if let Expression::Literal(
13353 crate::expressions::Literal::String(s),
13354 ) = fmt_expr
13355 {
13356 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
13357 Ok(Expression::Function(Box::new(Function::new(
13358 "DATE_FORMAT".to_string(),
13359 vec![val, Expression::string(&normalized)],
13360 ))))
13361 } else {
13362 Ok(Expression::Function(f))
13363 }
13364 }
13365 DialectType::Hive
13366 | DialectType::Spark
13367 | DialectType::Databricks => {
13368 // Convert Presto C-style to Java-style format
13369 if let Expression::Literal(
13370 crate::expressions::Literal::String(s),
13371 ) = fmt_expr
13372 {
13373 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
13374 Ok(Expression::Function(Box::new(Function::new(
13375 "DATE_FORMAT".to_string(),
13376 vec![val, Expression::string(&java_fmt)],
13377 ))))
13378 } else {
13379 Ok(Expression::Function(f))
13380 }
13381 }
13382 DialectType::DuckDB => {
13383 // Convert to STRFTIME(val, duckdb_fmt)
13384 if let Expression::Literal(
13385 crate::expressions::Literal::String(s),
13386 ) = fmt_expr
13387 {
13388 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
13389 Ok(Expression::Function(Box::new(Function::new(
13390 "STRFTIME".to_string(),
13391 vec![val, Expression::string(&duckdb_fmt)],
13392 ))))
13393 } else {
13394 Ok(Expression::Function(Box::new(Function::new(
13395 "STRFTIME".to_string(),
13396 vec![val, fmt_expr.clone()],
13397 ))))
13398 }
13399 }
13400 DialectType::BigQuery => {
13401 // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
13402 if let Expression::Literal(
13403 crate::expressions::Literal::String(s),
13404 ) = fmt_expr
13405 {
13406 let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
13407 Ok(Expression::Function(Box::new(Function::new(
13408 "FORMAT_DATE".to_string(),
13409 vec![Expression::string(&bq_fmt), val],
13410 ))))
13411 } else {
13412 Ok(Expression::Function(Box::new(Function::new(
13413 "FORMAT_DATE".to_string(),
13414 vec![fmt_expr.clone(), val],
13415 ))))
13416 }
13417 }
13418 _ => Ok(Expression::Function(f)),
13419 }
13420 }
13421 // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
13422 "DATE_PARSE"
13423 if f.args.len() >= 2
13424 && matches!(
13425 source,
13426 DialectType::Presto
13427 | DialectType::Trino
13428 | DialectType::Athena
13429 ) =>
13430 {
13431 let val = f.args[0].clone();
13432 let fmt_expr = &f.args[1];
13433
13434 match target {
13435 DialectType::Presto
13436 | DialectType::Trino
13437 | DialectType::Athena => {
13438 // Presto -> Presto: normalize format
13439 if let Expression::Literal(
13440 crate::expressions::Literal::String(s),
13441 ) = fmt_expr
13442 {
13443 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
13444 Ok(Expression::Function(Box::new(Function::new(
13445 "DATE_PARSE".to_string(),
13446 vec![val, Expression::string(&normalized)],
13447 ))))
13448 } else {
13449 Ok(Expression::Function(f))
13450 }
13451 }
13452 DialectType::Hive => {
13453 // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
13454 if let Expression::Literal(
13455 crate::expressions::Literal::String(s),
13456 ) = fmt_expr
13457 {
13458 if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
13459 || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
13460 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
13461 this: val,
13462 to: DataType::Timestamp { timezone: false, precision: None },
13463 trailing_comments: Vec::new(),
13464 double_colon_syntax: false,
13465 format: None,
13466 default: None,
13467 inferred_type: None,
13468 })))
13469 } else {
13470 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
13471 Ok(Expression::Function(Box::new(Function::new(
13472 "TO_TIMESTAMP".to_string(),
13473 vec![val, Expression::string(&java_fmt)],
13474 ))))
13475 }
13476 } else {
13477 Ok(Expression::Function(f))
13478 }
13479 }
13480 DialectType::Spark | DialectType::Databricks => {
13481 // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
13482 if let Expression::Literal(
13483 crate::expressions::Literal::String(s),
13484 ) = fmt_expr
13485 {
13486 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
13487 Ok(Expression::Function(Box::new(Function::new(
13488 "TO_TIMESTAMP".to_string(),
13489 vec![val, Expression::string(&java_fmt)],
13490 ))))
13491 } else {
13492 Ok(Expression::Function(f))
13493 }
13494 }
13495 DialectType::DuckDB => {
13496 // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
13497 if let Expression::Literal(
13498 crate::expressions::Literal::String(s),
13499 ) = fmt_expr
13500 {
13501 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
13502 Ok(Expression::Function(Box::new(Function::new(
13503 "STRPTIME".to_string(),
13504 vec![val, Expression::string(&duckdb_fmt)],
13505 ))))
13506 } else {
13507 Ok(Expression::Function(Box::new(Function::new(
13508 "STRPTIME".to_string(),
13509 vec![val, fmt_expr.clone()],
13510 ))))
13511 }
13512 }
13513 _ => Ok(Expression::Function(f)),
13514 }
13515 }
13516 // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
13517 "FROM_BASE64"
13518 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
13519 {
13520 Ok(Expression::Function(Box::new(Function::new(
13521 "UNBASE64".to_string(),
13522 f.args,
13523 ))))
13524 }
13525 "TO_BASE64"
13526 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
13527 {
13528 Ok(Expression::Function(Box::new(Function::new(
13529 "BASE64".to_string(),
13530 f.args,
13531 ))))
13532 }
13533 // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
13534 "FROM_UNIXTIME"
13535 if f.args.len() == 1
13536 && matches!(
13537 source,
13538 DialectType::Presto
13539 | DialectType::Trino
13540 | DialectType::Athena
13541 )
13542 && matches!(
13543 target,
13544 DialectType::Spark | DialectType::Databricks
13545 ) =>
13546 {
13547 // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
13548 let from_unix = Expression::Function(Box::new(Function::new(
13549 "FROM_UNIXTIME".to_string(),
13550 f.args,
13551 )));
13552 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
13553 this: from_unix,
13554 to: DataType::Timestamp {
13555 timezone: false,
13556 precision: None,
13557 },
13558 trailing_comments: Vec::new(),
13559 double_colon_syntax: false,
13560 format: None,
13561 default: None,
13562 inferred_type: None,
13563 })))
13564 }
13565 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
13566 "DATE_FORMAT"
13567 if f.args.len() >= 2
13568 && !matches!(
13569 target,
13570 DialectType::Hive
13571 | DialectType::Spark
13572 | DialectType::Databricks
13573 | DialectType::MySQL
13574 | DialectType::SingleStore
13575 ) =>
13576 {
13577 let val = f.args[0].clone();
13578 let fmt_expr = &f.args[1];
13579 let is_hive_source = matches!(
13580 source,
13581 DialectType::Hive
13582 | DialectType::Spark
13583 | DialectType::Databricks
13584 );
13585
fn java_to_c_format(fmt: &str) -> String {
    // Translate a Java (SimpleDateFormat-style) datetime pattern into a
    // C strftime pattern in a single left-to-right scan.
    //
    // Longest patterns are tried first so "yyyy" wins over "yy", and
    // because the scan never revisits emitted output, a replacement can
    // never be corrupted by a later rule. (The previous multi-pass
    // `str::replace` chain could re-match the 'm' of an emitted "%m":
    // "MMmm" -> "%mmm" -> "%%Mm".)
    //
    // Bug fix: Java "EEEE" is the full day-of-week NAME, which maps to
    // strftime %A; the old mapping %W is the week-of-year NUMBER.
    const PATTERNS: &[(&str, &str)] = &[
        ("SSSSSS", "%f"), // fractional seconds (microseconds)
        ("yyyy", "%Y"),   // 4-digit year
        ("EEEE", "%A"),   // full weekday name
        ("MM", "%m"),     // 2-digit month
        ("dd", "%d"),     // 2-digit day of month
        ("HH", "%H"),     // 2-digit 24-hour clock hour
        ("mm", "%M"),     // 2-digit minute
        ("ss", "%S"),     // 2-digit second
        ("yy", "%y"),     // 2-digit year
        ("z", "%Z"),      // timezone name
        ("Z", "%z"),      // timezone offset
    ];
    let mut out = String::with_capacity(fmt.len());
    let mut rest = fmt;
    'scan: while !rest.is_empty() {
        for (java, c) in PATTERNS {
            if let Some(tail) = rest.strip_prefix(java) {
                out.push_str(c);
                rest = tail;
                continue 'scan;
            }
        }
        // No pattern matched at this position: copy one char verbatim.
        let ch = rest.chars().next().unwrap();
        out.push(ch);
        rest = &rest[ch.len_utf8()..];
    }
    out
}
13625
13626 fn java_to_presto_format(fmt: &str) -> String {
13627 // Presto uses %T for HH:MM:SS
13628 let c_fmt = java_to_c_format(fmt);
13629 c_fmt.replace("%H:%M:%S", "%T")
13630 }
13631
13632 fn java_to_bq_format(fmt: &str) -> String {
13633 // BigQuery uses %F for yyyy-MM-dd and %T for HH:mm:ss
13634 let c_fmt = java_to_c_format(fmt);
13635 c_fmt.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
13636 }
13637
13638 // For Hive source, CAST string literals to appropriate type
13639 let cast_val = if is_hive_source {
13640 match &val {
13641 Expression::Literal(
13642 crate::expressions::Literal::String(_),
13643 ) => {
13644 match target {
13645 DialectType::DuckDB
13646 | DialectType::Presto
13647 | DialectType::Trino
13648 | DialectType::Athena => {
13649 Self::ensure_cast_timestamp(val.clone())
13650 }
13651 DialectType::BigQuery => {
13652 // BigQuery: CAST(val AS DATETIME)
13653 Expression::Cast(Box::new(
13654 crate::expressions::Cast {
13655 this: val.clone(),
13656 to: DataType::Custom {
13657 name: "DATETIME".to_string(),
13658 },
13659 trailing_comments: vec![],
13660 double_colon_syntax: false,
13661 format: None,
13662 default: None,
13663 inferred_type: None,
13664 },
13665 ))
13666 }
13667 _ => val.clone(),
13668 }
13669 }
13670 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
13671 Expression::Cast(c)
13672 if matches!(c.to, DataType::Date)
13673 && matches!(
13674 target,
13675 DialectType::Presto
13676 | DialectType::Trino
13677 | DialectType::Athena
13678 ) =>
13679 {
13680 Expression::Cast(Box::new(crate::expressions::Cast {
13681 this: val.clone(),
13682 to: DataType::Timestamp {
13683 timezone: false,
13684 precision: None,
13685 },
13686 trailing_comments: vec![],
13687 double_colon_syntax: false,
13688 format: None,
13689 default: None,
13690 inferred_type: None,
13691 }))
13692 }
13693 Expression::Literal(crate::expressions::Literal::Date(
13694 _,
13695 )) if matches!(
13696 target,
13697 DialectType::Presto
13698 | DialectType::Trino
13699 | DialectType::Athena
13700 ) =>
13701 {
13702 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
13703 let cast_date = Self::date_literal_to_cast(val.clone());
13704 Expression::Cast(Box::new(crate::expressions::Cast {
13705 this: cast_date,
13706 to: DataType::Timestamp {
13707 timezone: false,
13708 precision: None,
13709 },
13710 trailing_comments: vec![],
13711 double_colon_syntax: false,
13712 format: None,
13713 default: None,
13714 inferred_type: None,
13715 }))
13716 }
13717 _ => val.clone(),
13718 }
13719 } else {
13720 val.clone()
13721 };
13722
13723 match target {
13724 DialectType::DuckDB => {
13725 if let Expression::Literal(
13726 crate::expressions::Literal::String(s),
13727 ) = fmt_expr
13728 {
13729 let c_fmt = if is_hive_source {
13730 java_to_c_format(s)
13731 } else {
13732 s.clone()
13733 };
13734 Ok(Expression::Function(Box::new(Function::new(
13735 "STRFTIME".to_string(),
13736 vec![cast_val, Expression::string(&c_fmt)],
13737 ))))
13738 } else {
13739 Ok(Expression::Function(Box::new(Function::new(
13740 "STRFTIME".to_string(),
13741 vec![cast_val, fmt_expr.clone()],
13742 ))))
13743 }
13744 }
13745 DialectType::Presto
13746 | DialectType::Trino
13747 | DialectType::Athena => {
13748 if is_hive_source {
13749 if let Expression::Literal(
13750 crate::expressions::Literal::String(s),
13751 ) = fmt_expr
13752 {
13753 let p_fmt = java_to_presto_format(s);
13754 Ok(Expression::Function(Box::new(Function::new(
13755 "DATE_FORMAT".to_string(),
13756 vec![cast_val, Expression::string(&p_fmt)],
13757 ))))
13758 } else {
13759 Ok(Expression::Function(Box::new(Function::new(
13760 "DATE_FORMAT".to_string(),
13761 vec![cast_val, fmt_expr.clone()],
13762 ))))
13763 }
13764 } else {
13765 Ok(Expression::Function(Box::new(Function::new(
13766 "DATE_FORMAT".to_string(),
13767 f.args,
13768 ))))
13769 }
13770 }
13771 DialectType::BigQuery => {
13772 // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
13773 if let Expression::Literal(
13774 crate::expressions::Literal::String(s),
13775 ) = fmt_expr
13776 {
13777 let bq_fmt = if is_hive_source {
13778 java_to_bq_format(s)
13779 } else {
13780 java_to_c_format(s)
13781 };
13782 Ok(Expression::Function(Box::new(Function::new(
13783 "FORMAT_DATE".to_string(),
13784 vec![Expression::string(&bq_fmt), cast_val],
13785 ))))
13786 } else {
13787 Ok(Expression::Function(Box::new(Function::new(
13788 "FORMAT_DATE".to_string(),
13789 vec![fmt_expr.clone(), cast_val],
13790 ))))
13791 }
13792 }
13793 DialectType::PostgreSQL | DialectType::Redshift => {
13794 if let Expression::Literal(
13795 crate::expressions::Literal::String(s),
13796 ) = fmt_expr
13797 {
13798 let pg_fmt = s
13799 .replace("yyyy", "YYYY")
13800 .replace("MM", "MM")
13801 .replace("dd", "DD")
13802 .replace("HH", "HH24")
13803 .replace("mm", "MI")
13804 .replace("ss", "SS")
13805 .replace("yy", "YY");
13806 Ok(Expression::Function(Box::new(Function::new(
13807 "TO_CHAR".to_string(),
13808 vec![val, Expression::string(&pg_fmt)],
13809 ))))
13810 } else {
13811 Ok(Expression::Function(Box::new(Function::new(
13812 "TO_CHAR".to_string(),
13813 vec![val, fmt_expr.clone()],
13814 ))))
13815 }
13816 }
13817 _ => Ok(Expression::Function(f)),
13818 }
13819 }
13820 // DATEDIFF(unit, start, end) - 3-arg form
13821 // SQLite uses DATEDIFF(date1, date2, unit_string) instead
13822 "DATEDIFF" if f.args.len() == 3 => {
13823 let mut args = f.args;
13824 // SQLite source: args = (date1, date2, unit_string)
13825 // Standard source: args = (unit, start, end)
13826 let (_arg0, arg1, arg2, unit_str) =
13827 if matches!(source, DialectType::SQLite) {
13828 let date1 = args.remove(0);
13829 let date2 = args.remove(0);
13830 let unit_expr = args.remove(0);
13831 let unit_s = Self::get_unit_str_static(&unit_expr);
13832
13833 // For SQLite target, generate JULIANDAY arithmetic directly
13834 if matches!(target, DialectType::SQLite) {
13835 let jd_first = Expression::Function(Box::new(
13836 Function::new("JULIANDAY".to_string(), vec![date1]),
13837 ));
13838 let jd_second = Expression::Function(Box::new(
13839 Function::new("JULIANDAY".to_string(), vec![date2]),
13840 ));
13841 let diff = Expression::Sub(Box::new(
13842 crate::expressions::BinaryOp::new(
13843 jd_first, jd_second,
13844 ),
13845 ));
13846 let paren_diff = Expression::Paren(Box::new(
13847 crate::expressions::Paren {
13848 this: diff,
13849 trailing_comments: Vec::new(),
13850 },
13851 ));
13852 let adjusted = match unit_s.as_str() {
13853 "HOUR" => Expression::Mul(Box::new(
13854 crate::expressions::BinaryOp::new(
13855 paren_diff,
13856 Expression::Literal(Literal::Number(
13857 "24.0".to_string(),
13858 )),
13859 ),
13860 )),
13861 "MINUTE" => Expression::Mul(Box::new(
13862 crate::expressions::BinaryOp::new(
13863 paren_diff,
13864 Expression::Literal(Literal::Number(
13865 "1440.0".to_string(),
13866 )),
13867 ),
13868 )),
13869 "SECOND" => Expression::Mul(Box::new(
13870 crate::expressions::BinaryOp::new(
13871 paren_diff,
13872 Expression::Literal(Literal::Number(
13873 "86400.0".to_string(),
13874 )),
13875 ),
13876 )),
13877 "MONTH" => Expression::Div(Box::new(
13878 crate::expressions::BinaryOp::new(
13879 paren_diff,
13880 Expression::Literal(Literal::Number(
13881 "30.0".to_string(),
13882 )),
13883 ),
13884 )),
13885 "YEAR" => Expression::Div(Box::new(
13886 crate::expressions::BinaryOp::new(
13887 paren_diff,
13888 Expression::Literal(Literal::Number(
13889 "365.0".to_string(),
13890 )),
13891 ),
13892 )),
13893 _ => paren_diff,
13894 };
13895 return Ok(Expression::Cast(Box::new(Cast {
13896 this: adjusted,
13897 to: DataType::Int {
13898 length: None,
13899 integer_spelling: true,
13900 },
13901 trailing_comments: vec![],
13902 double_colon_syntax: false,
13903 format: None,
13904 default: None,
13905 inferred_type: None,
13906 })));
13907 }
13908
13909 // For other targets, remap to standard (unit, start, end) form
13910 let unit_ident =
13911 Expression::Identifier(Identifier::new(&unit_s));
13912 (unit_ident, date1, date2, unit_s)
13913 } else {
13914 let arg0 = args.remove(0);
13915 let arg1 = args.remove(0);
13916 let arg2 = args.remove(0);
13917 let unit_s = Self::get_unit_str_static(&arg0);
13918 (arg0, arg1, arg2, unit_s)
13919 };
13920
13921 // For Hive/Spark source, string literal dates need to be cast
13922 // Note: Databricks is excluded - it handles string args like standard SQL
13923 let is_hive_spark =
13924 matches!(source, DialectType::Hive | DialectType::Spark);
13925
13926 match target {
13927 DialectType::Snowflake => {
13928 let unit =
13929 Expression::Identifier(Identifier::new(&unit_str));
13930 // Use ensure_to_date_preserved to add TO_DATE with a marker
13931 // that prevents the Snowflake TO_DATE handler from converting it to CAST
13932 let d1 = if is_hive_spark {
13933 Self::ensure_to_date_preserved(arg1)
13934 } else {
13935 arg1
13936 };
13937 let d2 = if is_hive_spark {
13938 Self::ensure_to_date_preserved(arg2)
13939 } else {
13940 arg2
13941 };
13942 Ok(Expression::Function(Box::new(Function::new(
13943 "DATEDIFF".to_string(),
13944 vec![unit, d1, d2],
13945 ))))
13946 }
13947 DialectType::Redshift => {
13948 let unit =
13949 Expression::Identifier(Identifier::new(&unit_str));
13950 let d1 = if is_hive_spark {
13951 Self::ensure_cast_date(arg1)
13952 } else {
13953 arg1
13954 };
13955 let d2 = if is_hive_spark {
13956 Self::ensure_cast_date(arg2)
13957 } else {
13958 arg2
13959 };
13960 Ok(Expression::Function(Box::new(Function::new(
13961 "DATEDIFF".to_string(),
13962 vec![unit, d1, d2],
13963 ))))
13964 }
13965 DialectType::TSQL => {
13966 let unit =
13967 Expression::Identifier(Identifier::new(&unit_str));
13968 Ok(Expression::Function(Box::new(Function::new(
13969 "DATEDIFF".to_string(),
13970 vec![unit, arg1, arg2],
13971 ))))
13972 }
13973 DialectType::DuckDB => {
13974 let is_redshift_tsql = matches!(
13975 source,
13976 DialectType::Redshift | DialectType::TSQL
13977 );
13978 if is_hive_spark {
13979 // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
13980 let d1 = Self::ensure_cast_date(arg1);
13981 let d2 = Self::ensure_cast_date(arg2);
13982 Ok(Expression::Function(Box::new(Function::new(
13983 "DATE_DIFF".to_string(),
13984 vec![Expression::string(&unit_str), d1, d2],
13985 ))))
13986 } else if matches!(source, DialectType::Snowflake) {
13987 // For Snowflake source: special handling per unit
13988 match unit_str.as_str() {
13989 "NANOSECOND" => {
13990 // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
13991 fn cast_to_timestamp_ns(
13992 expr: Expression,
13993 ) -> Expression
13994 {
13995 Expression::Cast(Box::new(Cast {
13996 this: expr,
13997 to: DataType::Custom {
13998 name: "TIMESTAMP_NS".to_string(),
13999 },
14000 trailing_comments: vec![],
14001 double_colon_syntax: false,
14002 format: None,
14003 default: None,
14004 inferred_type: None,
14005 }))
14006 }
14007 let epoch_end = Expression::Function(Box::new(
14008 Function::new(
14009 "EPOCH_NS".to_string(),
14010 vec![cast_to_timestamp_ns(arg2)],
14011 ),
14012 ));
14013 let epoch_start = Expression::Function(
14014 Box::new(Function::new(
14015 "EPOCH_NS".to_string(),
14016 vec![cast_to_timestamp_ns(arg1)],
14017 )),
14018 );
14019 Ok(Expression::Sub(Box::new(BinaryOp::new(
14020 epoch_end,
14021 epoch_start,
14022 ))))
14023 }
14024 "WEEK" => {
14025 // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
14026 let d1 = Self::force_cast_date(arg1);
14027 let d2 = Self::force_cast_date(arg2);
14028 let dt1 = Expression::Function(Box::new(
14029 Function::new(
14030 "DATE_TRUNC".to_string(),
14031 vec![Expression::string("WEEK"), d1],
14032 ),
14033 ));
14034 let dt2 = Expression::Function(Box::new(
14035 Function::new(
14036 "DATE_TRUNC".to_string(),
14037 vec![Expression::string("WEEK"), d2],
14038 ),
14039 ));
14040 Ok(Expression::Function(Box::new(
14041 Function::new(
14042 "DATE_DIFF".to_string(),
14043 vec![
14044 Expression::string(&unit_str),
14045 dt1,
14046 dt2,
14047 ],
14048 ),
14049 )))
14050 }
14051 _ => {
14052 // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
14053 let d1 = Self::force_cast_date(arg1);
14054 let d2 = Self::force_cast_date(arg2);
14055 Ok(Expression::Function(Box::new(
14056 Function::new(
14057 "DATE_DIFF".to_string(),
14058 vec![
14059 Expression::string(&unit_str),
14060 d1,
14061 d2,
14062 ],
14063 ),
14064 )))
14065 }
14066 }
14067 } else if is_redshift_tsql {
14068 // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
14069 let d1 = Self::force_cast_timestamp(arg1);
14070 let d2 = Self::force_cast_timestamp(arg2);
14071 Ok(Expression::Function(Box::new(Function::new(
14072 "DATE_DIFF".to_string(),
14073 vec![Expression::string(&unit_str), d1, d2],
14074 ))))
14075 } else {
14076 // Keep as DATEDIFF so DuckDB's transform_datediff handles
14077 // DATE_TRUNC for WEEK, CAST for string literals, etc.
14078 let unit =
14079 Expression::Identifier(Identifier::new(&unit_str));
14080 Ok(Expression::Function(Box::new(Function::new(
14081 "DATEDIFF".to_string(),
14082 vec![unit, arg1, arg2],
14083 ))))
14084 }
14085 }
14086 DialectType::BigQuery => {
14087 let is_redshift_tsql = matches!(
14088 source,
14089 DialectType::Redshift
14090 | DialectType::TSQL
14091 | DialectType::Snowflake
14092 );
14093 let cast_d1 = if is_hive_spark {
14094 Self::ensure_cast_date(arg1)
14095 } else if is_redshift_tsql {
14096 Self::force_cast_datetime(arg1)
14097 } else {
14098 Self::ensure_cast_datetime(arg1)
14099 };
14100 let cast_d2 = if is_hive_spark {
14101 Self::ensure_cast_date(arg2)
14102 } else if is_redshift_tsql {
14103 Self::force_cast_datetime(arg2)
14104 } else {
14105 Self::ensure_cast_datetime(arg2)
14106 };
14107 let unit =
14108 Expression::Identifier(Identifier::new(&unit_str));
14109 Ok(Expression::Function(Box::new(Function::new(
14110 "DATE_DIFF".to_string(),
14111 vec![cast_d2, cast_d1, unit],
14112 ))))
14113 }
14114 DialectType::Presto
14115 | DialectType::Trino
14116 | DialectType::Athena => {
14117 // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
14118 // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
14119 let is_redshift_tsql = matches!(
14120 source,
14121 DialectType::Redshift
14122 | DialectType::TSQL
14123 | DialectType::Snowflake
14124 );
14125 let d1 = if is_hive_spark {
14126 Self::double_cast_timestamp_date(arg1)
14127 } else if is_redshift_tsql {
14128 Self::force_cast_timestamp(arg1)
14129 } else {
14130 arg1
14131 };
14132 let d2 = if is_hive_spark {
14133 Self::double_cast_timestamp_date(arg2)
14134 } else if is_redshift_tsql {
14135 Self::force_cast_timestamp(arg2)
14136 } else {
14137 arg2
14138 };
14139 Ok(Expression::Function(Box::new(Function::new(
14140 "DATE_DIFF".to_string(),
14141 vec![Expression::string(&unit_str), d1, d2],
14142 ))))
14143 }
14144 DialectType::Hive => match unit_str.as_str() {
14145 "MONTH" => Ok(Expression::Cast(Box::new(Cast {
14146 this: Expression::Function(Box::new(Function::new(
14147 "MONTHS_BETWEEN".to_string(),
14148 vec![arg2, arg1],
14149 ))),
14150 to: DataType::Int {
14151 length: None,
14152 integer_spelling: false,
14153 },
14154 trailing_comments: vec![],
14155 double_colon_syntax: false,
14156 format: None,
14157 default: None,
14158 inferred_type: None,
14159 }))),
14160 "WEEK" => Ok(Expression::Cast(Box::new(Cast {
14161 this: Expression::Div(Box::new(
14162 crate::expressions::BinaryOp::new(
14163 Expression::Function(Box::new(Function::new(
14164 "DATEDIFF".to_string(),
14165 vec![arg2, arg1],
14166 ))),
14167 Expression::number(7),
14168 ),
14169 )),
14170 to: DataType::Int {
14171 length: None,
14172 integer_spelling: false,
14173 },
14174 trailing_comments: vec![],
14175 double_colon_syntax: false,
14176 format: None,
14177 default: None,
14178 inferred_type: None,
14179 }))),
14180 _ => Ok(Expression::Function(Box::new(Function::new(
14181 "DATEDIFF".to_string(),
14182 vec![arg2, arg1],
14183 )))),
14184 },
14185 DialectType::Spark | DialectType::Databricks => {
14186 let unit =
14187 Expression::Identifier(Identifier::new(&unit_str));
14188 Ok(Expression::Function(Box::new(Function::new(
14189 "DATEDIFF".to_string(),
14190 vec![unit, arg1, arg2],
14191 ))))
14192 }
14193 _ => {
14194 // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
14195 let d1 = if is_hive_spark {
14196 Self::ensure_cast_date(arg1)
14197 } else {
14198 arg1
14199 };
14200 let d2 = if is_hive_spark {
14201 Self::ensure_cast_date(arg2)
14202 } else {
14203 arg2
14204 };
14205 let unit =
14206 Expression::Identifier(Identifier::new(&unit_str));
14207 Ok(Expression::Function(Box::new(Function::new(
14208 "DATEDIFF".to_string(),
14209 vec![unit, d1, d2],
14210 ))))
14211 }
14212 }
14213 }
14214 // DATEDIFF(end, start) - 2-arg form from Hive/MySQL
14215 "DATEDIFF" if f.args.len() == 2 => {
14216 let mut args = f.args;
14217 let arg0 = args.remove(0);
14218 let arg1 = args.remove(0);
14219
14220 // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
14221 // Also recognizes TryCast/Cast to DATE that may have been produced by
14222 // cross-dialect TO_DATE -> TRY_CAST conversion
14223 let unwrap_to_date = |e: Expression| -> (Expression, bool) {
14224 if let Expression::Function(ref f) = e {
14225 if f.name.eq_ignore_ascii_case("TO_DATE")
14226 && f.args.len() == 1
14227 {
14228 return (f.args[0].clone(), true);
14229 }
14230 }
14231 // Also recognize TryCast(x, Date) as an already-converted TO_DATE
14232 if let Expression::TryCast(ref c) = e {
14233 if matches!(c.to, DataType::Date) {
14234 return (e, true); // Already properly cast, return as-is
14235 }
14236 }
14237 (e, false)
14238 };
14239
14240 match target {
14241 DialectType::DuckDB => {
14242 // For Hive source, always CAST to DATE
14243 // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
14244 let cast_d0 = if matches!(
14245 source,
14246 DialectType::Hive
14247 | DialectType::Spark
14248 | DialectType::Databricks
14249 ) {
14250 let (inner, was_to_date) = unwrap_to_date(arg1);
14251 if was_to_date {
14252 // Already a date expression, use directly
14253 if matches!(&inner, Expression::TryCast(_)) {
14254 inner // Already TRY_CAST(x AS DATE)
14255 } else {
14256 Self::try_cast_date(inner)
14257 }
14258 } else {
14259 Self::force_cast_date(inner)
14260 }
14261 } else {
14262 Self::ensure_cast_date(arg1)
14263 };
14264 let cast_d1 = if matches!(
14265 source,
14266 DialectType::Hive
14267 | DialectType::Spark
14268 | DialectType::Databricks
14269 ) {
14270 let (inner, was_to_date) = unwrap_to_date(arg0);
14271 if was_to_date {
14272 if matches!(&inner, Expression::TryCast(_)) {
14273 inner
14274 } else {
14275 Self::try_cast_date(inner)
14276 }
14277 } else {
14278 Self::force_cast_date(inner)
14279 }
14280 } else {
14281 Self::ensure_cast_date(arg0)
14282 };
14283 Ok(Expression::Function(Box::new(Function::new(
14284 "DATE_DIFF".to_string(),
14285 vec![Expression::string("DAY"), cast_d0, cast_d1],
14286 ))))
14287 }
14288 DialectType::Presto
14289 | DialectType::Trino
14290 | DialectType::Athena => {
14291 // For Hive/Spark source, apply double_cast_timestamp_date
14292 // For other sources (MySQL etc.), just swap args without casting
14293 if matches!(
14294 source,
14295 DialectType::Hive
14296 | DialectType::Spark
14297 | DialectType::Databricks
14298 ) {
14299 let cast_fn = |e: Expression| -> Expression {
14300 let (inner, was_to_date) = unwrap_to_date(e);
14301 if was_to_date {
14302 let first_cast =
14303 Self::double_cast_timestamp_date(inner);
14304 Self::double_cast_timestamp_date(first_cast)
14305 } else {
14306 Self::double_cast_timestamp_date(inner)
14307 }
14308 };
14309 Ok(Expression::Function(Box::new(Function::new(
14310 "DATE_DIFF".to_string(),
14311 vec![
14312 Expression::string("DAY"),
14313 cast_fn(arg1),
14314 cast_fn(arg0),
14315 ],
14316 ))))
14317 } else {
14318 Ok(Expression::Function(Box::new(Function::new(
14319 "DATE_DIFF".to_string(),
14320 vec![Expression::string("DAY"), arg1, arg0],
14321 ))))
14322 }
14323 }
14324 DialectType::Redshift => {
14325 let unit = Expression::Identifier(Identifier::new("DAY"));
14326 Ok(Expression::Function(Box::new(Function::new(
14327 "DATEDIFF".to_string(),
14328 vec![unit, arg1, arg0],
14329 ))))
14330 }
14331 _ => Ok(Expression::Function(Box::new(Function::new(
14332 "DATEDIFF".to_string(),
14333 vec![arg0, arg1],
14334 )))),
14335 }
14336 }
14337 // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
14338 "DATE_DIFF" if f.args.len() == 3 => {
14339 let mut args = f.args;
14340 let arg0 = args.remove(0);
14341 let arg1 = args.remove(0);
14342 let arg2 = args.remove(0);
14343 let unit_str = Self::get_unit_str_static(&arg0);
14344
14345 match target {
14346 DialectType::DuckDB => {
14347 // DuckDB: DATE_DIFF('UNIT', start, end)
14348 Ok(Expression::Function(Box::new(Function::new(
14349 "DATE_DIFF".to_string(),
14350 vec![Expression::string(&unit_str), arg1, arg2],
14351 ))))
14352 }
14353 DialectType::Presto
14354 | DialectType::Trino
14355 | DialectType::Athena => {
14356 Ok(Expression::Function(Box::new(Function::new(
14357 "DATE_DIFF".to_string(),
14358 vec![Expression::string(&unit_str), arg1, arg2],
14359 ))))
14360 }
14361 DialectType::ClickHouse => {
14362 // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
14363 let unit =
14364 Expression::Identifier(Identifier::new(&unit_str));
14365 Ok(Expression::Function(Box::new(Function::new(
14366 "DATE_DIFF".to_string(),
14367 vec![unit, arg1, arg2],
14368 ))))
14369 }
14370 DialectType::Snowflake | DialectType::Redshift => {
14371 let unit =
14372 Expression::Identifier(Identifier::new(&unit_str));
14373 Ok(Expression::Function(Box::new(Function::new(
14374 "DATEDIFF".to_string(),
14375 vec![unit, arg1, arg2],
14376 ))))
14377 }
14378 _ => {
14379 let unit =
14380 Expression::Identifier(Identifier::new(&unit_str));
14381 Ok(Expression::Function(Box::new(Function::new(
14382 "DATEDIFF".to_string(),
14383 vec![unit, arg1, arg2],
14384 ))))
14385 }
14386 }
14387 }
14388 // DATEADD(unit, val, date) - 3-arg form
14389 "DATEADD" if f.args.len() == 3 => {
14390 let mut args = f.args;
14391 let arg0 = args.remove(0);
14392 let arg1 = args.remove(0);
14393 let arg2 = args.remove(0);
14394 let unit_str = Self::get_unit_str_static(&arg0);
14395
14396 // Normalize TSQL unit abbreviations to standard names
14397 let unit_str = match unit_str.as_str() {
14398 "YY" | "YYYY" => "YEAR".to_string(),
14399 "QQ" | "Q" => "QUARTER".to_string(),
14400 "MM" | "M" => "MONTH".to_string(),
14401 "WK" | "WW" => "WEEK".to_string(),
14402 "DD" | "D" | "DY" => "DAY".to_string(),
14403 "HH" => "HOUR".to_string(),
14404 "MI" | "N" => "MINUTE".to_string(),
14405 "SS" | "S" => "SECOND".to_string(),
14406 "MS" => "MILLISECOND".to_string(),
14407 "MCS" | "US" => "MICROSECOND".to_string(),
14408 _ => unit_str,
14409 };
14410 match target {
14411 DialectType::Snowflake => {
14412 let unit =
14413 Expression::Identifier(Identifier::new(&unit_str));
14414 // Cast string literal to TIMESTAMP, but not for Snowflake source
14415 // (Snowflake natively accepts string literals in DATEADD)
14416 let arg2 = if matches!(
14417 &arg2,
14418 Expression::Literal(Literal::String(_))
14419 ) && !matches!(source, DialectType::Snowflake)
14420 {
14421 Expression::Cast(Box::new(Cast {
14422 this: arg2,
14423 to: DataType::Timestamp {
14424 precision: None,
14425 timezone: false,
14426 },
14427 trailing_comments: Vec::new(),
14428 double_colon_syntax: false,
14429 format: None,
14430 default: None,
14431 inferred_type: None,
14432 }))
14433 } else {
14434 arg2
14435 };
14436 Ok(Expression::Function(Box::new(Function::new(
14437 "DATEADD".to_string(),
14438 vec![unit, arg1, arg2],
14439 ))))
14440 }
14441 DialectType::TSQL => {
14442 let unit =
14443 Expression::Identifier(Identifier::new(&unit_str));
14444 // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
14445 let arg2 = if matches!(
14446 &arg2,
14447 Expression::Literal(Literal::String(_))
14448 ) && !matches!(
14449 source,
14450 DialectType::Spark
14451 | DialectType::Databricks
14452 | DialectType::Hive
14453 ) {
14454 Expression::Cast(Box::new(Cast {
14455 this: arg2,
14456 to: DataType::Custom {
14457 name: "DATETIME2".to_string(),
14458 },
14459 trailing_comments: Vec::new(),
14460 double_colon_syntax: false,
14461 format: None,
14462 default: None,
14463 inferred_type: None,
14464 }))
14465 } else {
14466 arg2
14467 };
14468 Ok(Expression::Function(Box::new(Function::new(
14469 "DATEADD".to_string(),
14470 vec![unit, arg1, arg2],
14471 ))))
14472 }
14473 DialectType::Redshift => {
14474 let unit =
14475 Expression::Identifier(Identifier::new(&unit_str));
14476 Ok(Expression::Function(Box::new(Function::new(
14477 "DATEADD".to_string(),
14478 vec![unit, arg1, arg2],
14479 ))))
14480 }
14481 DialectType::Databricks => {
14482 let unit =
14483 Expression::Identifier(Identifier::new(&unit_str));
14484 // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
14485 // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
14486 let func_name = if matches!(
14487 source,
14488 DialectType::TSQL
14489 | DialectType::Fabric
14490 | DialectType::Databricks
14491 | DialectType::Snowflake
14492 ) {
14493 "DATEADD"
14494 } else {
14495 "DATE_ADD"
14496 };
14497 Ok(Expression::Function(Box::new(Function::new(
14498 func_name.to_string(),
14499 vec![unit, arg1, arg2],
14500 ))))
14501 }
14502 DialectType::DuckDB => {
14503 // Special handling for NANOSECOND from Snowflake
14504 if unit_str == "NANOSECOND"
14505 && matches!(source, DialectType::Snowflake)
14506 {
14507 // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
14508 let cast_ts = Expression::Cast(Box::new(Cast {
14509 this: arg2,
14510 to: DataType::Custom {
14511 name: "TIMESTAMP_NS".to_string(),
14512 },
14513 trailing_comments: vec![],
14514 double_colon_syntax: false,
14515 format: None,
14516 default: None,
14517 inferred_type: None,
14518 }));
14519 let epoch_ns =
14520 Expression::Function(Box::new(Function::new(
14521 "EPOCH_NS".to_string(),
14522 vec![cast_ts],
14523 )));
14524 let sum = Expression::Add(Box::new(BinaryOp::new(
14525 epoch_ns, arg1,
14526 )));
14527 Ok(Expression::Function(Box::new(Function::new(
14528 "MAKE_TIMESTAMP_NS".to_string(),
14529 vec![sum],
14530 ))))
14531 } else {
14532 // DuckDB: convert to date + INTERVAL syntax with CAST
14533 let iu = Self::parse_interval_unit_static(&unit_str);
14534 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
14535 this: Some(arg1),
14536 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
14537 }));
14538 // Cast string literal to TIMESTAMP
14539 let arg2 = if matches!(
14540 &arg2,
14541 Expression::Literal(Literal::String(_))
14542 ) {
14543 Expression::Cast(Box::new(Cast {
14544 this: arg2,
14545 to: DataType::Timestamp {
14546 precision: None,
14547 timezone: false,
14548 },
14549 trailing_comments: Vec::new(),
14550 double_colon_syntax: false,
14551 format: None,
14552 default: None,
14553 inferred_type: None,
14554 }))
14555 } else {
14556 arg2
14557 };
14558 Ok(Expression::Add(Box::new(
14559 crate::expressions::BinaryOp::new(arg2, interval),
14560 )))
14561 }
14562 }
14563 DialectType::Spark => {
14564 // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
14565 // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
14566 if matches!(source, DialectType::TSQL | DialectType::Fabric)
14567 {
14568 fn multiply_expr_spark(
14569 expr: Expression,
14570 factor: i64,
14571 ) -> Expression
14572 {
14573 if let Expression::Literal(
14574 crate::expressions::Literal::Number(n),
14575 ) = &expr
14576 {
14577 if let Ok(val) = n.parse::<i64>() {
14578 return Expression::Literal(
14579 crate::expressions::Literal::Number(
14580 (val * factor).to_string(),
14581 ),
14582 );
14583 }
14584 }
14585 Expression::Mul(Box::new(
14586 crate::expressions::BinaryOp::new(
14587 expr,
14588 Expression::Literal(
14589 crate::expressions::Literal::Number(
14590 factor.to_string(),
14591 ),
14592 ),
14593 ),
14594 ))
14595 }
// Normalize TSQL datepart abbreviations (YY, QQ, MM, WK, DD, ...) to
// canonical unit names before choosing a Spark rewrite.
let normalized_unit = match unit_str.as_str() {
    "YEAR" | "YY" | "YYYY" => "YEAR",
    "QUARTER" | "QQ" | "Q" => "QUARTER",
    "MONTH" | "MM" | "M" => "MONTH",
    "WEEK" | "WK" | "WW" => "WEEK",
    "DAY" | "DD" | "D" | "DY" => "DAY",
    // Unknown abbreviation: pass the unit through untouched.
    _ => &unit_str,
};
match normalized_unit {
    // YEAR/QUARTER have no direct Spark function; convert the amount
    // to months and use ADD_MONTHS(date, months).
    "YEAR" => {
        let months = multiply_expr_spark(arg1, 12);
        Ok(Expression::Function(Box::new(
            Function::new(
                "ADD_MONTHS".to_string(),
                vec![arg2, months],
            ),
        )))
    }
    "QUARTER" => {
        let months = multiply_expr_spark(arg1, 3);
        Ok(Expression::Function(Box::new(
            Function::new(
                "ADD_MONTHS".to_string(),
                vec![arg2, months],
            ),
        )))
    }
    "MONTH" => Ok(Expression::Function(Box::new(
        Function::new(
            "ADD_MONTHS".to_string(),
            vec![arg2, arg1],
        ),
    ))),
    // WEEK becomes days (x7); DAY maps directly to Spark's
    // 2-arg DATE_ADD(date, days).
    "WEEK" => {
        let days = multiply_expr_spark(arg1, 7);
        Ok(Expression::Function(Box::new(
            Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, days],
            ),
        )))
    }
    "DAY" => Ok(Expression::Function(Box::new(
        Function::new(
            "DATE_ADD".to_string(),
            vec![arg2, arg1],
        ),
    ))),
    // Sub-day or unrecognized units: keep the 3-arg form
    // DATE_ADD(unit, amount, date) and let the target resolve it.
    _ => {
        let unit = Expression::Identifier(
            Identifier::new(&unit_str),
        );
        Ok(Expression::Function(Box::new(
            Function::new(
                "DATE_ADD".to_string(),
                vec![unit, arg1, arg2],
            ),
        )))
    }
}
} else {
    // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
    let unit =
        Expression::Identifier(Identifier::new(&unit_str));
    Ok(Expression::Function(Box::new(Function::new(
        "DATE_ADD".to_string(),
        vec![unit, arg1, arg2],
    ))))
}
14665 }
// Hive target: only DAY- and MONTH-granularity date arithmetic exists.
DialectType::Hive => match unit_str.as_str() {
    "MONTH" => {
        Ok(Expression::Function(Box::new(Function::new(
            "ADD_MONTHS".to_string(),
            vec![arg2, arg1],
        ))))
    }
    // NOTE(review): any other unit falls through to plain
    // DATE_ADD(date, val), silently dropping the unit (Hive's
    // DATE_ADD only adds days) — confirm callers never reach this
    // with YEAR/HOUR/etc.
    _ => Ok(Expression::Function(Box::new(Function::new(
        "DATE_ADD".to_string(),
        vec![arg2, arg1],
    )))),
},
DialectType::Presto
| DialectType::Trino
| DialectType::Athena => {
    // Cast string literal date to TIMESTAMP
    let arg2 = if matches!(
        &arg2,
        Expression::Literal(Literal::String(_))
    ) {
        Expression::Cast(Box::new(Cast {
            this: arg2,
            to: DataType::Timestamp {
                precision: None,
                timezone: false,
            },
            trailing_comments: Vec::new(),
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }))
    } else {
        arg2
    };
    // Presto family: DATE_ADD('unit', amount, date) with a quoted
    // unit string.
    Ok(Expression::Function(Box::new(Function::new(
        "DATE_ADD".to_string(),
        vec![Expression::string(&unit_str), arg1, arg2],
    ))))
}
DialectType::MySQL => {
    // MySQL gets a dedicated AST node that renders as
    // DATE_ADD(date, INTERVAL amount unit).
    let iu = Self::parse_interval_unit_static(&unit_str);
    Ok(Expression::DateAdd(Box::new(
        crate::expressions::DateAddFunc {
            this: arg2,
            interval: arg1,
            unit: iu,
        },
    )))
}
DialectType::PostgreSQL => {
    // Cast string literal date to TIMESTAMP
    let arg2 = if matches!(
        &arg2,
        Expression::Literal(Literal::String(_))
    ) {
        Expression::Cast(Box::new(Cast {
            this: arg2,
            to: DataType::Timestamp {
                precision: None,
                timezone: false,
            },
            trailing_comments: Vec::new(),
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }))
    } else {
        arg2
    };
    // PostgreSQL: date + INTERVAL '<amount> <unit>'. The amount is
    // stringified at transpile time into the interval literal.
    let interval = Expression::Interval(Box::new(
        crate::expressions::Interval {
            this: Some(Expression::string(&format!(
                "{} {}",
                Self::expr_to_string_static(&arg1),
                unit_str
            ))),
            unit: None,
        },
    ));
    Ok(Expression::Add(Box::new(
        crate::expressions::BinaryOp::new(arg2, interval),
    )))
}
DialectType::BigQuery => {
    // BigQuery: DATE_ADD(date, INTERVAL amount UNIT).
    let iu = Self::parse_interval_unit_static(&unit_str);
    let interval = Expression::Interval(Box::new(
        crate::expressions::Interval {
            this: Some(arg1),
            unit: Some(
                crate::expressions::IntervalUnitSpec::Simple {
                    unit: iu,
                    use_plural: false,
                },
            ),
        },
    ));
    // Non-TSQL sources: CAST string literal to DATETIME
    let arg2 = if !matches!(
        source,
        DialectType::TSQL | DialectType::Fabric
    ) && matches!(
        &arg2,
        Expression::Literal(Literal::String(_))
    ) {
        Expression::Cast(Box::new(Cast {
            this: arg2,
            to: DataType::Custom {
                name: "DATETIME".to_string(),
            },
            trailing_comments: Vec::new(),
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }))
    } else {
        arg2
    };
    Ok(Expression::Function(Box::new(Function::new(
        "DATE_ADD".to_string(),
        vec![arg2, interval],
    ))))
}
// Default: keep the TSQL-style DATEADD(unit, amount, date) shape.
_ => {
    let unit =
        Expression::Identifier(Identifier::new(&unit_str));
    Ok(Expression::Function(Box::new(Function::new(
        "DATEADD".to_string(),
        vec![unit, arg1, arg2],
    ))))
}
14799 }
14800 }
// DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
// or (date, val, 'UNIT') from Generic canonical form
"DATE_ADD" if f.args.len() == 3 => {
    let mut args = f.args;
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
    // where arg2 is a string literal matching a unit name
    let arg2_unit = match &arg2 {
        Expression::Literal(Literal::String(s)) => {
            let u = s.to_ascii_uppercase();
            if matches!(
                u.as_str(),
                "DAY"
                    | "MONTH"
                    | "YEAR"
                    | "HOUR"
                    | "MINUTE"
                    | "SECOND"
                    | "WEEK"
                    | "QUARTER"
                    | "MILLISECOND"
                    | "MICROSECOND"
            ) {
                Some(u)
            } else {
                None
            }
        }
        _ => None,
    };
    // Reorder: if arg2 is the unit, swap to (unit, val, date) form
    let (unit_str, val, date) = if let Some(u) = arg2_unit {
        (u, arg1, arg0)
    } else {
        (Self::get_unit_str_static(&arg0), arg1, arg2)
    };
    // Alias for backward compat with the rest of the match
    let arg1 = val;
    let arg2 = date;

    match target {
        // Presto family: DATE_ADD('unit', amount, date).
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string(&unit_str), arg1, arg2],
            ))))
        }
        // DuckDB: date + INTERVAL amount UNIT.
        DialectType::DuckDB => {
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(arg2, interval),
            )))
        }
        DialectType::PostgreSQL
        | DialectType::Materialize
        | DialectType::RisingWave => {
            // PostgreSQL: x + INTERVAL '1 DAY'
            let amount_str = Self::expr_to_string_static(&arg1);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string(&format!(
                        "{} {}",
                        amount_str, unit_str
                    ))),
                    unit: None,
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(arg2, interval),
            )))
        }
        // DATEADD(unit, amount, date) family.
        DialectType::Snowflake
        | DialectType::TSQL
        | DialectType::Redshift => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::BigQuery
        | DialectType::MySQL
        | DialectType::Doris
        | DialectType::StarRocks
        | DialectType::Drill => {
            // DATE_ADD(date, INTERVAL amount UNIT)
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, interval],
            ))))
        }
        DialectType::SQLite => {
            // SQLite: DATE(x, '1 DAY')
            // Build the string '1 DAY' from amount and unit.
            // NOTE(review): non-literal amounts default to "1",
            // dropping the actual expression — confirm upstream
            // only passes numeric literals here.
            let amount_str = match &arg1 {
                Expression::Literal(Literal::Number(n)) => n.clone(),
                _ => "1".to_string(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE".to_string(),
                vec![
                    arg2,
                    Expression::string(format!(
                        "{} {}",
                        amount_str, unit_str
                    )),
                ],
            ))))
        }
        DialectType::Dremio => {
            // Dremio: DATE_ADD(date, amount) - drops unit
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, arg1],
            ))))
        }
        DialectType::Spark => {
            // Spark: DATE_ADD(date, val) for DAY; otherwise keep the
            // 3-arg DATE_ADD(unit, val, date) form.
            if unit_str == "DAY" {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![arg2, arg1],
                ))))
            } else {
                let unit =
                    Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
        }
        // Databricks keeps the 3-arg DATE_ADD(unit, val, date) form.
        DialectType::Databricks => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Hive => {
            // Hive: DATE_ADD(date, val) for DAY
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, arg1],
            ))))
        }
        // Everything else: canonical 3-arg DATE_ADD(unit, val, date).
        _ => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
    }
}
// DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days)
"DATE_ADD"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
                | DialectType::Generic
        ) =>
{
    let mut args = f.args;
    let date = args.remove(0);
    let days = args.remove(0);
    match target {
        DialectType::Hive | DialectType::Spark => {
            // Keep as DATE_ADD(date, days) for Hive/Spark
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, days],
            ))))
        }
        DialectType::Databricks => {
            // Databricks: DATEADD(DAY, days, date)
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    date,
                ],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
            let cast_date = Self::ensure_cast_date(date);
            // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
            let interval_val = if matches!(
                days,
                Expression::Mul(_)
                    | Expression::Sub(_)
                    | Expression::Add(_)
            ) {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: days,
                    trailing_comments: vec![],
                }))
            } else {
                days
            };
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(interval_val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        DialectType::Snowflake => {
            // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
            let cast_date = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                if matches!(
                    date,
                    Expression::Literal(Literal::String(_))
                ) {
                    Self::double_cast_timestamp_date(date)
                } else {
                    date
                }
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    cast_date,
                ],
            ))))
        }
        // Redshift: DATEADD(DAY, days, date), no cast needed.
        DialectType::Redshift => {
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
            // But Databricks DATE_ADD doesn't need this wrapping for TSQL
            let cast_date = if matches!(
                source,
                DialectType::Hive | DialectType::Spark
            ) {
                if matches!(
                    date,
                    Expression::Literal(Literal::String(_))
                ) {
                    Self::double_cast_datetime2_date(date)
                } else {
                    date
                }
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    cast_date,
                ],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
            let cast_date = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                if matches!(
                    date,
                    Expression::Literal(Literal::String(_))
                ) {
                    Self::double_cast_timestamp_date(date)
                } else {
                    date
                }
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string("DAY"), days, cast_date],
            ))))
        }
        DialectType::BigQuery => {
            // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
            let cast_date = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                Self::double_cast_datetime_date(date)
            } else {
                date
            };
            // Wrap complex expressions in Paren for interval
            let interval_val = if matches!(
                days,
                Expression::Mul(_)
                    | Expression::Sub(_)
                    | Expression::Add(_)
            ) {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: days,
                    trailing_comments: vec![],
                }))
            } else {
                days
            };
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(interval_val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![cast_date, interval],
            ))))
        }
        // MySQL: dedicated node, DATE_ADD(date, INTERVAL days DAY).
        DialectType::MySQL => {
            let iu = crate::expressions::IntervalUnit::Day;
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: date,
                    interval: days,
                    unit: iu,
                },
            )))
        }
        // PostgreSQL: date + INTERVAL '<days> DAY' (stringified).
        DialectType::PostgreSQL => {
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string(&format!(
                        "{} DAY",
                        Self::expr_to_string_static(&days)
                    ))),
                    unit: None,
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        DialectType::Doris
        | DialectType::StarRocks
        | DialectType::Drill => {
            // DATE_ADD(date, INTERVAL days DAY)
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(days),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        // Default: pass the 2-arg form through unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![date, days],
        )))),
    }
}
// DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days).
// Targets without DATE_SUB express it as adding a negated amount.
"DATE_SUB"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
        ) =>
{
    let mut args = f.args;
    let date = args.remove(0);
    let days = args.remove(0);
    // Helper to create days * -1
    let make_neg_days = |d: Expression| -> Expression {
        Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
            d,
            Expression::Literal(Literal::Number("-1".to_string())),
        )))
    };
    // String-literal dates need a double cast on several targets.
    let is_string_literal =
        matches!(date, Expression::Literal(Literal::String(_)));
    match target {
        DialectType::Hive
        | DialectType::Spark
        | DialectType::Databricks => {
            // Keep as DATE_SUB(date, days) for Hive/Spark
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_SUB".to_string(),
                vec![date, days],
            ))))
        }
        // DuckDB: CAST(date AS DATE) + INTERVAL (days * -1) DAY.
        // The negation is always parenthesized for rendering.
        DialectType::DuckDB => {
            let cast_date = Self::ensure_cast_date(date);
            let neg = make_neg_days(days);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::Paren(Box::new(
                        crate::expressions::Paren {
                            this: neg,
                            trailing_comments: vec![],
                        },
                    ))),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        // Snowflake: DATEADD(DAY, days * -1, date).
        DialectType::Snowflake => {
            let cast_date = if is_string_literal {
                Self::double_cast_timestamp_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    neg,
                    cast_date,
                ],
            ))))
        }
        DialectType::Redshift => {
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    neg,
                    date,
                ],
            ))))
        }
        // TSQL/Fabric: DATEADD(DAY, days * -1, date) with a
        // DATETIME2 double cast for string-literal dates.
        DialectType::TSQL | DialectType::Fabric => {
            let cast_date = if is_string_literal {
                Self::double_cast_datetime2_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    neg,
                    cast_date,
                ],
            ))))
        }
        // Presto family: DATE_ADD('DAY', days * -1, date).
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            let cast_date = if is_string_literal {
                Self::double_cast_timestamp_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string("DAY"), neg, cast_date],
            ))))
        }
        // BigQuery: DATE_ADD(date, INTERVAL (days * -1) DAY).
        DialectType::BigQuery => {
            let cast_date = if is_string_literal {
                Self::double_cast_datetime_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::Paren(Box::new(
                        crate::expressions::Paren {
                            this: neg,
                            trailing_comments: vec![],
                        },
                    ))),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![cast_date, interval],
            ))))
        }
        // Default: pass DATE_SUB(date, days) through unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "DATE_SUB".to_string(),
            vec![date, days],
        )))),
    }
}
// ADD_MONTHS(date, val) -> target-specific
"ADD_MONTHS" if f.args.len() == 2 => {
    let mut args = f.args;
    let date = args.remove(0);
    let val = args.remove(0);
    match target {
        // NOTE(review): only TSQL is matched here — Fabric falls to
        // the default arm and keeps ADD_MONTHS, unlike the other
        // date arms where TSQL|Fabric are grouped; confirm intended.
        DialectType::TSQL => {
            let cast_date = Self::ensure_cast_datetime2(date);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("MONTH")),
                    val,
                    cast_date,
                ],
            ))))
        }
        // DuckDB: date + INTERVAL val MONTH.
        DialectType::DuckDB => {
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit:
                                crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        DialectType::Snowflake => {
            // Keep ADD_MONTHS when source is Snowflake
            if matches!(source, DialectType::Snowflake) {
                Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date, val],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(
                            "MONTH",
                        )),
                        val,
                        date,
                    ],
                ))))
            }
        }
        // Redshift: DATEADD(MONTH, val, date).
        DialectType::Redshift => {
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("MONTH")),
                    val,
                    date,
                ],
            ))))
        }
        // Presto family: DATE_ADD('MONTH', val, date).
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string("MONTH"), val, date],
            ))))
        }
        // BigQuery: DATE_ADD(date, INTERVAL val MONTH).
        DialectType::BigQuery => {
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit:
                                crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        // Default: keep ADD_MONTHS(date, val) unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ADD_MONTHS".to_string(),
            vec![date, val],
        )))),
    }
}
// DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
"DATETRUNC" if f.args.len() == 2 => {
    let mut args = f.args;
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    // Extract the unit name whether arg0 is an identifier or literal.
    let unit_str = Self::get_unit_str_static(&arg0);
    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
            Ok(Expression::Function(Box::new(Function::new(
                "DATETRUNC".to_string(),
                vec![
                    Expression::Identifier(Identifier::new(&unit_str)),
                    arg1,
                ],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
            let date = Self::ensure_cast_timestamp(arg1);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![Expression::string(&unit_str), date],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse: dateTrunc('UNIT', expr) — camelCase name.
            Ok(Expression::Function(Box::new(Function::new(
                "dateTrunc".to_string(),
                vec![Expression::string(&unit_str), arg1],
            ))))
        }
        _ => {
            // Standard: DATE_TRUNC('UNIT', expr)
            let unit = Expression::string(&unit_str);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![unit, arg1],
            ))))
        }
    }
}
// GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets.
// TSQL keeps the original node; Redshift also supports GETDATE().
"GETDATE" if f.args.is_empty() => match target {
    DialectType::TSQL => Ok(Expression::Function(f)),
    DialectType::Redshift => Ok(Expression::Function(Box::new(
        Function::new("GETDATE".to_string(), vec![]),
    ))),
    _ => Ok(Expression::CurrentTimestamp(
        crate::expressions::CurrentTimestamp {
            precision: None,
            sysdate: false,
        },
    )),
},
// TO_HEX(x) / HEX(x) -> target-specific hex function
"TO_HEX" | "HEX" if f.args.len() == 1 => {
    // Map to the target's preferred name; unknown targets keep the
    // source's original spelling.
    let name = match target {
        DialectType::Presto | DialectType::Trino => "TO_HEX",
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "HEX",
        DialectType::DuckDB
        | DialectType::PostgreSQL
        | DialectType::Redshift => "TO_HEX",
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
"FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
    match target {
        DialectType::BigQuery => {
            // BigQuery: UNHEX(x) -> FROM_HEX(x)
            // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
            // because BigQuery MD5 returns BYTES, not hex string
            let arg = &f.args[0];
            let wrapped_arg = match arg {
                Expression::Function(inner_f)
                    if inner_f.name.eq_ignore_ascii_case("MD5")
                        || inner_f.name.eq_ignore_ascii_case("SHA1")
                        || inner_f.name.eq_ignore_ascii_case("SHA256")
                        || inner_f.name.eq_ignore_ascii_case("SHA512") =>
                {
                    // Wrap hash function in TO_HEX for BigQuery
                    Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg.clone()],
                    )))
                }
                // Otherwise consume the single argument as-is.
                _ => f.args.into_iter().next().unwrap(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_HEX".to_string(),
                vec![wrapped_arg],
            ))))
        }
        _ => {
            // Simple rename for the remaining targets; unknown
            // targets keep the source's original spelling.
            let name = match target {
                DialectType::Presto | DialectType::Trino => "FROM_HEX",
                DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive => "UNHEX",
                _ => &f.name,
            };
            Ok(Expression::Function(Box::new(Function::new(
                name.to_string(),
                f.args,
            ))))
        }
    }
}
// TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
"TO_UTF8" if f.args.len() == 1 => match target {
    DialectType::Spark | DialectType::Databricks => {
        // Append the charset argument Spark's ENCODE requires.
        let mut args = f.args;
        args.push(Expression::string("utf-8"));
        Ok(Expression::Function(Box::new(Function::new(
            "ENCODE".to_string(),
            args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
"FROM_UTF8" if f.args.len() == 1 => match target {
    DialectType::Spark | DialectType::Databricks => {
        // Symmetric to TO_UTF8: DECODE with an explicit charset.
        let mut args = f.args;
        args.push(Expression::string("utf-8"));
        Ok(Expression::Function(Box::new(Function::new(
            "DECODE".to_string(),
            args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
"STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
    // Underscore vs. no-underscore spelling varies by engine;
    // unknown targets keep the source's original spelling.
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "STARTSWITH",
        DialectType::Presto | DialectType::Trino => "STARTS_WITH",
        DialectType::PostgreSQL | DialectType::Redshift => {
            "STARTS_WITH"
        }
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
"APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
    // Presto family calls it APPROX_DISTINCT; everyone else uses
    // the APPROX_COUNT_DISTINCT spelling.
    let name = match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => "APPROX_DISTINCT",
        _ => "APPROX_COUNT_DISTINCT",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive.
// BigQuery-sourced JSON_EXTRACT is excluded: its path semantics
// differ and are handled elsewhere.
"JSON_EXTRACT"
    if f.args.len() == 2
        && !matches!(source, DialectType::BigQuery)
        && matches!(
            target,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    Ok(Expression::Function(Box::new(Function::new(
        "GET_JSON_OBJECT".to_string(),
        f.args,
    ))))
}
// JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
"JSON_EXTRACT"
    if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
{
    let mut args = f.args;
    // Remove the path first so index 0 still refers to the subject.
    let path = args.remove(1);
    let this = args.remove(0);
    Ok(Expression::JsonExtract(Box::new(
        crate::expressions::JsonExtractFunc {
            this,
            path,
            returning: None,
            arrow_syntax: true,
            hash_arrow_syntax: false,
            wrapper_option: None,
            quotes_option: None,
            on_scalar_string: false,
            on_error: None,
        },
    )))
}
// JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
"JSON_FORMAT" if f.args.len() == 1 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
            // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
            if matches!(
                source,
                DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena
            ) {
                if let Some(Expression::ParseJson(pj)) = f.args.first()
                {
                    if let Expression::Literal(Literal::String(s)) =
                        &pj.this
                    {
                        // Wrap the payload in [...] so SCHEMA_OF_JSON
                        // always sees a valid top-level JSON value.
                        let wrapped = Expression::Literal(
                            Literal::String(format!("[{}]", s)),
                        );
                        let schema_of_json = Expression::Function(
                            Box::new(Function::new(
                                "SCHEMA_OF_JSON".to_string(),
                                vec![wrapped.clone()],
                            )),
                        );
                        let from_json = Expression::Function(Box::new(
                            Function::new(
                                "FROM_JSON".to_string(),
                                vec![wrapped, schema_of_json],
                            ),
                        ));
                        let to_json = Expression::Function(Box::new(
                            Function::new(
                                "TO_JSON".to_string(),
                                vec![from_json],
                            ),
                        ));
                        // REGEXP_EXTRACT strips the synthetic [ ]
                        // wrapper from the serialized result.
                        return Ok(Expression::Function(Box::new(
                            Function::new(
                                "REGEXP_EXTRACT".to_string(),
                                vec![
                                    to_json,
                                    Expression::Literal(
                                        Literal::String(
                                            "^.(.*).$".to_string(),
                                        ),
                                    ),
                                    Expression::Literal(
                                        Literal::Number(
                                            "1".to_string(),
                                        ),
                                    ),
                                ],
                            ),
                        )));
                    }
                }
            }

            // Strip inner CAST(... AS JSON) or TO_JSON() if present
            // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
            let mut args = f.args;
            if let Some(Expression::Cast(ref c)) = args.first() {
                if matches!(&c.to, DataType::Json | DataType::JsonB) {
                    args = vec![c.this.clone()];
                }
            } else if let Some(Expression::Function(ref inner_f)) =
                args.first()
            {
                if inner_f.name.eq_ignore_ascii_case("TO_JSON")
                    && inner_f.args.len() == 1
                {
                    // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
                    args = inner_f.args.clone();
                }
            }
            Ok(Expression::Function(Box::new(Function::new(
                "TO_JSON".to_string(),
                args,
            ))))
        }
        DialectType::BigQuery => Ok(Expression::Function(Box::new(
            Function::new("TO_JSON_STRING".to_string(), f.args),
        ))),
        DialectType::DuckDB => {
            // CAST(TO_JSON(x) AS TEXT)
            let to_json = Expression::Function(Box::new(
                Function::new("TO_JSON".to_string(), f.args),
            ));
            Ok(Expression::Cast(Box::new(Cast {
                this: to_json,
                to: DataType::Text,
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
"SYSDATE" if f.args.is_empty() => {
    match target {
        // Oracle/Redshift support bare SYSDATE natively.
        DialectType::Oracle | DialectType::Redshift => {
            Ok(Expression::Function(f))
        }
        DialectType::Snowflake => {
            // Snowflake uses SYSDATE() with parens
            let mut f = *f;
            f.no_parens = false;
            Ok(Expression::Function(Box::new(f)))
        }
        DialectType::DuckDB => {
            // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
            Ok(Expression::AtTimeZone(Box::new(
                crate::expressions::AtTimeZone {
                    this: Expression::CurrentTimestamp(
                        crate::expressions::CurrentTimestamp {
                            precision: None,
                            sysdate: false,
                        },
                    ),
                    zone: Expression::Literal(Literal::String(
                        "UTC".to_string(),
                    )),
                },
            )))
        }
        // Everything else: CURRENT_TIMESTAMP, tagged as originating
        // from SYSDATE so generators can round-trip it.
        _ => Ok(Expression::CurrentTimestamp(
            crate::expressions::CurrentTimestamp {
                precision: None,
                sysdate: true,
            },
        )),
    }
}
15819 // LOGICAL_OR(x) -> BOOL_OR(x)
15820 "LOGICAL_OR" if f.args.len() == 1 => {
15821 let name = match target {
15822 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
15823 _ => &f.name,
15824 };
15825 Ok(Expression::Function(Box::new(Function::new(
15826 name.to_string(),
15827 f.args,
15828 ))))
15829 }
15830 // LOGICAL_AND(x) -> BOOL_AND(x)
15831 "LOGICAL_AND" if f.args.len() == 1 => {
15832 let name = match target {
15833 DialectType::Spark | DialectType::Databricks => "BOOL_AND",
15834 _ => &f.name,
15835 };
15836 Ok(Expression::Function(Box::new(Function::new(
15837 name.to_string(),
15838 f.args,
15839 ))))
15840 }
15841 // MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
15842 "MONTHS_ADD" if f.args.len() == 2 => match target {
15843 DialectType::Oracle => Ok(Expression::Function(Box::new(
15844 Function::new("ADD_MONTHS".to_string(), f.args),
15845 ))),
15846 _ => Ok(Expression::Function(f)),
15847 },
15848 // ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
15849 "ARRAY_JOIN" if f.args.len() >= 2 => {
15850 match target {
15851 DialectType::Spark | DialectType::Databricks => {
15852 // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
15853 Ok(Expression::Function(f))
15854 }
15855 DialectType::Hive => {
15856 // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
15857 let mut args = f.args;
15858 let arr = args.remove(0);
15859 let sep = args.remove(0);
15860 // Drop any remaining args (null_replacement)
15861 Ok(Expression::Function(Box::new(Function::new(
15862 "CONCAT_WS".to_string(),
15863 vec![sep, arr],
15864 ))))
15865 }
15866 DialectType::Presto | DialectType::Trino => {
15867 Ok(Expression::Function(f))
15868 }
15869 _ => Ok(Expression::Function(f)),
15870 }
15871 }
15872 // LOCATE(substr, str, pos) 3-arg -> target-specific
15873 // For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
15874 "LOCATE"
15875 if f.args.len() == 3
15876 && matches!(
15877 target,
15878 DialectType::Presto
15879 | DialectType::Trino
15880 | DialectType::Athena
15881 | DialectType::DuckDB
15882 ) =>
15883 {
15884 let mut args = f.args;
15885 let substr = args.remove(0);
15886 let string = args.remove(0);
15887 let pos = args.remove(0);
15888 // STRPOS(SUBSTRING(string, pos), substr)
15889 let substring_call = Expression::Function(Box::new(Function::new(
15890 "SUBSTRING".to_string(),
15891 vec![string.clone(), pos.clone()],
15892 )));
15893 let strpos_call = Expression::Function(Box::new(Function::new(
15894 "STRPOS".to_string(),
15895 vec![substring_call, substr.clone()],
15896 )));
15897 // STRPOS(...) + pos - 1
15898 let pos_adjusted =
15899 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
15900 Expression::Add(Box::new(
15901 crate::expressions::BinaryOp::new(
15902 strpos_call.clone(),
15903 pos.clone(),
15904 ),
15905 )),
15906 Expression::number(1),
15907 )));
15908 // STRPOS(...) = 0
15909 let is_zero =
15910 Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
15911 strpos_call.clone(),
15912 Expression::number(0),
15913 )));
15914
15915 match target {
15916 DialectType::Presto
15917 | DialectType::Trino
15918 | DialectType::Athena => {
15919 // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
15920 Ok(Expression::Function(Box::new(Function::new(
15921 "IF".to_string(),
15922 vec![is_zero, Expression::number(0), pos_adjusted],
15923 ))))
15924 }
15925 DialectType::DuckDB => {
15926 // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
15927 Ok(Expression::Case(Box::new(crate::expressions::Case {
15928 operand: None,
15929 whens: vec![(is_zero, Expression::number(0))],
15930 else_: Some(pos_adjusted),
15931 comments: Vec::new(),
15932 inferred_type: None,
15933 })))
15934 }
15935 _ => Ok(Expression::Function(Box::new(Function::new(
15936 "LOCATE".to_string(),
15937 vec![substr, string, pos],
15938 )))),
15939 }
15940 }
15941 // STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
15942 "STRPOS"
15943 if f.args.len() == 3
15944 && matches!(
15945 target,
15946 DialectType::BigQuery
15947 | DialectType::Oracle
15948 | DialectType::Teradata
15949 ) =>
15950 {
15951 let mut args = f.args;
15952 let haystack = args.remove(0);
15953 let needle = args.remove(0);
15954 let occurrence = args.remove(0);
15955 Ok(Expression::Function(Box::new(Function::new(
15956 "INSTR".to_string(),
15957 vec![haystack, needle, Expression::number(1), occurrence],
15958 ))))
15959 }
15960 // SCHEMA_NAME(id) -> target-specific
15961 "SCHEMA_NAME" if f.args.len() <= 1 => match target {
15962 DialectType::MySQL | DialectType::SingleStore => {
15963 Ok(Expression::Function(Box::new(Function::new(
15964 "SCHEMA".to_string(),
15965 vec![],
15966 ))))
15967 }
15968 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
15969 crate::expressions::CurrentSchema { this: None },
15970 ))),
15971 DialectType::SQLite => Ok(Expression::string("main")),
15972 _ => Ok(Expression::Function(f)),
15973 },
15974 // STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
15975 "STRTOL" if f.args.len() == 2 => match target {
15976 DialectType::Presto | DialectType::Trino => {
15977 Ok(Expression::Function(Box::new(Function::new(
15978 "FROM_BASE".to_string(),
15979 f.args,
15980 ))))
15981 }
15982 _ => Ok(Expression::Function(f)),
15983 },
15984 // EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
15985 "EDITDIST3" if f.args.len() == 2 => match target {
15986 DialectType::Spark | DialectType::Databricks => {
15987 Ok(Expression::Function(Box::new(Function::new(
15988 "LEVENSHTEIN".to_string(),
15989 f.args,
15990 ))))
15991 }
15992 _ => Ok(Expression::Function(f)),
15993 },
15994 // FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
15995 "FORMAT"
15996 if f.args.len() == 2
15997 && matches!(
15998 source,
15999 DialectType::MySQL | DialectType::SingleStore
16000 )
16001 && matches!(target, DialectType::DuckDB) =>
16002 {
16003 let mut args = f.args;
16004 let num_expr = args.remove(0);
16005 let decimals_expr = args.remove(0);
16006 // Extract decimal count
16007 let dec_count = match &decimals_expr {
16008 Expression::Literal(Literal::Number(n)) => n.clone(),
16009 _ => "0".to_string(),
16010 };
16011 let fmt_str = format!("{{:,.{}f}}", dec_count);
16012 Ok(Expression::Function(Box::new(Function::new(
16013 "FORMAT".to_string(),
16014 vec![Expression::string(&fmt_str), num_expr],
16015 ))))
16016 }
16017 // FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
16018 "FORMAT"
16019 if f.args.len() == 2
16020 && matches!(
16021 source,
16022 DialectType::TSQL | DialectType::Fabric
16023 ) =>
16024 {
16025 let val_expr = f.args[0].clone();
16026 let fmt_expr = f.args[1].clone();
16027 // Expand unambiguous .NET single-char date format shortcodes to full patterns.
16028 // Only expand shortcodes that are NOT also valid numeric format specifiers.
16029 // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
16030 // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
16031 let (expanded_fmt, is_shortcode) = match &fmt_expr {
16032 Expression::Literal(crate::expressions::Literal::String(s)) => {
16033 match s.as_str() {
16034 "m" | "M" => (Expression::string("MMMM d"), true),
16035 "t" => (Expression::string("h:mm tt"), true),
16036 "T" => (Expression::string("h:mm:ss tt"), true),
16037 "y" | "Y" => (Expression::string("MMMM yyyy"), true),
16038 _ => (fmt_expr.clone(), false),
16039 }
16040 }
16041 _ => (fmt_expr.clone(), false),
16042 };
16043 // Check if the format looks like a date format
16044 let is_date_format = is_shortcode
16045 || match &expanded_fmt {
16046 Expression::Literal(
16047 crate::expressions::Literal::String(s),
16048 ) => {
16049 // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
16050 s.contains("yyyy")
16051 || s.contains("YYYY")
16052 || s.contains("MM")
16053 || s.contains("dd")
16054 || s.contains("MMMM")
16055 || s.contains("HH")
16056 || s.contains("hh")
16057 || s.contains("ss")
16058 }
16059 _ => false,
16060 };
16061 match target {
16062 DialectType::Spark | DialectType::Databricks => {
16063 let func_name = if is_date_format {
16064 "DATE_FORMAT"
16065 } else {
16066 "FORMAT_NUMBER"
16067 };
16068 Ok(Expression::Function(Box::new(Function::new(
16069 func_name.to_string(),
16070 vec![val_expr, expanded_fmt],
16071 ))))
16072 }
16073 _ => {
16074 // For TSQL and other targets, expand shortcodes but keep FORMAT
16075 if is_shortcode {
16076 Ok(Expression::Function(Box::new(Function::new(
16077 "FORMAT".to_string(),
16078 vec![val_expr, expanded_fmt],
16079 ))))
16080 } else {
16081 Ok(Expression::Function(f))
16082 }
16083 }
16084 }
16085 }
16086 // FORMAT('%s', x) from Trino/Presto -> target-specific
16087 "FORMAT"
16088 if f.args.len() >= 2
16089 && matches!(
16090 source,
16091 DialectType::Trino
16092 | DialectType::Presto
16093 | DialectType::Athena
16094 ) =>
16095 {
16096 let fmt_expr = f.args[0].clone();
16097 let value_args: Vec<Expression> = f.args[1..].to_vec();
16098 match target {
16099 // DuckDB: replace %s with {} in format string
16100 DialectType::DuckDB => {
16101 let new_fmt = match &fmt_expr {
16102 Expression::Literal(Literal::String(s)) => {
16103 Expression::Literal(Literal::String(
16104 s.replace("%s", "{}"),
16105 ))
16106 }
16107 _ => fmt_expr,
16108 };
16109 let mut args = vec![new_fmt];
16110 args.extend(value_args);
16111 Ok(Expression::Function(Box::new(Function::new(
16112 "FORMAT".to_string(),
16113 args,
16114 ))))
16115 }
16116 // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
16117 DialectType::Snowflake => match &fmt_expr {
16118 Expression::Literal(Literal::String(s))
16119 if s == "%s" && value_args.len() == 1 =>
16120 {
16121 Ok(Expression::Function(Box::new(Function::new(
16122 "TO_CHAR".to_string(),
16123 value_args,
16124 ))))
16125 }
16126 _ => Ok(Expression::Function(f)),
16127 },
16128 // Default: keep FORMAT as-is
16129 _ => Ok(Expression::Function(f)),
16130 }
16131 }
16132 // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
16133 "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
16134 if f.args.len() == 2 =>
16135 {
16136 // When coming from Snowflake source: ARRAY_CONTAINS(value, array)
16137 // args[0]=value, args[1]=array. For DuckDB target, swap and add NULL-aware CASE.
16138 if matches!(target, DialectType::DuckDB)
16139 && matches!(source, DialectType::Snowflake)
16140 && f.name.eq_ignore_ascii_case("ARRAY_CONTAINS")
16141 {
16142 let value = f.args[0].clone();
16143 let array = f.args[1].clone();
16144
16145 // value IS NULL
16146 let value_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
16147 this: value.clone(),
16148 not: false,
16149 postfix_form: false,
16150 }));
16151
16152 // ARRAY_LENGTH(array)
16153 let array_length = Expression::Function(Box::new(Function::new(
16154 "ARRAY_LENGTH".to_string(),
16155 vec![array.clone()],
16156 )));
16157 // LIST_COUNT(array)
16158 let list_count = Expression::Function(Box::new(Function::new(
16159 "LIST_COUNT".to_string(),
16160 vec![array.clone()],
16161 )));
16162 // ARRAY_LENGTH(array) <> LIST_COUNT(array)
16163 let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
16164 left: array_length,
16165 right: list_count,
16166 left_comments: vec![],
16167 operator_comments: vec![],
16168 trailing_comments: vec![],
16169 inferred_type: None,
16170 }));
16171 // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
16172 let nullif = Expression::Nullif(Box::new(crate::expressions::Nullif {
16173 this: Box::new(neq),
16174 expression: Box::new(Expression::Boolean(crate::expressions::BooleanLiteral { value: false })),
16175 }));
16176
16177 // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
16178 let array_contains = Expression::Function(Box::new(Function::new(
16179 "ARRAY_CONTAINS".to_string(),
16180 vec![array, value],
16181 )));
16182
16183 // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
16184 return Ok(Expression::Case(Box::new(Case {
16185 operand: None,
16186 whens: vec![(value_is_null, nullif)],
16187 else_: Some(array_contains),
16188 comments: Vec::new(),
16189 inferred_type: None,
16190 })));
16191 }
16192 match target {
16193 DialectType::PostgreSQL | DialectType::Redshift => {
16194 // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
16195 let arr = f.args[0].clone();
16196 let needle = f.args[1].clone();
16197 // Convert [] to ARRAY[] for PostgreSQL
16198 let pg_arr = match arr {
16199 Expression::Array(a) => Expression::ArrayFunc(
16200 Box::new(crate::expressions::ArrayConstructor {
16201 expressions: a.expressions,
16202 bracket_notation: false,
16203 use_list_keyword: false,
16204 }),
16205 ),
16206 _ => arr,
16207 };
16208 // needle = ANY(arr) using the Any quantified expression
16209 let any_expr = Expression::Any(Box::new(
16210 crate::expressions::QuantifiedExpr {
16211 this: needle.clone(),
16212 subquery: pg_arr,
16213 op: Some(crate::expressions::QuantifiedOp::Eq),
16214 },
16215 ));
16216 let coalesce = Expression::Coalesce(Box::new(
16217 crate::expressions::VarArgFunc {
16218 expressions: vec![
16219 any_expr,
16220 Expression::Boolean(
16221 crate::expressions::BooleanLiteral {
16222 value: false,
16223 },
16224 ),
16225 ],
16226 original_name: None,
16227 inferred_type: None,
16228 },
16229 ));
16230 let is_null_check = Expression::IsNull(Box::new(
16231 crate::expressions::IsNull {
16232 this: needle,
16233 not: false,
16234 postfix_form: false,
16235 },
16236 ));
16237 Ok(Expression::Case(Box::new(Case {
16238 operand: None,
16239 whens: vec![(
16240 is_null_check,
16241 Expression::Null(crate::expressions::Null),
16242 )],
16243 else_: Some(coalesce),
16244 comments: Vec::new(),
16245 inferred_type: None,
16246 })))
16247 }
16248 _ => Ok(Expression::Function(Box::new(Function::new(
16249 "ARRAY_CONTAINS".to_string(),
16250 f.args,
16251 )))),
16252 }
16253 }
16254 // LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
16255 "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
16256 match target {
16257 DialectType::PostgreSQL | DialectType::Redshift => {
16258 // arr1 && arr2 with ARRAY[] syntax
16259 let mut args = f.args;
16260 let arr1 = args.remove(0);
16261 let arr2 = args.remove(0);
16262 let pg_arr1 = match arr1 {
16263 Expression::Array(a) => Expression::ArrayFunc(
16264 Box::new(crate::expressions::ArrayConstructor {
16265 expressions: a.expressions,
16266 bracket_notation: false,
16267 use_list_keyword: false,
16268 }),
16269 ),
16270 _ => arr1,
16271 };
16272 let pg_arr2 = match arr2 {
16273 Expression::Array(a) => Expression::ArrayFunc(
16274 Box::new(crate::expressions::ArrayConstructor {
16275 expressions: a.expressions,
16276 bracket_notation: false,
16277 use_list_keyword: false,
16278 }),
16279 ),
16280 _ => arr2,
16281 };
16282 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
16283 pg_arr1, pg_arr2,
16284 ))))
16285 }
16286 DialectType::DuckDB => {
16287 // DuckDB: arr1 && arr2 (native support)
16288 let mut args = f.args;
16289 let arr1 = args.remove(0);
16290 let arr2 = args.remove(0);
16291 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
16292 arr1, arr2,
16293 ))))
16294 }
16295 _ => Ok(Expression::Function(Box::new(Function::new(
16296 "LIST_HAS_ANY".to_string(),
16297 f.args,
16298 )))),
16299 }
16300 }
16301 // APPROX_QUANTILE(x, q) -> target-specific
16302 "APPROX_QUANTILE" if f.args.len() == 2 => match target {
16303 DialectType::Snowflake => Ok(Expression::Function(Box::new(
16304 Function::new("APPROX_PERCENTILE".to_string(), f.args),
16305 ))),
16306 DialectType::DuckDB => Ok(Expression::Function(f)),
16307 _ => Ok(Expression::Function(f)),
16308 },
16309 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
16310 "MAKE_DATE" if f.args.len() == 3 => match target {
16311 DialectType::BigQuery => Ok(Expression::Function(Box::new(
16312 Function::new("DATE".to_string(), f.args),
16313 ))),
16314 _ => Ok(Expression::Function(f)),
16315 },
16316 // RANGE(start, end[, step]) -> target-specific
16317 "RANGE"
16318 if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
16319 {
16320 let start = f.args[0].clone();
16321 let end = f.args[1].clone();
16322 let step = f.args.get(2).cloned();
16323 match target {
16324 DialectType::Spark | DialectType::Databricks => {
16325 // RANGE(start, end) -> SEQUENCE(start, end-1)
16326 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
16327 // RANGE(start, start) -> ARRAY() (empty)
16328 // RANGE(start, end, 0) -> ARRAY() (empty)
16329 // When end is variable: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
16330
16331 // Check for constant args
16332 fn extract_i64(e: &Expression) -> Option<i64> {
16333 match e {
16334 Expression::Literal(Literal::Number(n)) => {
16335 n.parse::<i64>().ok()
16336 }
16337 Expression::Neg(u) => {
16338 if let Expression::Literal(Literal::Number(n)) =
16339 &u.this
16340 {
16341 n.parse::<i64>().ok().map(|v| -v)
16342 } else {
16343 None
16344 }
16345 }
16346 _ => None,
16347 }
16348 }
16349 let start_val = extract_i64(&start);
16350 let end_val = extract_i64(&end);
16351 let step_val = step.as_ref().and_then(|s| extract_i64(s));
16352
16353 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
16354 if step_val == Some(0) {
16355 return Ok(Expression::Function(Box::new(
16356 Function::new("ARRAY".to_string(), vec![]),
16357 )));
16358 }
16359 if let (Some(s), Some(e_val)) = (start_val, end_val) {
16360 if s == e_val {
16361 return Ok(Expression::Function(Box::new(
16362 Function::new("ARRAY".to_string(), vec![]),
16363 )));
16364 }
16365 }
16366
16367 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
16368 // All constants - compute new end = end - step (if step provided) or end - 1
16369 match step_val {
16370 Some(st) if st < 0 => {
16371 // Negative step: SEQUENCE(start, end - step, step)
16372 let new_end = e_val - st; // end - step (= end + |step|)
16373 let mut args =
16374 vec![start, Expression::number(new_end)];
16375 if let Some(s) = step {
16376 args.push(s);
16377 }
16378 Ok(Expression::Function(Box::new(
16379 Function::new("SEQUENCE".to_string(), args),
16380 )))
16381 }
16382 Some(st) => {
16383 let new_end = e_val - st;
16384 let mut args =
16385 vec![start, Expression::number(new_end)];
16386 if let Some(s) = step {
16387 args.push(s);
16388 }
16389 Ok(Expression::Function(Box::new(
16390 Function::new("SEQUENCE".to_string(), args),
16391 )))
16392 }
16393 None => {
16394 // No step: SEQUENCE(start, end - 1)
16395 let new_end = e_val - 1;
16396 Ok(Expression::Function(Box::new(
16397 Function::new(
16398 "SEQUENCE".to_string(),
16399 vec![
16400 start,
16401 Expression::number(new_end),
16402 ],
16403 ),
16404 )))
16405 }
16406 }
16407 } else {
16408 // Variable end: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
16409 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
16410 end.clone(),
16411 Expression::number(1),
16412 )));
16413 let cond = Expression::Lte(Box::new(BinaryOp::new(
16414 Expression::Paren(Box::new(Paren {
16415 this: end_m1.clone(),
16416 trailing_comments: Vec::new(),
16417 })),
16418 start.clone(),
16419 )));
16420 let empty = Expression::Function(Box::new(
16421 Function::new("ARRAY".to_string(), vec![]),
16422 ));
16423 let mut seq_args = vec![
16424 start,
16425 Expression::Paren(Box::new(Paren {
16426 this: end_m1,
16427 trailing_comments: Vec::new(),
16428 })),
16429 ];
16430 if let Some(s) = step {
16431 seq_args.push(s);
16432 }
16433 let seq = Expression::Function(Box::new(
16434 Function::new("SEQUENCE".to_string(), seq_args),
16435 ));
16436 Ok(Expression::IfFunc(Box::new(
16437 crate::expressions::IfFunc {
16438 condition: cond,
16439 true_value: empty,
16440 false_value: Some(seq),
16441 original_name: None,
16442 inferred_type: None,
16443 },
16444 )))
16445 }
16446 }
16447 DialectType::SQLite => {
16448 // RANGE(start, end) -> GENERATE_SERIES(start, end)
16449 // The subquery wrapping is handled at the Alias level
16450 let mut args = vec![start, end];
16451 if let Some(s) = step {
16452 args.push(s);
16453 }
16454 Ok(Expression::Function(Box::new(Function::new(
16455 "GENERATE_SERIES".to_string(),
16456 args,
16457 ))))
16458 }
16459 _ => Ok(Expression::Function(f)),
16460 }
16461 }
16462 // ARRAY_REVERSE_SORT -> target-specific
16463 // (handled above as well, but also need DuckDB self-normalization)
16464 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
16465 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
16466 DialectType::Snowflake => Ok(Expression::Function(Box::new(
16467 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
16468 ))),
16469 DialectType::Spark | DialectType::Databricks => {
16470 Ok(Expression::Function(Box::new(Function::new(
16471 "MAP_FROM_ARRAYS".to_string(),
16472 f.args,
16473 ))))
16474 }
16475 _ => Ok(Expression::Function(Box::new(Function::new(
16476 "MAP".to_string(),
16477 f.args,
16478 )))),
16479 },
16480 // VARIANCE(x) -> varSamp(x) for ClickHouse
16481 "VARIANCE" if f.args.len() == 1 => match target {
16482 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
16483 Function::new("varSamp".to_string(), f.args),
16484 ))),
16485 _ => Ok(Expression::Function(f)),
16486 },
16487 // STDDEV(x) -> stddevSamp(x) for ClickHouse
16488 "STDDEV" if f.args.len() == 1 => match target {
16489 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
16490 Function::new("stddevSamp".to_string(), f.args),
16491 ))),
16492 _ => Ok(Expression::Function(f)),
16493 },
16494 // ISINF(x) -> IS_INF(x) for BigQuery
16495 "ISINF" if f.args.len() == 1 => match target {
16496 DialectType::BigQuery => Ok(Expression::Function(Box::new(
16497 Function::new("IS_INF".to_string(), f.args),
16498 ))),
16499 _ => Ok(Expression::Function(f)),
16500 },
16501 // CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
16502 "CONTAINS" if f.args.len() == 2 => match target {
16503 DialectType::Spark
16504 | DialectType::Databricks
16505 | DialectType::Hive => Ok(Expression::Function(Box::new(
16506 Function::new("ARRAY_CONTAINS".to_string(), f.args),
16507 ))),
16508 _ => Ok(Expression::Function(f)),
16509 },
16510 // ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto
16511 "ARRAY_CONTAINS" if f.args.len() == 2 => match target {
16512 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
16513 Ok(Expression::Function(Box::new(Function::new(
16514 "CONTAINS".to_string(),
16515 f.args,
16516 ))))
16517 }
16518 DialectType::DuckDB => Ok(Expression::Function(Box::new(
16519 Function::new("ARRAY_CONTAINS".to_string(), f.args),
16520 ))),
16521 _ => Ok(Expression::Function(f)),
16522 },
16523 // TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
16524 "TO_UNIXTIME" if f.args.len() == 1 => match target {
16525 DialectType::Hive
16526 | DialectType::Spark
16527 | DialectType::Databricks => Ok(Expression::Function(Box::new(
16528 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
16529 ))),
16530 _ => Ok(Expression::Function(f)),
16531 },
16532 // FROM_UNIXTIME(x) -> target-specific
16533 "FROM_UNIXTIME" if f.args.len() == 1 => {
16534 match target {
16535 DialectType::Hive
16536 | DialectType::Spark
16537 | DialectType::Databricks
16538 | DialectType::Presto
16539 | DialectType::Trino => Ok(Expression::Function(f)),
16540 DialectType::DuckDB => {
16541 // DuckDB: TO_TIMESTAMP(x)
16542 let arg = f.args.into_iter().next().unwrap();
16543 Ok(Expression::Function(Box::new(Function::new(
16544 "TO_TIMESTAMP".to_string(),
16545 vec![arg],
16546 ))))
16547 }
16548 DialectType::PostgreSQL => {
16549 // PG: TO_TIMESTAMP(col)
16550 let arg = f.args.into_iter().next().unwrap();
16551 Ok(Expression::Function(Box::new(Function::new(
16552 "TO_TIMESTAMP".to_string(),
16553 vec![arg],
16554 ))))
16555 }
16556 DialectType::Redshift => {
16557 // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
16558 let arg = f.args.into_iter().next().unwrap();
16559 let epoch_ts = Expression::Literal(Literal::Timestamp(
16560 "epoch".to_string(),
16561 ));
16562 let interval = Expression::Interval(Box::new(
16563 crate::expressions::Interval {
16564 this: Some(Expression::string("1 SECOND")),
16565 unit: None,
16566 },
16567 ));
16568 let mul =
16569 Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
16570 let add =
16571 Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
16572 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
16573 this: add,
16574 trailing_comments: Vec::new(),
16575 })))
16576 }
16577 _ => Ok(Expression::Function(f)),
16578 }
16579 }
16580 // FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
16581 "FROM_UNIXTIME"
16582 if f.args.len() == 2
16583 && matches!(
16584 source,
16585 DialectType::Hive
16586 | DialectType::Spark
16587 | DialectType::Databricks
16588 ) =>
16589 {
16590 let mut args = f.args;
16591 let unix_ts = args.remove(0);
16592 let fmt_expr = args.remove(0);
16593 match target {
16594 DialectType::DuckDB => {
16595 // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
16596 let to_ts = Expression::Function(Box::new(Function::new(
16597 "TO_TIMESTAMP".to_string(),
16598 vec![unix_ts],
16599 )));
16600 if let Expression::Literal(
16601 crate::expressions::Literal::String(s),
16602 ) = &fmt_expr
16603 {
16604 let c_fmt = Self::hive_format_to_c_format(s);
16605 Ok(Expression::Function(Box::new(Function::new(
16606 "STRFTIME".to_string(),
16607 vec![to_ts, Expression::string(&c_fmt)],
16608 ))))
16609 } else {
16610 Ok(Expression::Function(Box::new(Function::new(
16611 "STRFTIME".to_string(),
16612 vec![to_ts, fmt_expr],
16613 ))))
16614 }
16615 }
16616 DialectType::Presto
16617 | DialectType::Trino
16618 | DialectType::Athena => {
16619 // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
16620 let from_unix =
16621 Expression::Function(Box::new(Function::new(
16622 "FROM_UNIXTIME".to_string(),
16623 vec![unix_ts],
16624 )));
16625 if let Expression::Literal(
16626 crate::expressions::Literal::String(s),
16627 ) = &fmt_expr
16628 {
16629 let p_fmt = Self::hive_format_to_presto_format(s);
16630 Ok(Expression::Function(Box::new(Function::new(
16631 "DATE_FORMAT".to_string(),
16632 vec![from_unix, Expression::string(&p_fmt)],
16633 ))))
16634 } else {
16635 Ok(Expression::Function(Box::new(Function::new(
16636 "DATE_FORMAT".to_string(),
16637 vec![from_unix, fmt_expr],
16638 ))))
16639 }
16640 }
16641 _ => {
16642 // Keep as FROM_UNIXTIME(x, fmt) for other targets
16643 Ok(Expression::Function(Box::new(Function::new(
16644 "FROM_UNIXTIME".to_string(),
16645 vec![unix_ts, fmt_expr],
16646 ))))
16647 }
16648 }
16649 }
16650 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
16651 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
16652 let unit_str = Self::get_unit_str_static(&f.args[0]);
16653 // Get the raw unit text preserving original case
16654 let raw_unit = match &f.args[0] {
16655 Expression::Identifier(id) => id.name.clone(),
16656 Expression::Literal(crate::expressions::Literal::String(s)) => {
16657 s.clone()
16658 }
16659 Expression::Column(col) => col.name.name.clone(),
16660 _ => unit_str.clone(),
16661 };
16662 match target {
16663 DialectType::TSQL | DialectType::Fabric => {
16664 // Preserve original case of unit for TSQL
16665 let unit_name = match unit_str.as_str() {
16666 "YY" | "YYYY" => "YEAR".to_string(),
16667 "QQ" | "Q" => "QUARTER".to_string(),
16668 "MM" | "M" => "MONTH".to_string(),
16669 "WK" | "WW" => "WEEK".to_string(),
16670 "DD" | "D" | "DY" => "DAY".to_string(),
16671 "HH" => "HOUR".to_string(),
16672 "MI" | "N" => "MINUTE".to_string(),
16673 "SS" | "S" => "SECOND".to_string(),
16674 _ => raw_unit.clone(), // preserve original case
16675 };
16676 let mut args = f.args;
16677 args[0] =
16678 Expression::Identifier(Identifier::new(&unit_name));
16679 Ok(Expression::Function(Box::new(Function::new(
16680 "DATEPART".to_string(),
16681 args,
16682 ))))
16683 }
16684 DialectType::Spark | DialectType::Databricks => {
16685 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
16686 // Preserve original case for non-abbreviation units
16687 let unit = match unit_str.as_str() {
16688 "YY" | "YYYY" => "YEAR".to_string(),
16689 "QQ" | "Q" => "QUARTER".to_string(),
16690 "MM" | "M" => "MONTH".to_string(),
16691 "WK" | "WW" => "WEEK".to_string(),
16692 "DD" | "D" | "DY" => "DAY".to_string(),
16693 "HH" => "HOUR".to_string(),
16694 "MI" | "N" => "MINUTE".to_string(),
16695 "SS" | "S" => "SECOND".to_string(),
16696 _ => raw_unit, // preserve original case
16697 };
16698 Ok(Expression::Extract(Box::new(
16699 crate::expressions::ExtractFunc {
16700 this: f.args[1].clone(),
16701 field: crate::expressions::DateTimeField::Custom(
16702 unit,
16703 ),
16704 },
16705 )))
16706 }
16707 _ => Ok(Expression::Function(Box::new(Function::new(
16708 "DATE_PART".to_string(),
16709 f.args,
16710 )))),
16711 }
16712 }
16713 // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
16714 // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
16715 // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
16716 // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
16717 "DATENAME" if f.args.len() == 2 => {
16718 let unit_str = Self::get_unit_str_static(&f.args[0]);
16719 let date_expr = f.args[1].clone();
16720 match unit_str.as_str() {
16721 "MM" | "M" | "MONTH" => match target {
16722 DialectType::TSQL => {
16723 let cast_date = Expression::Cast(Box::new(
16724 crate::expressions::Cast {
16725 this: date_expr,
16726 to: DataType::Custom {
16727 name: "DATETIME2".to_string(),
16728 },
16729 trailing_comments: Vec::new(),
16730 double_colon_syntax: false,
16731 format: None,
16732 default: None,
16733 inferred_type: None,
16734 },
16735 ));
16736 Ok(Expression::Function(Box::new(Function::new(
16737 "FORMAT".to_string(),
16738 vec![cast_date, Expression::string("MMMM")],
16739 ))))
16740 }
16741 DialectType::Spark | DialectType::Databricks => {
16742 let cast_date = Expression::Cast(Box::new(
16743 crate::expressions::Cast {
16744 this: date_expr,
16745 to: DataType::Timestamp {
16746 timezone: false,
16747 precision: None,
16748 },
16749 trailing_comments: Vec::new(),
16750 double_colon_syntax: false,
16751 format: None,
16752 default: None,
16753 inferred_type: None,
16754 },
16755 ));
16756 Ok(Expression::Function(Box::new(Function::new(
16757 "DATE_FORMAT".to_string(),
16758 vec![cast_date, Expression::string("MMMM")],
16759 ))))
16760 }
16761 _ => Ok(Expression::Function(f)),
16762 },
16763 "DW" | "WEEKDAY" => match target {
16764 DialectType::TSQL => {
16765 let cast_date = Expression::Cast(Box::new(
16766 crate::expressions::Cast {
16767 this: date_expr,
16768 to: DataType::Custom {
16769 name: "DATETIME2".to_string(),
16770 },
16771 trailing_comments: Vec::new(),
16772 double_colon_syntax: false,
16773 format: None,
16774 default: None,
16775 inferred_type: None,
16776 },
16777 ));
16778 Ok(Expression::Function(Box::new(Function::new(
16779 "FORMAT".to_string(),
16780 vec![cast_date, Expression::string("dddd")],
16781 ))))
16782 }
16783 DialectType::Spark | DialectType::Databricks => {
16784 let cast_date = Expression::Cast(Box::new(
16785 crate::expressions::Cast {
16786 this: date_expr,
16787 to: DataType::Timestamp {
16788 timezone: false,
16789 precision: None,
16790 },
16791 trailing_comments: Vec::new(),
16792 double_colon_syntax: false,
16793 format: None,
16794 default: None,
16795 inferred_type: None,
16796 },
16797 ));
16798 Ok(Expression::Function(Box::new(Function::new(
16799 "DATE_FORMAT".to_string(),
16800 vec![cast_date, Expression::string("EEEE")],
16801 ))))
16802 }
16803 _ => Ok(Expression::Function(f)),
16804 },
16805 _ => Ok(Expression::Function(f)),
16806 }
16807 }
16808 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
16809 "STRING_AGG" if f.args.len() >= 2 => {
16810 let x = f.args[0].clone();
16811 let sep = f.args[1].clone();
16812 match target {
16813 DialectType::MySQL
16814 | DialectType::SingleStore
16815 | DialectType::Doris
16816 | DialectType::StarRocks => Ok(Expression::GroupConcat(
16817 Box::new(crate::expressions::GroupConcatFunc {
16818 this: x,
16819 separator: Some(sep),
16820 order_by: None,
16821 distinct: false,
16822 filter: None,
16823 inferred_type: None,
16824 }),
16825 )),
16826 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
16827 crate::expressions::GroupConcatFunc {
16828 this: x,
16829 separator: Some(sep),
16830 order_by: None,
16831 distinct: false,
16832 filter: None,
16833 inferred_type: None,
16834 },
16835 ))),
16836 DialectType::PostgreSQL | DialectType::Redshift => {
16837 Ok(Expression::StringAgg(Box::new(
16838 crate::expressions::StringAggFunc {
16839 this: x,
16840 separator: Some(sep),
16841 order_by: None,
16842 distinct: false,
16843 filter: None,
16844 limit: None,
16845 inferred_type: None,
16846 },
16847 )))
16848 }
16849 _ => Ok(Expression::Function(f)),
16850 }
16851 }
16852 // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
16853 "JSON_ARRAYAGG" => match target {
16854 DialectType::PostgreSQL => {
16855 Ok(Expression::Function(Box::new(Function {
16856 name: "JSON_AGG".to_string(),
16857 ..(*f)
16858 })))
16859 }
16860 _ => Ok(Expression::Function(f)),
16861 },
16862 // SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
16863 "SCHEMA_NAME" => match target {
16864 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
16865 crate::expressions::CurrentSchema { this: None },
16866 ))),
16867 DialectType::SQLite => Ok(Expression::string("main")),
16868 _ => Ok(Expression::Function(f)),
16869 },
16870 // TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
16871 "TO_TIMESTAMP"
16872 if f.args.len() == 2
16873 && matches!(
16874 source,
16875 DialectType::Spark
16876 | DialectType::Databricks
16877 | DialectType::Hive
16878 )
16879 && matches!(target, DialectType::DuckDB) =>
16880 {
16881 let mut args = f.args;
16882 let val = args.remove(0);
16883 let fmt_expr = args.remove(0);
16884 if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
16885 // Convert Java/Spark format to C strptime format
/// Convert a Java/Spark datetime format string (e.g. "yyyy-MM-dd") to the
/// C `strptime`-style format DuckDB expects (e.g. "%Y-%m-%d").
///
/// Works in two passes:
/// 1. Multi-character Java tokens are rewritten to their `%` escapes.
///    Replacement order matters: longer tokens ("yyyy", "SSSSSS") and
///    upper-case variants ("MM", "HH") must be handled before their
///    shorter / lower-case counterparts ("yy", "mm") so they are not
///    clobbered.
/// 2. A character scan maps the timezone tokens ('z' -> "%Z",
///    'Z' -> "%z"), skipping over the two-character `%` escapes produced
///    by pass 1 so they are never rewritten a second time.
fn java_to_c_fmt(fmt: &str) -> String {
    let mapped = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        // Java `EEEE` is the *full weekday name*; in C/DuckDB formats
        // that is `%A`. (`%W` is the Monday-based week-of-year number.)
        .replace("EEEE", "%A")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    let chars: Vec<char> = mapped.chars().collect();
    let mut out = String::with_capacity(mapped.len() + 4);
    let mut i = 0;
    while i < chars.len() {
        match chars[i] {
            // Copy a `%` escape produced by pass 1 verbatim.
            '%' if i + 1 < chars.len() => {
                out.push('%');
                out.push(chars[i + 1]);
                i += 2;
            }
            'z' => {
                out.push_str("%Z");
                i += 1;
            }
            'Z' => {
                out.push_str("%z");
                i += 1;
            }
            // Literal separators (and a trailing lone '%') pass through.
            c => {
                out.push(c);
                i += 1;
            }
        }
    }
    out
}
16918 let c_fmt = java_to_c_fmt(s);
16919 Ok(Expression::Function(Box::new(Function::new(
16920 "STRPTIME".to_string(),
16921 vec![val, Expression::string(&c_fmt)],
16922 ))))
16923 } else {
16924 Ok(Expression::Function(Box::new(Function::new(
16925 "STRPTIME".to_string(),
16926 vec![val, fmt_expr],
16927 ))))
16928 }
16929 }
16930 // TO_DATE(x) 1-arg from Doris: date conversion
16931 "TO_DATE"
16932 if f.args.len() == 1
16933 && matches!(
16934 source,
16935 DialectType::Doris | DialectType::StarRocks
16936 ) =>
16937 {
16938 let arg = f.args.into_iter().next().unwrap();
16939 match target {
16940 DialectType::Oracle
16941 | DialectType::DuckDB
16942 | DialectType::TSQL => {
16943 // CAST(x AS DATE)
16944 Ok(Expression::Cast(Box::new(Cast {
16945 this: arg,
16946 to: DataType::Date,
16947 double_colon_syntax: false,
16948 trailing_comments: vec![],
16949 format: None,
16950 default: None,
16951 inferred_type: None,
16952 })))
16953 }
16954 DialectType::MySQL | DialectType::SingleStore => {
16955 // DATE(x)
16956 Ok(Expression::Function(Box::new(Function::new(
16957 "DATE".to_string(),
16958 vec![arg],
16959 ))))
16960 }
16961 _ => {
16962 // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
16963 Ok(Expression::Function(Box::new(Function::new(
16964 "TO_DATE".to_string(),
16965 vec![arg],
16966 ))))
16967 }
16968 }
16969 }
16970 // TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
16971 "TO_DATE"
16972 if f.args.len() == 1
16973 && matches!(
16974 source,
16975 DialectType::Spark
16976 | DialectType::Databricks
16977 | DialectType::Hive
16978 ) =>
16979 {
16980 let arg = f.args.into_iter().next().unwrap();
16981 match target {
16982 DialectType::DuckDB => {
16983 // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
16984 Ok(Expression::TryCast(Box::new(Cast {
16985 this: arg,
16986 to: DataType::Date,
16987 double_colon_syntax: false,
16988 trailing_comments: vec![],
16989 format: None,
16990 default: None,
16991 inferred_type: None,
16992 })))
16993 }
16994 DialectType::Presto
16995 | DialectType::Trino
16996 | DialectType::Athena => {
16997 // CAST(CAST(x AS TIMESTAMP) AS DATE)
16998 Ok(Self::double_cast_timestamp_date(arg))
16999 }
17000 DialectType::Snowflake => {
17001 // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
17002 // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
17003 Ok(Expression::Function(Box::new(Function::new(
17004 "TRY_TO_DATE".to_string(),
17005 vec![arg, Expression::string("yyyy-mm-DD")],
17006 ))))
17007 }
17008 _ => {
17009 // Default: keep as TO_DATE(x)
17010 Ok(Expression::Function(Box::new(Function::new(
17011 "TO_DATE".to_string(),
17012 vec![arg],
17013 ))))
17014 }
17015 }
17016 }
17017 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
17018 "TO_DATE"
17019 if f.args.len() == 2
17020 && matches!(
17021 source,
17022 DialectType::Spark
17023 | DialectType::Databricks
17024 | DialectType::Hive
17025 ) =>
17026 {
17027 let mut args = f.args;
17028 let val = args.remove(0);
17029 let fmt_expr = args.remove(0);
17030 let is_default_format = matches!(&fmt_expr, Expression::Literal(Literal::String(s)) if s == "yyyy-MM-dd");
17031
17032 if is_default_format {
17033 // Default format: same as 1-arg form
17034 match target {
17035 DialectType::DuckDB => {
17036 Ok(Expression::TryCast(Box::new(Cast {
17037 this: val,
17038 to: DataType::Date,
17039 double_colon_syntax: false,
17040 trailing_comments: vec![],
17041 format: None,
17042 default: None,
17043 inferred_type: None,
17044 })))
17045 }
17046 DialectType::Presto
17047 | DialectType::Trino
17048 | DialectType::Athena => {
17049 Ok(Self::double_cast_timestamp_date(val))
17050 }
17051 DialectType::Snowflake => {
17052 // TRY_TO_DATE(x, format) with Snowflake format mapping
17053 let sf_fmt = "yyyy-MM-dd"
17054 .replace("yyyy", "yyyy")
17055 .replace("MM", "mm")
17056 .replace("dd", "DD");
17057 Ok(Expression::Function(Box::new(Function::new(
17058 "TRY_TO_DATE".to_string(),
17059 vec![val, Expression::string(&sf_fmt)],
17060 ))))
17061 }
17062 _ => Ok(Expression::Function(Box::new(Function::new(
17063 "TO_DATE".to_string(),
17064 vec![val],
17065 )))),
17066 }
17067 } else {
17068 // Non-default format: use format-based parsing
17069 if let Expression::Literal(Literal::String(ref s)) = fmt_expr {
17070 match target {
17071 DialectType::DuckDB => {
17072 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
/// Convert a Java/Spark datetime format string to the C `strptime`-style
/// format used by DuckDB's `TRY_STRPTIME` in the `TO_DATE` rewrite.
///
/// NOTE(review): duplicates `java_to_c_fmt` in the 2-arg `TO_TIMESTAMP`
/// arm — both are candidates for a single shared helper.
///
/// Two passes:
/// 1. Multi-character Java tokens become `%` escapes. Order matters:
///    "yyyy"/"SSSSSS" and the upper-case tokens ("MM", "HH") are replaced
///    before the shorter / lower-case ones ("yy", "mm") would clobber them.
/// 2. A scan maps timezone tokens ('z' -> "%Z", 'Z' -> "%z") while
///    copying the `%` escapes from pass 1 verbatim so they are not
///    rewritten a second time.
fn java_to_c_fmt_todate(fmt: &str) -> String {
    let mapped = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        // Java `EEEE` is the *full weekday name*; the C/DuckDB equivalent
        // is `%A` (`%W` would be the week-of-year number).
        .replace("EEEE", "%A")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    let chars: Vec<char> = mapped.chars().collect();
    let mut out = String::with_capacity(mapped.len() + 4);
    let mut i = 0;
    while i < chars.len() {
        match chars[i] {
            // A `%` escape produced in pass 1: copy both chars as-is.
            '%' if i + 1 < chars.len() => {
                out.push('%');
                out.push(chars[i + 1]);
                i += 2;
            }
            'z' => {
                out.push_str("%Z");
                i += 1;
            }
            'Z' => {
                out.push_str("%z");
                i += 1;
            }
            // Literal separators (and a trailing lone '%') pass through.
            c => {
                out.push(c);
                i += 1;
            }
        }
    }
    out
}
17105 let c_fmt = java_to_c_fmt_todate(s);
17106 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
17107 let try_strptime =
17108 Expression::Function(Box::new(Function::new(
17109 "TRY_STRPTIME".to_string(),
17110 vec![val, Expression::string(&c_fmt)],
17111 )));
17112 let cast_ts = Expression::Cast(Box::new(Cast {
17113 this: try_strptime,
17114 to: DataType::Timestamp {
17115 precision: None,
17116 timezone: false,
17117 },
17118 double_colon_syntax: false,
17119 trailing_comments: vec![],
17120 format: None,
17121 default: None,
17122 inferred_type: None,
17123 }));
17124 Ok(Expression::Cast(Box::new(Cast {
17125 this: cast_ts,
17126 to: DataType::Date,
17127 double_colon_syntax: false,
17128 trailing_comments: vec![],
17129 format: None,
17130 default: None,
17131 inferred_type: None,
17132 })))
17133 }
17134 DialectType::Presto
17135 | DialectType::Trino
17136 | DialectType::Athena => {
17137 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
17138 let p_fmt = s
17139 .replace("yyyy", "%Y")
17140 .replace("SSSSSS", "%f")
17141 .replace("MM", "%m")
17142 .replace("dd", "%d")
17143 .replace("HH", "%H")
17144 .replace("mm", "%M")
17145 .replace("ss", "%S")
17146 .replace("yy", "%y");
17147 let date_parse =
17148 Expression::Function(Box::new(Function::new(
17149 "DATE_PARSE".to_string(),
17150 vec![val, Expression::string(&p_fmt)],
17151 )));
17152 Ok(Expression::Cast(Box::new(Cast {
17153 this: date_parse,
17154 to: DataType::Date,
17155 double_colon_syntax: false,
17156 trailing_comments: vec![],
17157 format: None,
17158 default: None,
17159 inferred_type: None,
17160 })))
17161 }
17162 DialectType::Snowflake => {
17163 // TRY_TO_DATE(x, snowflake_fmt)
17164 Ok(Expression::Function(Box::new(Function::new(
17165 "TRY_TO_DATE".to_string(),
17166 vec![val, Expression::string(s)],
17167 ))))
17168 }
17169 _ => Ok(Expression::Function(Box::new(Function::new(
17170 "TO_DATE".to_string(),
17171 vec![val, fmt_expr],
17172 )))),
17173 }
17174 } else {
17175 Ok(Expression::Function(Box::new(Function::new(
17176 "TO_DATE".to_string(),
17177 vec![val, fmt_expr],
17178 ))))
17179 }
17180 }
17181 }
17182 // TO_TIMESTAMP(x) 1-arg: epoch conversion
17183 "TO_TIMESTAMP"
17184 if f.args.len() == 1
17185 && matches!(source, DialectType::DuckDB)
17186 && matches!(
17187 target,
17188 DialectType::BigQuery
17189 | DialectType::Presto
17190 | DialectType::Trino
17191 | DialectType::Hive
17192 | DialectType::Spark
17193 | DialectType::Databricks
17194 | DialectType::Athena
17195 ) =>
17196 {
17197 let arg = f.args.into_iter().next().unwrap();
17198 let func_name = match target {
17199 DialectType::BigQuery => "TIMESTAMP_SECONDS",
17200 DialectType::Presto
17201 | DialectType::Trino
17202 | DialectType::Athena
17203 | DialectType::Hive
17204 | DialectType::Spark
17205 | DialectType::Databricks => "FROM_UNIXTIME",
17206 _ => "TO_TIMESTAMP",
17207 };
17208 Ok(Expression::Function(Box::new(Function::new(
17209 func_name.to_string(),
17210 vec![arg],
17211 ))))
17212 }
17213 // CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
17214 "CONCAT" if f.args.len() == 1 => {
17215 let arg = f.args.into_iter().next().unwrap();
17216 match target {
17217 DialectType::Presto
17218 | DialectType::Trino
17219 | DialectType::Athena => {
17220 // CONCAT(a) -> CAST(a AS VARCHAR)
17221 Ok(Expression::Cast(Box::new(Cast {
17222 this: arg,
17223 to: DataType::VarChar {
17224 length: None,
17225 parenthesized_length: false,
17226 },
17227 trailing_comments: vec![],
17228 double_colon_syntax: false,
17229 format: None,
17230 default: None,
17231 inferred_type: None,
17232 })))
17233 }
17234 DialectType::TSQL => {
17235 // CONCAT(a) -> a
17236 Ok(arg)
17237 }
17238 DialectType::DuckDB => {
17239 // Keep CONCAT(a) for DuckDB (native support)
17240 Ok(Expression::Function(Box::new(Function::new(
17241 "CONCAT".to_string(),
17242 vec![arg],
17243 ))))
17244 }
17245 DialectType::Spark | DialectType::Databricks => {
17246 let coalesced = Expression::Coalesce(Box::new(
17247 crate::expressions::VarArgFunc {
17248 expressions: vec![arg, Expression::string("")],
17249 original_name: None,
17250 inferred_type: None,
17251 },
17252 ));
17253 Ok(Expression::Function(Box::new(Function::new(
17254 "CONCAT".to_string(),
17255 vec![coalesced],
17256 ))))
17257 }
17258 _ => Ok(Expression::Function(Box::new(Function::new(
17259 "CONCAT".to_string(),
17260 vec![arg],
17261 )))),
17262 }
17263 }
17264 // REGEXP_EXTRACT(a, p) 2-arg: BigQuery default group is 0 (no 3rd arg needed)
17265 "REGEXP_EXTRACT"
17266 if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
17267 {
17268 // If group_index is 0, drop it
17269 let drop_group = match &f.args[2] {
17270 Expression::Literal(Literal::Number(n)) => n == "0",
17271 _ => false,
17272 };
17273 if drop_group {
17274 let mut args = f.args;
17275 args.truncate(2);
17276 Ok(Expression::Function(Box::new(Function::new(
17277 "REGEXP_EXTRACT".to_string(),
17278 args,
17279 ))))
17280 } else {
17281 Ok(Expression::Function(f))
17282 }
17283 }
17284 // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
17285 "REGEXP_EXTRACT"
17286 if f.args.len() == 4
17287 && matches!(target, DialectType::Snowflake) =>
17288 {
17289 // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
17290 let mut args = f.args;
17291 let this = args.remove(0);
17292 let pattern = args.remove(0);
17293 let group = args.remove(0);
17294 let flags = args.remove(0);
17295 Ok(Expression::Function(Box::new(Function::new(
17296 "REGEXP_SUBSTR".to_string(),
17297 vec![
17298 this,
17299 pattern,
17300 Expression::number(1),
17301 Expression::number(1),
17302 flags,
17303 group,
17304 ],
17305 ))))
17306 }
17307 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
17308 "REGEXP_SUBSTR"
17309 if f.args.len() == 3
17310 && matches!(
17311 target,
17312 DialectType::DuckDB
17313 | DialectType::Presto
17314 | DialectType::Trino
17315 | DialectType::Spark
17316 | DialectType::Databricks
17317 ) =>
17318 {
17319 let mut args = f.args;
17320 let this = args.remove(0);
17321 let pattern = args.remove(0);
17322 let position = args.remove(0);
17323 // Wrap subject in SUBSTRING(this, position) to apply the offset
17324 let substring_expr = Expression::Function(Box::new(Function::new(
17325 "SUBSTRING".to_string(),
17326 vec![this, position],
17327 )));
17328 let target_name = match target {
17329 DialectType::DuckDB => "REGEXP_EXTRACT",
17330 _ => "REGEXP_EXTRACT",
17331 };
17332 Ok(Expression::Function(Box::new(Function::new(
17333 target_name.to_string(),
17334 vec![substring_expr, pattern],
17335 ))))
17336 }
17337 // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
17338 "TO_DAYS" if f.args.len() == 1 => {
17339 let x = f.args.into_iter().next().unwrap();
17340 let epoch = Expression::string("0000-01-01");
17341 // Build the final target-specific expression directly
17342 let datediff_expr = match target {
17343 DialectType::MySQL | DialectType::SingleStore => {
17344 // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
17345 Expression::Function(Box::new(Function::new(
17346 "DATEDIFF".to_string(),
17347 vec![x, epoch],
17348 )))
17349 }
17350 DialectType::DuckDB => {
17351 // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
17352 let cast_epoch = Expression::Cast(Box::new(Cast {
17353 this: epoch,
17354 to: DataType::Date,
17355 trailing_comments: Vec::new(),
17356 double_colon_syntax: false,
17357 format: None,
17358 default: None,
17359 inferred_type: None,
17360 }));
17361 let cast_x = Expression::Cast(Box::new(Cast {
17362 this: x,
17363 to: DataType::Date,
17364 trailing_comments: Vec::new(),
17365 double_colon_syntax: false,
17366 format: None,
17367 default: None,
17368 inferred_type: None,
17369 }));
17370 Expression::Function(Box::new(Function::new(
17371 "DATE_DIFF".to_string(),
17372 vec![Expression::string("DAY"), cast_epoch, cast_x],
17373 )))
17374 }
17375 DialectType::Presto
17376 | DialectType::Trino
17377 | DialectType::Athena => {
17378 // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
17379 let cast_epoch = Self::double_cast_timestamp_date(epoch);
17380 let cast_x = Self::double_cast_timestamp_date(x);
17381 Expression::Function(Box::new(Function::new(
17382 "DATE_DIFF".to_string(),
17383 vec![Expression::string("DAY"), cast_epoch, cast_x],
17384 )))
17385 }
17386 _ => {
17387 // Default: (DATEDIFF(x, '0000-01-01') + 1)
17388 Expression::Function(Box::new(Function::new(
17389 "DATEDIFF".to_string(),
17390 vec![x, epoch],
17391 )))
17392 }
17393 };
17394 let add_one = Expression::Add(Box::new(BinaryOp::new(
17395 datediff_expr,
17396 Expression::number(1),
17397 )));
17398 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
17399 this: add_one,
17400 trailing_comments: Vec::new(),
17401 })))
17402 }
17403 // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
17404 "STR_TO_DATE"
17405 if f.args.len() == 2
17406 && matches!(
17407 target,
17408 DialectType::Presto | DialectType::Trino
17409 ) =>
17410 {
17411 let mut args = f.args;
17412 let x = args.remove(0);
17413 let format_expr = args.remove(0);
17414 // Check if the format contains time components
17415 let has_time =
17416 if let Expression::Literal(Literal::String(ref fmt)) =
17417 format_expr
17418 {
17419 fmt.contains("%H")
17420 || fmt.contains("%T")
17421 || fmt.contains("%M")
17422 || fmt.contains("%S")
17423 || fmt.contains("%I")
17424 || fmt.contains("%p")
17425 } else {
17426 false
17427 };
17428 let date_parse = Expression::Function(Box::new(Function::new(
17429 "DATE_PARSE".to_string(),
17430 vec![x, format_expr],
17431 )));
17432 if has_time {
17433 // Has time components: just DATE_PARSE
17434 Ok(date_parse)
17435 } else {
17436 // Date-only: CAST(DATE_PARSE(...) AS DATE)
17437 Ok(Expression::Cast(Box::new(Cast {
17438 this: date_parse,
17439 to: DataType::Date,
17440 trailing_comments: Vec::new(),
17441 double_colon_syntax: false,
17442 format: None,
17443 default: None,
17444 inferred_type: None,
17445 })))
17446 }
17447 }
17448 "STR_TO_DATE"
17449 if f.args.len() == 2
17450 && matches!(
17451 target,
17452 DialectType::PostgreSQL | DialectType::Redshift
17453 ) =>
17454 {
17455 let mut args = f.args;
17456 let x = args.remove(0);
17457 let fmt = args.remove(0);
17458 let pg_fmt = match fmt {
17459 Expression::Literal(Literal::String(s)) => Expression::string(
17460 &s.replace("%Y", "YYYY")
17461 .replace("%m", "MM")
17462 .replace("%d", "DD")
17463 .replace("%H", "HH24")
17464 .replace("%M", "MI")
17465 .replace("%S", "SS"),
17466 ),
17467 other => other,
17468 };
17469 let to_date = Expression::Function(Box::new(Function::new(
17470 "TO_DATE".to_string(),
17471 vec![x, pg_fmt],
17472 )));
17473 Ok(Expression::Cast(Box::new(Cast {
17474 this: to_date,
17475 to: DataType::Timestamp {
17476 timezone: false,
17477 precision: None,
17478 },
17479 trailing_comments: Vec::new(),
17480 double_colon_syntax: false,
17481 format: None,
17482 default: None,
17483 inferred_type: None,
17484 })))
17485 }
17486 // RANGE(start, end) -> GENERATE_SERIES for SQLite
17487 "RANGE"
17488 if (f.args.len() == 1 || f.args.len() == 2)
17489 && matches!(target, DialectType::SQLite) =>
17490 {
17491 if f.args.len() == 2 {
17492 // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
17493 // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
17494 let mut args = f.args;
17495 let start = args.remove(0);
17496 let end = args.remove(0);
17497 Ok(Expression::Function(Box::new(Function::new(
17498 "GENERATE_SERIES".to_string(),
17499 vec![start, end],
17500 ))))
17501 } else {
17502 Ok(Expression::Function(f))
17503 }
17504 }
17505 // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
17506 // When source is Snowflake, keep as-is (args already in correct form)
17507 "UNIFORM"
17508 if matches!(target, DialectType::Snowflake)
17509 && (f.args.len() == 2 || f.args.len() == 3) =>
17510 {
17511 if matches!(source, DialectType::Snowflake) {
17512 // Snowflake -> Snowflake: keep as-is
17513 Ok(Expression::Function(f))
17514 } else {
17515 let mut args = f.args;
17516 let low = args.remove(0);
17517 let high = args.remove(0);
17518 let random = if !args.is_empty() {
17519 let seed = args.remove(0);
17520 Expression::Function(Box::new(Function::new(
17521 "RANDOM".to_string(),
17522 vec![seed],
17523 )))
17524 } else {
17525 Expression::Function(Box::new(Function::new(
17526 "RANDOM".to_string(),
17527 vec![],
17528 )))
17529 };
17530 Ok(Expression::Function(Box::new(Function::new(
17531 "UNIFORM".to_string(),
17532 vec![low, high, random],
17533 ))))
17534 }
17535 }
17536 // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
17537 "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
17538 let mut args = f.args;
17539 let ts_arg = args.remove(0);
17540 let tz_arg = args.remove(0);
17541 // Cast string literal to TIMESTAMP for all targets
17542 let ts_cast =
17543 if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
17544 Expression::Cast(Box::new(Cast {
17545 this: ts_arg,
17546 to: DataType::Timestamp {
17547 timezone: false,
17548 precision: None,
17549 },
17550 trailing_comments: vec![],
17551 double_colon_syntax: false,
17552 format: None,
17553 default: None,
17554 inferred_type: None,
17555 }))
17556 } else {
17557 ts_arg
17558 };
17559 match target {
17560 DialectType::Spark | DialectType::Databricks => {
17561 Ok(Expression::Function(Box::new(Function::new(
17562 "TO_UTC_TIMESTAMP".to_string(),
17563 vec![ts_cast, tz_arg],
17564 ))))
17565 }
17566 DialectType::Snowflake => {
17567 // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
17568 Ok(Expression::Function(Box::new(Function::new(
17569 "CONVERT_TIMEZONE".to_string(),
17570 vec![tz_arg, Expression::string("UTC"), ts_cast],
17571 ))))
17572 }
17573 DialectType::Presto
17574 | DialectType::Trino
17575 | DialectType::Athena => {
17576 // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
17577 let wtz = Expression::Function(Box::new(Function::new(
17578 "WITH_TIMEZONE".to_string(),
17579 vec![ts_cast, tz_arg],
17580 )));
17581 Ok(Expression::AtTimeZone(Box::new(
17582 crate::expressions::AtTimeZone {
17583 this: wtz,
17584 zone: Expression::string("UTC"),
17585 },
17586 )))
17587 }
17588 DialectType::BigQuery => {
17589 // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
17590 let cast_dt = Expression::Cast(Box::new(Cast {
17591 this: if let Expression::Cast(c) = ts_cast {
17592 c.this
17593 } else {
17594 ts_cast.clone()
17595 },
17596 to: DataType::Custom {
17597 name: "DATETIME".to_string(),
17598 },
17599 trailing_comments: vec![],
17600 double_colon_syntax: false,
17601 format: None,
17602 default: None,
17603 inferred_type: None,
17604 }));
17605 let ts_func =
17606 Expression::Function(Box::new(Function::new(
17607 "TIMESTAMP".to_string(),
17608 vec![cast_dt, tz_arg],
17609 )));
17610 Ok(Expression::Function(Box::new(Function::new(
17611 "DATETIME".to_string(),
17612 vec![ts_func, Expression::string("UTC")],
17613 ))))
17614 }
17615 _ => {
17616 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
17617 let atz1 = Expression::AtTimeZone(Box::new(
17618 crate::expressions::AtTimeZone {
17619 this: ts_cast,
17620 zone: tz_arg,
17621 },
17622 ));
17623 Ok(Expression::AtTimeZone(Box::new(
17624 crate::expressions::AtTimeZone {
17625 this: atz1,
17626 zone: Expression::string("UTC"),
17627 },
17628 )))
17629 }
17630 }
17631 }
17632 // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
17633 "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
17634 let mut args = f.args;
17635 let ts_arg = args.remove(0);
17636 let tz_arg = args.remove(0);
17637 // Cast string literal to TIMESTAMP
17638 let ts_cast =
17639 if matches!(&ts_arg, Expression::Literal(Literal::String(_))) {
17640 Expression::Cast(Box::new(Cast {
17641 this: ts_arg,
17642 to: DataType::Timestamp {
17643 timezone: false,
17644 precision: None,
17645 },
17646 trailing_comments: vec![],
17647 double_colon_syntax: false,
17648 format: None,
17649 default: None,
17650 inferred_type: None,
17651 }))
17652 } else {
17653 ts_arg
17654 };
17655 match target {
17656 DialectType::Spark | DialectType::Databricks => {
17657 Ok(Expression::Function(Box::new(Function::new(
17658 "FROM_UTC_TIMESTAMP".to_string(),
17659 vec![ts_cast, tz_arg],
17660 ))))
17661 }
17662 DialectType::Presto
17663 | DialectType::Trino
17664 | DialectType::Athena => {
17665 // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
17666 Ok(Expression::Function(Box::new(Function::new(
17667 "AT_TIMEZONE".to_string(),
17668 vec![ts_cast, tz_arg],
17669 ))))
17670 }
17671 DialectType::Snowflake => {
17672 // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
17673 Ok(Expression::Function(Box::new(Function::new(
17674 "CONVERT_TIMEZONE".to_string(),
17675 vec![Expression::string("UTC"), tz_arg, ts_cast],
17676 ))))
17677 }
17678 _ => {
17679 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
17680 Ok(Expression::AtTimeZone(Box::new(
17681 crate::expressions::AtTimeZone {
17682 this: ts_cast,
17683 zone: tz_arg,
17684 },
17685 )))
17686 }
17687 }
17688 }
17689 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
17690 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
17691 let name = match target {
17692 DialectType::Snowflake => "OBJECT_CONSTRUCT",
17693 _ => "MAP",
17694 };
17695 Ok(Expression::Function(Box::new(Function::new(
17696 name.to_string(),
17697 f.args,
17698 ))))
17699 }
17700 // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
17701 "STR_TO_MAP" if f.args.len() >= 1 => match target {
17702 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
17703 Ok(Expression::Function(Box::new(Function::new(
17704 "SPLIT_TO_MAP".to_string(),
17705 f.args,
17706 ))))
17707 }
17708 _ => Ok(Expression::Function(f)),
17709 },
17710 // TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
17711 "TIME_TO_STR" if f.args.len() == 2 => {
17712 let mut args = f.args;
17713 let this = args.remove(0);
17714 let fmt_expr = args.remove(0);
17715 let format =
17716 if let Expression::Literal(Literal::String(s)) = fmt_expr {
17717 s
17718 } else {
17719 "%Y-%m-%d %H:%M:%S".to_string()
17720 };
17721 Ok(Expression::TimeToStr(Box::new(
17722 crate::expressions::TimeToStr {
17723 this: Box::new(this),
17724 format,
17725 culture: None,
17726 zone: None,
17727 },
17728 )))
17729 }
17730 // STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation
17731 "STR_TO_TIME" if f.args.len() == 2 => {
17732 let mut args = f.args;
17733 let this = args.remove(0);
17734 let fmt_expr = args.remove(0);
17735 let format =
17736 if let Expression::Literal(Literal::String(s)) = fmt_expr {
17737 s
17738 } else {
17739 "%Y-%m-%d %H:%M:%S".to_string()
17740 };
17741 Ok(Expression::StrToTime(Box::new(
17742 crate::expressions::StrToTime {
17743 this: Box::new(this),
17744 format,
17745 zone: None,
17746 safe: None,
17747 target_type: None,
17748 },
17749 )))
17750 }
17751 // STR_TO_UNIX(x, fmt) -> Expression::StrToUnix for proper generation
17752 "STR_TO_UNIX" if f.args.len() >= 1 => {
17753 let mut args = f.args;
17754 let this = args.remove(0);
17755 let format = if !args.is_empty() {
17756 if let Expression::Literal(Literal::String(s)) = args.remove(0)
17757 {
17758 Some(s)
17759 } else {
17760 None
17761 }
17762 } else {
17763 None
17764 };
17765 Ok(Expression::StrToUnix(Box::new(
17766 crate::expressions::StrToUnix {
17767 this: Some(Box::new(this)),
17768 format,
17769 },
17770 )))
17771 }
17772 // TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation
17773 "TIME_TO_UNIX" if f.args.len() == 1 => {
17774 let mut args = f.args;
17775 let this = args.remove(0);
17776 Ok(Expression::TimeToUnix(Box::new(
17777 crate::expressions::UnaryFunc {
17778 this,
17779 original_name: None,
17780 inferred_type: None,
17781 },
17782 )))
17783 }
17784 // UNIX_TO_STR(x, fmt) -> Expression::UnixToStr for proper generation
17785 "UNIX_TO_STR" if f.args.len() >= 1 => {
17786 let mut args = f.args;
17787 let this = args.remove(0);
17788 let format = if !args.is_empty() {
17789 if let Expression::Literal(Literal::String(s)) = args.remove(0)
17790 {
17791 Some(s)
17792 } else {
17793 None
17794 }
17795 } else {
17796 None
17797 };
17798 Ok(Expression::UnixToStr(Box::new(
17799 crate::expressions::UnixToStr {
17800 this: Box::new(this),
17801 format,
17802 },
17803 )))
17804 }
17805 // UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation
17806 "UNIX_TO_TIME" if f.args.len() == 1 => {
17807 let mut args = f.args;
17808 let this = args.remove(0);
17809 Ok(Expression::UnixToTime(Box::new(
17810 crate::expressions::UnixToTime {
17811 this: Box::new(this),
17812 scale: None,
17813 zone: None,
17814 hours: None,
17815 minutes: None,
17816 format: None,
17817 target_type: None,
17818 },
17819 )))
17820 }
17821 // TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation
17822 "TIME_STR_TO_DATE" if f.args.len() == 1 => {
17823 let mut args = f.args;
17824 let this = args.remove(0);
17825 Ok(Expression::TimeStrToDate(Box::new(
17826 crate::expressions::UnaryFunc {
17827 this,
17828 original_name: None,
17829 inferred_type: None,
17830 },
17831 )))
17832 }
17833 // TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation
17834 "TIME_STR_TO_TIME" if f.args.len() == 1 => {
17835 let mut args = f.args;
17836 let this = args.remove(0);
17837 Ok(Expression::TimeStrToTime(Box::new(
17838 crate::expressions::TimeStrToTime {
17839 this: Box::new(this),
17840 zone: None,
17841 },
17842 )))
17843 }
17844 // MONTHS_BETWEEN(end, start) -> DuckDB complex expansion
17845 "MONTHS_BETWEEN" if f.args.len() == 2 => {
17846 match target {
17847 DialectType::DuckDB => {
17848 let mut args = f.args;
17849 let end_date = args.remove(0);
17850 let start_date = args.remove(0);
17851 let cast_end = Self::ensure_cast_date(end_date);
17852 let cast_start = Self::ensure_cast_date(start_date);
17853 // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
17854 let dd = Expression::Function(Box::new(Function::new(
17855 "DATE_DIFF".to_string(),
17856 vec![
17857 Expression::string("MONTH"),
17858 cast_start.clone(),
17859 cast_end.clone(),
17860 ],
17861 )));
17862 let day_end =
17863 Expression::Function(Box::new(Function::new(
17864 "DAY".to_string(),
17865 vec![cast_end.clone()],
17866 )));
17867 let day_start =
17868 Expression::Function(Box::new(Function::new(
17869 "DAY".to_string(),
17870 vec![cast_start.clone()],
17871 )));
17872 let last_day_end =
17873 Expression::Function(Box::new(Function::new(
17874 "LAST_DAY".to_string(),
17875 vec![cast_end.clone()],
17876 )));
17877 let last_day_start =
17878 Expression::Function(Box::new(Function::new(
17879 "LAST_DAY".to_string(),
17880 vec![cast_start.clone()],
17881 )));
17882 let day_last_end = Expression::Function(Box::new(
17883 Function::new("DAY".to_string(), vec![last_day_end]),
17884 ));
17885 let day_last_start = Expression::Function(Box::new(
17886 Function::new("DAY".to_string(), vec![last_day_start]),
17887 ));
17888 let cond1 = Expression::Eq(Box::new(BinaryOp::new(
17889 day_end.clone(),
17890 day_last_end,
17891 )));
17892 let cond2 = Expression::Eq(Box::new(BinaryOp::new(
17893 day_start.clone(),
17894 day_last_start,
17895 )));
17896 let both_cond =
17897 Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
17898 let day_diff = Expression::Sub(Box::new(BinaryOp::new(
17899 day_end, day_start,
17900 )));
17901 let day_diff_paren = Expression::Paren(Box::new(
17902 crate::expressions::Paren {
17903 this: day_diff,
17904 trailing_comments: Vec::new(),
17905 },
17906 ));
17907 let frac = Expression::Div(Box::new(BinaryOp::new(
17908 day_diff_paren,
17909 Expression::Literal(Literal::Number(
17910 "31.0".to_string(),
17911 )),
17912 )));
17913 let case_expr = Expression::Case(Box::new(Case {
17914 operand: None,
17915 whens: vec![(both_cond, Expression::number(0))],
17916 else_: Some(frac),
17917 comments: Vec::new(),
17918 inferred_type: None,
17919 }));
17920 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
17921 }
17922 DialectType::Snowflake | DialectType::Redshift => {
17923 let mut args = f.args;
17924 let end_date = args.remove(0);
17925 let start_date = args.remove(0);
17926 let unit = Expression::Identifier(Identifier::new("MONTH"));
17927 Ok(Expression::Function(Box::new(Function::new(
17928 "DATEDIFF".to_string(),
17929 vec![unit, start_date, end_date],
17930 ))))
17931 }
17932 DialectType::Presto
17933 | DialectType::Trino
17934 | DialectType::Athena => {
17935 let mut args = f.args;
17936 let end_date = args.remove(0);
17937 let start_date = args.remove(0);
17938 Ok(Expression::Function(Box::new(Function::new(
17939 "DATE_DIFF".to_string(),
17940 vec![Expression::string("MONTH"), start_date, end_date],
17941 ))))
17942 }
17943 _ => Ok(Expression::Function(f)),
17944 }
17945 }
17946 // MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
17947 // Drop the roundOff arg for non-Spark targets, keep it for Spark
17948 "MONTHS_BETWEEN" if f.args.len() == 3 => {
17949 match target {
17950 DialectType::Spark | DialectType::Databricks => {
17951 Ok(Expression::Function(f))
17952 }
17953 _ => {
17954 // Drop the 3rd arg and delegate to the 2-arg logic
17955 let mut args = f.args;
17956 let end_date = args.remove(0);
17957 let start_date = args.remove(0);
17958 // Re-create as 2-arg and process
17959 let f2 = Function::new(
17960 "MONTHS_BETWEEN".to_string(),
17961 vec![end_date, start_date],
17962 );
17963 let e2 = Expression::Function(Box::new(f2));
17964 Self::cross_dialect_normalize(e2, source, target)
17965 }
17966 }
17967 }
                // TO_TIMESTAMP(x) with 1 arg from Spark/Databricks/Hive sources -> CAST(x AS TIMESTAMP)
17969 "TO_TIMESTAMP"
17970 if f.args.len() == 1
17971 && matches!(
17972 source,
17973 DialectType::Spark
17974 | DialectType::Databricks
17975 | DialectType::Hive
17976 ) =>
17977 {
17978 let arg = f.args.into_iter().next().unwrap();
17979 Ok(Expression::Cast(Box::new(Cast {
17980 this: arg,
17981 to: DataType::Timestamp {
17982 timezone: false,
17983 precision: None,
17984 },
17985 trailing_comments: vec![],
17986 double_colon_syntax: false,
17987 format: None,
17988 default: None,
17989 inferred_type: None,
17990 })))
17991 }
                // STRING(x) from Spark/Databricks sources -> CAST(x AS STRING) for Spark-family targets, CAST(x AS TEXT) otherwise
17993 "STRING"
17994 if f.args.len() == 1
17995 && matches!(
17996 source,
17997 DialectType::Spark | DialectType::Databricks
17998 ) =>
17999 {
18000 let arg = f.args.into_iter().next().unwrap();
18001 let dt = match target {
18002 DialectType::Spark
18003 | DialectType::Databricks
18004 | DialectType::Hive => DataType::Custom {
18005 name: "STRING".to_string(),
18006 },
18007 _ => DataType::Text,
18008 };
18009 Ok(Expression::Cast(Box::new(Cast {
18010 this: arg,
18011 to: dt,
18012 trailing_comments: vec![],
18013 double_colon_syntax: false,
18014 format: None,
18015 default: None,
18016 inferred_type: None,
18017 })))
18018 }
18019 // LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
18020 "LOGICAL_OR" if f.args.len() == 1 => {
18021 let name = match target {
18022 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
18023 _ => "LOGICAL_OR",
18024 };
18025 Ok(Expression::Function(Box::new(Function::new(
18026 name.to_string(),
18027 f.args,
18028 ))))
18029 }
18030 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
18031 "SPLIT"
18032 if f.args.len() == 2
18033 && matches!(
18034 source,
18035 DialectType::Spark
18036 | DialectType::Databricks
18037 | DialectType::Hive
18038 ) =>
18039 {
18040 let name = match target {
18041 DialectType::DuckDB => "STR_SPLIT_REGEX",
18042 DialectType::Presto
18043 | DialectType::Trino
18044 | DialectType::Athena => "REGEXP_SPLIT",
18045 DialectType::Spark
18046 | DialectType::Databricks
18047 | DialectType::Hive => "SPLIT",
18048 _ => "SPLIT",
18049 };
18050 Ok(Expression::Function(Box::new(Function::new(
18051 name.to_string(),
18052 f.args,
18053 ))))
18054 }
18055 // TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
18056 "TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
18057 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18058 Ok(Expression::Function(Box::new(Function::new(
18059 "ELEMENT_AT".to_string(),
18060 f.args,
18061 ))))
18062 }
18063 DialectType::DuckDB => {
18064 let mut args = f.args;
18065 let arr = args.remove(0);
18066 let idx = args.remove(0);
18067 Ok(Expression::Subscript(Box::new(
18068 crate::expressions::Subscript {
18069 this: arr,
18070 index: idx,
18071 },
18072 )))
18073 }
18074 _ => Ok(Expression::Function(f)),
18075 },
18076 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, LIST_FILTER for DuckDB
18077 "ARRAY_FILTER" if f.args.len() == 2 => {
18078 let name = match target {
18079 DialectType::DuckDB => "LIST_FILTER",
18080 DialectType::StarRocks => "ARRAY_FILTER",
18081 _ => "FILTER",
18082 };
18083 Ok(Expression::Function(Box::new(Function::new(
18084 name.to_string(),
18085 f.args,
18086 ))))
18087 }
18088 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
18089 "FILTER" if f.args.len() == 2 => {
18090 let name = match target {
18091 DialectType::DuckDB => "LIST_FILTER",
18092 DialectType::StarRocks => "ARRAY_FILTER",
18093 _ => "FILTER",
18094 };
18095 Ok(Expression::Function(Box::new(Function::new(
18096 name.to_string(),
18097 f.args,
18098 ))))
18099 }
18100 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
18101 "REDUCE" if f.args.len() >= 3 => {
18102 let name = match target {
18103 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
18104 _ => "REDUCE",
18105 };
18106 Ok(Expression::Function(Box::new(Function::new(
18107 name.to_string(),
18108 f.args,
18109 ))))
18110 }
18111 // CURRENT_SCHEMA() -> dialect-specific
18112 "CURRENT_SCHEMA" => {
18113 match target {
18114 DialectType::PostgreSQL => {
18115 // PostgreSQL: CURRENT_SCHEMA (no parens)
18116 Ok(Expression::Function(Box::new(Function {
18117 name: "CURRENT_SCHEMA".to_string(),
18118 args: vec![],
18119 distinct: false,
18120 trailing_comments: vec![],
18121 use_bracket_syntax: false,
18122 no_parens: true,
18123 quoted: false,
18124 span: None,
18125 inferred_type: None,
18126 })))
18127 }
18128 DialectType::MySQL
18129 | DialectType::Doris
18130 | DialectType::StarRocks => Ok(Expression::Function(Box::new(
18131 Function::new("SCHEMA".to_string(), vec![]),
18132 ))),
18133 DialectType::TSQL => Ok(Expression::Function(Box::new(
18134 Function::new("SCHEMA_NAME".to_string(), vec![]),
18135 ))),
18136 DialectType::SQLite => {
18137 Ok(Expression::Literal(Literal::String("main".to_string())))
18138 }
18139 _ => Ok(Expression::Function(f)),
18140 }
18141 }
18142 // LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for Spark/Hive/Databricks/ClickHouse
18143 "LTRIM" if f.args.len() == 2 => match target {
18144 DialectType::Spark
18145 | DialectType::Hive
18146 | DialectType::Databricks
18147 | DialectType::ClickHouse => {
18148 let mut args = f.args;
18149 let str_expr = args.remove(0);
18150 let chars = args.remove(0);
18151 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
18152 this: str_expr,
18153 characters: Some(chars),
18154 position: crate::expressions::TrimPosition::Leading,
18155 sql_standard_syntax: true,
18156 position_explicit: true,
18157 })))
18158 }
18159 _ => Ok(Expression::Function(f)),
18160 },
18161 // RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str) for Spark/Hive/Databricks/ClickHouse
18162 "RTRIM" if f.args.len() == 2 => match target {
18163 DialectType::Spark
18164 | DialectType::Hive
18165 | DialectType::Databricks
18166 | DialectType::ClickHouse => {
18167 let mut args = f.args;
18168 let str_expr = args.remove(0);
18169 let chars = args.remove(0);
18170 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
18171 this: str_expr,
18172 characters: Some(chars),
18173 position: crate::expressions::TrimPosition::Trailing,
18174 sql_standard_syntax: true,
18175 position_explicit: true,
18176 })))
18177 }
18178 _ => Ok(Expression::Function(f)),
18179 },
18180 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
18181 "ARRAY_REVERSE" if f.args.len() == 1 => match target {
18182 DialectType::ClickHouse => {
18183 let mut new_f = *f;
18184 new_f.name = "arrayReverse".to_string();
18185 Ok(Expression::Function(Box::new(new_f)))
18186 }
18187 _ => Ok(Expression::Function(f)),
18188 },
18189 // UUID() -> NEWID() for TSQL
18190 "UUID" if f.args.is_empty() => match target {
18191 DialectType::TSQL | DialectType::Fabric => {
18192 Ok(Expression::Function(Box::new(Function::new(
18193 "NEWID".to_string(),
18194 vec![],
18195 ))))
18196 }
18197 _ => Ok(Expression::Function(f)),
18198 },
18199 // FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse, FARMFINGERPRINT64(x) for Redshift
18200 "FARM_FINGERPRINT" if f.args.len() == 1 => match target {
18201 DialectType::ClickHouse => {
18202 let mut new_f = *f;
18203 new_f.name = "farmFingerprint64".to_string();
18204 Ok(Expression::Function(Box::new(new_f)))
18205 }
18206 DialectType::Redshift => {
18207 let mut new_f = *f;
18208 new_f.name = "FARMFINGERPRINT64".to_string();
18209 Ok(Expression::Function(Box::new(new_f)))
18210 }
18211 _ => Ok(Expression::Function(f)),
18212 },
18213 // JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark, OBJECT_KEYS(x) for Snowflake
18214 "JSON_KEYS" => match target {
18215 DialectType::Databricks | DialectType::Spark => {
18216 let mut new_f = *f;
18217 new_f.name = "JSON_OBJECT_KEYS".to_string();
18218 Ok(Expression::Function(Box::new(new_f)))
18219 }
18220 DialectType::Snowflake => {
18221 let mut new_f = *f;
18222 new_f.name = "OBJECT_KEYS".to_string();
18223 Ok(Expression::Function(Box::new(new_f)))
18224 }
18225 _ => Ok(Expression::Function(f)),
18226 },
18227 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake
18228 "WEEKOFYEAR" => match target {
18229 DialectType::Snowflake => {
18230 let mut new_f = *f;
18231 new_f.name = "WEEKISO".to_string();
18232 Ok(Expression::Function(Box::new(new_f)))
18233 }
18234 _ => Ok(Expression::Function(f)),
18235 },
18236 // FORMAT(fmt, args...) -> FORMAT_STRING(fmt, args...) for Databricks
18237 "FORMAT"
18238 if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
18239 {
18240 match target {
18241 DialectType::Databricks | DialectType::Spark => {
18242 let mut new_f = *f;
18243 new_f.name = "FORMAT_STRING".to_string();
18244 Ok(Expression::Function(Box::new(new_f)))
18245 }
18246 _ => Ok(Expression::Function(f)),
18247 }
18248 }
18249 // CONCAT_WS('-', args...) -> CONCAT_WS('-', CAST(arg AS VARCHAR), ...) for Presto/Trino
18250 "CONCAT_WS" if f.args.len() >= 2 => match target {
18251 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18252 let mut args = f.args;
18253 let sep = args.remove(0);
18254 let cast_args: Vec<Expression> = args
18255 .into_iter()
18256 .map(|a| {
18257 Expression::Cast(Box::new(Cast {
18258 this: a,
18259 to: DataType::VarChar {
18260 length: None,
18261 parenthesized_length: false,
18262 },
18263 double_colon_syntax: false,
18264 trailing_comments: Vec::new(),
18265 format: None,
18266 default: None,
18267 inferred_type: None,
18268 }))
18269 })
18270 .collect();
18271 let mut new_args = vec![sep];
18272 new_args.extend(cast_args);
18273 Ok(Expression::Function(Box::new(Function::new(
18274 "CONCAT_WS".to_string(),
18275 new_args,
18276 ))))
18277 }
18278 _ => Ok(Expression::Function(f)),
18279 },
18280 // ARRAY_SLICE(x, start, end) -> SLICE(x, start, end) for Presto/Trino/Databricks, arraySlice for ClickHouse
18281 "ARRAY_SLICE" if f.args.len() >= 2 => match target {
18282 DialectType::Presto
18283 | DialectType::Trino
18284 | DialectType::Athena
18285 | DialectType::Databricks
18286 | DialectType::Spark => {
18287 let mut new_f = *f;
18288 new_f.name = "SLICE".to_string();
18289 Ok(Expression::Function(Box::new(new_f)))
18290 }
18291 DialectType::ClickHouse => {
18292 let mut new_f = *f;
18293 new_f.name = "arraySlice".to_string();
18294 Ok(Expression::Function(Box::new(new_f)))
18295 }
18296 _ => Ok(Expression::Function(f)),
18297 },
18298 // ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB (swap args)
18299 "ARRAY_PREPEND" if f.args.len() == 2 => match target {
18300 DialectType::DuckDB => {
18301 let mut args = f.args;
18302 let arr = args.remove(0);
18303 let val = args.remove(0);
18304 Ok(Expression::Function(Box::new(Function::new(
18305 "LIST_PREPEND".to_string(),
18306 vec![val, arr],
18307 ))))
18308 }
18309 _ => Ok(Expression::Function(f)),
18310 },
18311 // ARRAY_REMOVE(arr, target) -> dialect-specific
18312 "ARRAY_REMOVE" if f.args.len() == 2 => {
18313 match target {
18314 DialectType::DuckDB => {
18315 let mut args = f.args;
18316 let arr = args.remove(0);
18317 let target_val = args.remove(0);
18318 let u_id = crate::expressions::Identifier::new("_u");
18319 // LIST_FILTER(arr, _u -> _u <> target)
18320 let lambda = Expression::Lambda(Box::new(
18321 crate::expressions::LambdaExpr {
18322 parameters: vec![u_id.clone()],
18323 body: Expression::Neq(Box::new(BinaryOp {
18324 left: Expression::Identifier(u_id),
18325 right: target_val,
18326 left_comments: Vec::new(),
18327 operator_comments: Vec::new(),
18328 trailing_comments: Vec::new(),
18329 inferred_type: None,
18330 })),
18331 colon: false,
18332 parameter_types: Vec::new(),
18333 },
18334 ));
18335 Ok(Expression::Function(Box::new(Function::new(
18336 "LIST_FILTER".to_string(),
18337 vec![arr, lambda],
18338 ))))
18339 }
18340 DialectType::ClickHouse => {
18341 let mut args = f.args;
18342 let arr = args.remove(0);
18343 let target_val = args.remove(0);
18344 let u_id = crate::expressions::Identifier::new("_u");
18345 // arrayFilter(_u -> _u <> target, arr)
18346 let lambda = Expression::Lambda(Box::new(
18347 crate::expressions::LambdaExpr {
18348 parameters: vec![u_id.clone()],
18349 body: Expression::Neq(Box::new(BinaryOp {
18350 left: Expression::Identifier(u_id),
18351 right: target_val,
18352 left_comments: Vec::new(),
18353 operator_comments: Vec::new(),
18354 trailing_comments: Vec::new(),
18355 inferred_type: None,
18356 })),
18357 colon: false,
18358 parameter_types: Vec::new(),
18359 },
18360 ));
18361 Ok(Expression::Function(Box::new(Function::new(
18362 "arrayFilter".to_string(),
18363 vec![lambda, arr],
18364 ))))
18365 }
18366 DialectType::BigQuery => {
18367 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
18368 let mut args = f.args;
18369 let arr = args.remove(0);
18370 let target_val = args.remove(0);
18371 let u_id = crate::expressions::Identifier::new("_u");
18372 let u_col =
18373 Expression::Column(Box::new(crate::expressions::Column {
18374 name: u_id.clone(),
18375 table: None,
18376 join_mark: false,
18377 trailing_comments: Vec::new(),
18378 span: None,
18379 inferred_type: None,
18380 }));
18381 // UNNEST(the_array) AS _u
18382 let unnest_expr = Expression::Unnest(Box::new(
18383 crate::expressions::UnnestFunc {
18384 this: arr,
18385 expressions: Vec::new(),
18386 with_ordinality: false,
18387 alias: None,
18388 offset_alias: None,
18389 },
18390 ));
18391 let aliased_unnest = Expression::Alias(Box::new(
18392 crate::expressions::Alias {
18393 this: unnest_expr,
18394 alias: u_id.clone(),
18395 column_aliases: Vec::new(),
18396 pre_alias_comments: Vec::new(),
18397 trailing_comments: Vec::new(),
18398 inferred_type: None,
18399 },
18400 ));
18401 // _u <> target
18402 let where_cond = Expression::Neq(Box::new(BinaryOp {
18403 left: u_col.clone(),
18404 right: target_val,
18405 left_comments: Vec::new(),
18406 operator_comments: Vec::new(),
18407 trailing_comments: Vec::new(),
18408 inferred_type: None,
18409 }));
18410 // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
18411 let subquery = Expression::Select(Box::new(
18412 crate::expressions::Select::new()
18413 .column(u_col)
18414 .from(aliased_unnest)
18415 .where_(where_cond),
18416 ));
18417 // ARRAY(subquery) -- use ArrayFunc with subquery as single element
18418 Ok(Expression::ArrayFunc(Box::new(
18419 crate::expressions::ArrayConstructor {
18420 expressions: vec![subquery],
18421 bracket_notation: false,
18422 use_list_keyword: false,
18423 },
18424 )))
18425 }
18426 _ => Ok(Expression::Function(f)),
18427 }
18428 }
18429 // PARSE_JSON(str) -> remove for SQLite/Doris (just use the string literal)
18430 "PARSE_JSON" if f.args.len() == 1 => {
18431 match target {
18432 DialectType::SQLite
18433 | DialectType::Doris
18434 | DialectType::MySQL
18435 | DialectType::StarRocks => {
18436 // Strip PARSE_JSON, return the inner argument
18437 Ok(f.args.into_iter().next().unwrap())
18438 }
18439 _ => Ok(Expression::Function(f)),
18440 }
18441 }
18442 // JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON
18443 // This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through
18444 "JSON_REMOVE" => Ok(Expression::Function(f)),
18445 // JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON
18446 // This is handled by PARSE_JSON stripping above; JSON_SET is passed through
18447 "JSON_SET" => Ok(Expression::Function(f)),
18448 // DECODE(x, search1, result1, ..., default) -> CASE WHEN
18449 // Behavior per search value type:
18450 // NULL literal -> CASE WHEN x IS NULL THEN result
18451 // Literal (number, string, bool) -> CASE WHEN x = literal THEN result
18452 // Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
18453 "DECODE" if f.args.len() >= 3 => {
18454 // Keep as DECODE for targets that support it natively
18455 let keep_as_decode = matches!(
18456 target,
18457 DialectType::Oracle
18458 | DialectType::Snowflake
18459 | DialectType::Redshift
18460 | DialectType::Teradata
18461 | DialectType::Spark
18462 | DialectType::Databricks
18463 );
18464 if keep_as_decode {
18465 return Ok(Expression::Function(f));
18466 }
18467
18468 let mut args = f.args;
18469 let this_expr = args.remove(0);
18470 let mut pairs = Vec::new();
18471 let mut default = None;
18472 let mut i = 0;
18473 while i + 1 < args.len() {
18474 pairs.push((args[i].clone(), args[i + 1].clone()));
18475 i += 2;
18476 }
18477 if i < args.len() {
18478 default = Some(args[i].clone());
18479 }
18480 // Helper: check if expression is a literal value
18481 fn is_literal(e: &Expression) -> bool {
18482 matches!(
18483 e,
18484 Expression::Literal(_)
18485 | Expression::Boolean(_)
18486 | Expression::Neg(_)
18487 )
18488 }
18489 let whens: Vec<(Expression, Expression)> = pairs
18490 .into_iter()
18491 .map(|(search, result)| {
18492 if matches!(&search, Expression::Null(_)) {
18493 // NULL search -> IS NULL
18494 let condition = Expression::Is(Box::new(BinaryOp {
18495 left: this_expr.clone(),
18496 right: Expression::Null(crate::expressions::Null),
18497 left_comments: Vec::new(),
18498 operator_comments: Vec::new(),
18499 trailing_comments: Vec::new(),
18500 inferred_type: None,
18501 }));
18502 (condition, result)
18503 } else if is_literal(&search) {
18504 // Literal search -> simple equality
18505 let eq = Expression::Eq(Box::new(BinaryOp {
18506 left: this_expr.clone(),
18507 right: search,
18508 left_comments: Vec::new(),
18509 operator_comments: Vec::new(),
18510 trailing_comments: Vec::new(),
18511 inferred_type: None,
18512 }));
18513 (eq, result)
18514 } else {
18515 // Non-literal (column ref, expression) -> null-safe comparison
18516 let needs_paren = matches!(
18517 &search,
18518 Expression::Eq(_)
18519 | Expression::Neq(_)
18520 | Expression::Gt(_)
18521 | Expression::Gte(_)
18522 | Expression::Lt(_)
18523 | Expression::Lte(_)
18524 );
18525 let search_for_eq = if needs_paren {
18526 Expression::Paren(Box::new(
18527 crate::expressions::Paren {
18528 this: search.clone(),
18529 trailing_comments: Vec::new(),
18530 },
18531 ))
18532 } else {
18533 search.clone()
18534 };
18535 let eq = Expression::Eq(Box::new(BinaryOp {
18536 left: this_expr.clone(),
18537 right: search_for_eq,
18538 left_comments: Vec::new(),
18539 operator_comments: Vec::new(),
18540 trailing_comments: Vec::new(),
18541 inferred_type: None,
18542 }));
18543 let search_for_null = if needs_paren {
18544 Expression::Paren(Box::new(
18545 crate::expressions::Paren {
18546 this: search.clone(),
18547 trailing_comments: Vec::new(),
18548 },
18549 ))
18550 } else {
18551 search.clone()
18552 };
18553 let x_is_null = Expression::Is(Box::new(BinaryOp {
18554 left: this_expr.clone(),
18555 right: Expression::Null(crate::expressions::Null),
18556 left_comments: Vec::new(),
18557 operator_comments: Vec::new(),
18558 trailing_comments: Vec::new(),
18559 inferred_type: None,
18560 }));
18561 let s_is_null = Expression::Is(Box::new(BinaryOp {
18562 left: search_for_null,
18563 right: Expression::Null(crate::expressions::Null),
18564 left_comments: Vec::new(),
18565 operator_comments: Vec::new(),
18566 trailing_comments: Vec::new(),
18567 inferred_type: None,
18568 }));
18569 let both_null = Expression::And(Box::new(BinaryOp {
18570 left: x_is_null,
18571 right: s_is_null,
18572 left_comments: Vec::new(),
18573 operator_comments: Vec::new(),
18574 trailing_comments: Vec::new(),
18575 inferred_type: None,
18576 }));
18577 let condition = Expression::Or(Box::new(BinaryOp {
18578 left: eq,
18579 right: Expression::Paren(Box::new(
18580 crate::expressions::Paren {
18581 this: both_null,
18582 trailing_comments: Vec::new(),
18583 },
18584 )),
18585 left_comments: Vec::new(),
18586 operator_comments: Vec::new(),
18587 trailing_comments: Vec::new(),
18588 inferred_type: None,
18589 }));
18590 (condition, result)
18591 }
18592 })
18593 .collect();
18594 Ok(Expression::Case(Box::new(Case {
18595 operand: None,
18596 whens,
18597 else_: default,
18598 comments: Vec::new(),
18599 inferred_type: None,
18600 })))
18601 }
18602 // LEVENSHTEIN(a, b, ...) -> dialect-specific
18603 "LEVENSHTEIN" => {
18604 match target {
18605 DialectType::BigQuery => {
18606 let mut new_f = *f;
18607 new_f.name = "EDIT_DISTANCE".to_string();
18608 Ok(Expression::Function(Box::new(new_f)))
18609 }
18610 DialectType::Drill => {
18611 let mut new_f = *f;
18612 new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
18613 Ok(Expression::Function(Box::new(new_f)))
18614 }
18615 DialectType::PostgreSQL if f.args.len() == 6 => {
18616 // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
18617 // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
18618 let mut new_f = *f;
18619 new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
18620 Ok(Expression::Function(Box::new(new_f)))
18621 }
18622 _ => Ok(Expression::Function(f)),
18623 }
18624 }
18625 // ARRAY_MAX(x) -> arrayMax(x) for ClickHouse, LIST_MAX(x) for DuckDB
18626 "ARRAY_MAX" => {
18627 let name = match target {
18628 DialectType::ClickHouse => "arrayMax",
18629 DialectType::DuckDB => "LIST_MAX",
18630 _ => "ARRAY_MAX",
18631 };
18632 let mut new_f = *f;
18633 new_f.name = name.to_string();
18634 Ok(Expression::Function(Box::new(new_f)))
18635 }
18636 // ARRAY_MIN(x) -> arrayMin(x) for ClickHouse, LIST_MIN(x) for DuckDB
18637 "ARRAY_MIN" => {
18638 let name = match target {
18639 DialectType::ClickHouse => "arrayMin",
18640 DialectType::DuckDB => "LIST_MIN",
18641 _ => "ARRAY_MIN",
18642 };
18643 let mut new_f = *f;
18644 new_f.name = name.to_string();
18645 Ok(Expression::Function(Box::new(new_f)))
18646 }
18647 // JAROWINKLER_SIMILARITY(a, b) -> jaroWinklerSimilarity(UPPER(a), UPPER(b)) for ClickHouse
18648 // -> JARO_WINKLER_SIMILARITY(UPPER(a), UPPER(b)) for DuckDB
18649 "JAROWINKLER_SIMILARITY" if f.args.len() == 2 => {
18650 let mut args = f.args;
18651 let b = args.pop().unwrap();
18652 let a = args.pop().unwrap();
18653 match target {
18654 DialectType::ClickHouse => {
18655 let upper_a = Expression::Upper(Box::new(crate::expressions::UnaryFunc::new(a)));
18656 let upper_b = Expression::Upper(Box::new(crate::expressions::UnaryFunc::new(b)));
18657 Ok(Expression::Function(Box::new(Function::new(
18658 "jaroWinklerSimilarity".to_string(),
18659 vec![upper_a, upper_b],
18660 ))))
18661 }
18662 DialectType::DuckDB => {
18663 let upper_a = Expression::Upper(Box::new(crate::expressions::UnaryFunc::new(a)));
18664 let upper_b = Expression::Upper(Box::new(crate::expressions::UnaryFunc::new(b)));
18665 Ok(Expression::Function(Box::new(Function::new(
18666 "JARO_WINKLER_SIMILARITY".to_string(),
18667 vec![upper_a, upper_b],
18668 ))))
18669 }
18670 _ => {
18671 Ok(Expression::Function(Box::new(Function::new(
18672 "JAROWINKLER_SIMILARITY".to_string(),
18673 vec![a, b],
18674 ))))
18675 }
18676 }
18677 }
18678 // CURRENT_SCHEMAS(x) -> CURRENT_SCHEMAS() for Snowflake (drop arg)
18679 "CURRENT_SCHEMAS" => match target {
18680 DialectType::Snowflake => {
18681 Ok(Expression::Function(Box::new(Function::new(
18682 "CURRENT_SCHEMAS".to_string(),
18683 vec![],
18684 ))))
18685 }
18686 _ => Ok(Expression::Function(f)),
18687 },
18688 // TRUNC/TRUNCATE (numeric) -> dialect-specific
18689 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 => {
18690 match target {
18691 DialectType::TSQL | DialectType::Fabric => {
18692 // ROUND(x, decimals, 1) - the 1 flag means truncation
18693 let mut args = f.args;
18694 let this = if args.is_empty() {
18695 return Ok(Expression::Function(Box::new(Function::new(
18696 "TRUNC".to_string(), args,
18697 ))));
18698 } else {
18699 args.remove(0)
18700 };
18701 let decimals = if args.is_empty() {
18702 Expression::Literal(Literal::Number("0".to_string()))
18703 } else {
18704 args.remove(0)
18705 };
18706 Ok(Expression::Function(Box::new(Function::new(
18707 "ROUND".to_string(),
18708 vec![this, decimals, Expression::Literal(Literal::Number("1".to_string()))],
18709 ))))
18710 }
18711 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18712 // TRUNCATE(x, decimals)
18713 let mut new_f = *f;
18714 new_f.name = "TRUNCATE".to_string();
18715 Ok(Expression::Function(Box::new(new_f)))
18716 }
18717 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
18718 // TRUNCATE(x, decimals)
18719 let mut new_f = *f;
18720 new_f.name = "TRUNCATE".to_string();
18721 Ok(Expression::Function(Box::new(new_f)))
18722 }
18723 DialectType::DuckDB => {
18724 // TRUNC(x) - drop decimals
18725 let this = f.args.into_iter().next().unwrap_or(
18726 Expression::Literal(Literal::Number("0".to_string()))
18727 );
18728 Ok(Expression::Function(Box::new(Function::new(
18729 "TRUNC".to_string(),
18730 vec![this],
18731 ))))
18732 }
18733 DialectType::ClickHouse => {
18734 // trunc(x, decimals) - lowercase
18735 let mut new_f = *f;
18736 new_f.name = "trunc".to_string();
18737 Ok(Expression::Function(Box::new(new_f)))
18738 }
18739 DialectType::Spark | DialectType::Databricks => {
18740 // Spark: TRUNC is date-only; numeric TRUNC → CAST(x AS BIGINT)
18741 let this = f.args.into_iter().next().unwrap_or(
18742 Expression::Literal(Literal::Number("0".to_string()))
18743 );
18744 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
18745 this,
18746 to: crate::expressions::DataType::BigInt { length: None },
18747 double_colon_syntax: false,
18748 trailing_comments: Vec::new(),
18749 format: None,
18750 default: None,
18751 inferred_type: None,
18752 })))
18753 }
18754 _ => {
18755 // TRUNC(x, decimals) for PostgreSQL, Oracle, Snowflake, etc.
18756 let mut new_f = *f;
18757 new_f.name = "TRUNC".to_string();
18758 Ok(Expression::Function(Box::new(new_f)))
18759 }
18760 }
18761 }
18762 // CURRENT_VERSION() -> VERSION() for most dialects
18763 "CURRENT_VERSION" => match target {
18764 DialectType::Snowflake
18765 | DialectType::Databricks
18766 | DialectType::StarRocks => {
18767 Ok(Expression::Function(f))
18768 }
18769 DialectType::SQLite => {
18770 let mut new_f = *f;
18771 new_f.name = "SQLITE_VERSION".to_string();
18772 Ok(Expression::Function(Box::new(new_f)))
18773 }
18774 _ => {
18775 let mut new_f = *f;
18776 new_f.name = "VERSION".to_string();
18777 Ok(Expression::Function(Box::new(new_f)))
18778 }
18779 },
18780 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
18781 "ARRAY_REVERSE" => match target {
18782 DialectType::ClickHouse => {
18783 let mut new_f = *f;
18784 new_f.name = "arrayReverse".to_string();
18785 Ok(Expression::Function(Box::new(new_f)))
18786 }
18787 _ => Ok(Expression::Function(f)),
18788 },
18789 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
18790 "GENERATE_DATE_ARRAY" => {
18791 let mut args = f.args;
18792 if matches!(target, DialectType::BigQuery) {
18793 // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
18794 if args.len() == 2 {
18795 let default_interval = Expression::Interval(Box::new(
18796 crate::expressions::Interval {
18797 this: Some(Expression::Literal(Literal::String(
18798 "1".to_string(),
18799 ))),
18800 unit: Some(
18801 crate::expressions::IntervalUnitSpec::Simple {
18802 unit: crate::expressions::IntervalUnit::Day,
18803 use_plural: false,
18804 },
18805 ),
18806 },
18807 ));
18808 args.push(default_interval);
18809 }
18810 Ok(Expression::Function(Box::new(Function::new(
18811 "GENERATE_DATE_ARRAY".to_string(),
18812 args,
18813 ))))
18814 } else if matches!(target, DialectType::DuckDB) {
18815 // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
18816 let start = args.get(0).cloned();
18817 let end = args.get(1).cloned();
18818 let step = args.get(2).cloned().or_else(|| {
18819 Some(Expression::Interval(Box::new(
18820 crate::expressions::Interval {
18821 this: Some(Expression::Literal(Literal::String(
18822 "1".to_string(),
18823 ))),
18824 unit: Some(
18825 crate::expressions::IntervalUnitSpec::Simple {
18826 unit: crate::expressions::IntervalUnit::Day,
18827 use_plural: false,
18828 },
18829 ),
18830 },
18831 )))
18832 });
18833 let gen_series = Expression::GenerateSeries(Box::new(
18834 crate::expressions::GenerateSeries {
18835 start: start.map(Box::new),
18836 end: end.map(Box::new),
18837 step: step.map(Box::new),
18838 is_end_exclusive: None,
18839 },
18840 ));
18841 Ok(Expression::Cast(Box::new(Cast {
18842 this: gen_series,
18843 to: DataType::Array {
18844 element_type: Box::new(DataType::Date),
18845 dimension: None,
18846 },
18847 trailing_comments: vec![],
18848 double_colon_syntax: false,
18849 format: None,
18850 default: None,
18851 inferred_type: None,
18852 })))
18853 } else if matches!(
18854 target,
18855 DialectType::Presto | DialectType::Trino | DialectType::Athena
18856 ) {
18857 // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
18858 let start = args.get(0).cloned();
18859 let end = args.get(1).cloned();
18860 let step = args.get(2).cloned().or_else(|| {
18861 Some(Expression::Interval(Box::new(
18862 crate::expressions::Interval {
18863 this: Some(Expression::Literal(Literal::String(
18864 "1".to_string(),
18865 ))),
18866 unit: Some(
18867 crate::expressions::IntervalUnitSpec::Simple {
18868 unit: crate::expressions::IntervalUnit::Day,
18869 use_plural: false,
18870 },
18871 ),
18872 },
18873 )))
18874 });
18875 let gen_series = Expression::GenerateSeries(Box::new(
18876 crate::expressions::GenerateSeries {
18877 start: start.map(Box::new),
18878 end: end.map(Box::new),
18879 step: step.map(Box::new),
18880 is_end_exclusive: None,
18881 },
18882 ));
18883 Ok(gen_series)
18884 } else if matches!(
18885 target,
18886 DialectType::Spark | DialectType::Databricks
18887 ) {
18888 // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
18889 let start = args.get(0).cloned();
18890 let end = args.get(1).cloned();
18891 let step = args.get(2).cloned().or_else(|| {
18892 Some(Expression::Interval(Box::new(
18893 crate::expressions::Interval {
18894 this: Some(Expression::Literal(Literal::String(
18895 "1".to_string(),
18896 ))),
18897 unit: Some(
18898 crate::expressions::IntervalUnitSpec::Simple {
18899 unit: crate::expressions::IntervalUnit::Day,
18900 use_plural: false,
18901 },
18902 ),
18903 },
18904 )))
18905 });
18906 let gen_series = Expression::GenerateSeries(Box::new(
18907 crate::expressions::GenerateSeries {
18908 start: start.map(Box::new),
18909 end: end.map(Box::new),
18910 step: step.map(Box::new),
18911 is_end_exclusive: None,
18912 },
18913 ));
18914 Ok(gen_series)
18915 } else if matches!(target, DialectType::Snowflake) {
18916 // Snowflake: keep as GENERATE_DATE_ARRAY for later transform
18917 if args.len() == 2 {
18918 let default_interval = Expression::Interval(Box::new(
18919 crate::expressions::Interval {
18920 this: Some(Expression::Literal(Literal::String(
18921 "1".to_string(),
18922 ))),
18923 unit: Some(
18924 crate::expressions::IntervalUnitSpec::Simple {
18925 unit: crate::expressions::IntervalUnit::Day,
18926 use_plural: false,
18927 },
18928 ),
18929 },
18930 ));
18931 args.push(default_interval);
18932 }
18933 Ok(Expression::Function(Box::new(Function::new(
18934 "GENERATE_DATE_ARRAY".to_string(),
18935 args,
18936 ))))
18937 } else if matches!(
18938 target,
18939 DialectType::MySQL
18940 | DialectType::TSQL
18941 | DialectType::Fabric
18942 | DialectType::Redshift
18943 ) {
18944 // MySQL/TSQL/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
18945 // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
18946 Ok(Expression::Function(Box::new(Function::new(
18947 "GENERATE_DATE_ARRAY".to_string(),
18948 args,
18949 ))))
18950 } else {
18951 // PostgreSQL/others: convert to GenerateSeries
18952 let start = args.get(0).cloned();
18953 let end = args.get(1).cloned();
18954 let step = args.get(2).cloned().or_else(|| {
18955 Some(Expression::Interval(Box::new(
18956 crate::expressions::Interval {
18957 this: Some(Expression::Literal(Literal::String(
18958 "1".to_string(),
18959 ))),
18960 unit: Some(
18961 crate::expressions::IntervalUnitSpec::Simple {
18962 unit: crate::expressions::IntervalUnit::Day,
18963 use_plural: false,
18964 },
18965 ),
18966 },
18967 )))
18968 });
18969 Ok(Expression::GenerateSeries(Box::new(
18970 crate::expressions::GenerateSeries {
18971 start: start.map(Box::new),
18972 end: end.map(Box::new),
18973 step: step.map(Box::new),
18974 is_end_exclusive: None,
18975 },
18976 )))
18977 }
18978 }
18979 _ => Ok(Expression::Function(f)),
18980 }
18981 } else if let Expression::AggregateFunction(mut af) = e {
18982 let name = af.name.to_ascii_uppercase();
18983 match name.as_str() {
18984 "ARBITRARY" if af.args.len() == 1 => {
18985 let arg = af.args.into_iter().next().unwrap();
18986 Ok(convert_arbitrary(arg, target))
18987 }
18988 "JSON_ARRAYAGG" => {
18989 match target {
18990 DialectType::PostgreSQL => {
18991 af.name = "JSON_AGG".to_string();
18992 // Add NULLS FIRST to ORDER BY items for PostgreSQL
18993 for ordered in af.order_by.iter_mut() {
18994 if ordered.nulls_first.is_none() {
18995 ordered.nulls_first = Some(true);
18996 }
18997 }
18998 Ok(Expression::AggregateFunction(af))
18999 }
19000 _ => Ok(Expression::AggregateFunction(af)),
19001 }
19002 }
19003 _ => Ok(Expression::AggregateFunction(af)),
19004 }
19005 } else if let Expression::JSONArrayAgg(ja) = e {
19006 // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
19007 match target {
19008 DialectType::PostgreSQL => {
19009 let mut order_by = Vec::new();
19010 if let Some(order_expr) = ja.order {
19011 if let Expression::OrderBy(ob) = *order_expr {
19012 for mut ordered in ob.expressions {
19013 if ordered.nulls_first.is_none() {
19014 ordered.nulls_first = Some(true);
19015 }
19016 order_by.push(ordered);
19017 }
19018 }
19019 }
19020 Ok(Expression::AggregateFunction(Box::new(
19021 crate::expressions::AggregateFunction {
19022 name: "JSON_AGG".to_string(),
19023 args: vec![*ja.this],
19024 distinct: false,
19025 filter: None,
19026 order_by,
19027 limit: None,
19028 ignore_nulls: None,
19029 inferred_type: None,
19030 },
19031 )))
19032 }
19033 _ => Ok(Expression::JSONArrayAgg(ja)),
19034 }
19035 } else if let Expression::ToNumber(tn) = e {
19036 // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
19037 let arg = *tn.this;
19038 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
19039 this: arg,
19040 to: crate::expressions::DataType::Double {
19041 precision: None,
19042 scale: None,
19043 },
19044 double_colon_syntax: false,
19045 trailing_comments: Vec::new(),
19046 format: None,
19047 default: None,
19048 inferred_type: None,
19049 })))
19050 } else {
19051 Ok(e)
19052 }
19053 }
19054
19055 Action::RegexpLikeToDuckDB => {
19056 if let Expression::RegexpLike(f) = e {
19057 let mut args = vec![f.this, f.pattern];
19058 if let Some(flags) = f.flags {
19059 args.push(flags);
19060 }
19061 Ok(Expression::Function(Box::new(Function::new(
19062 "REGEXP_MATCHES".to_string(),
19063 args,
19064 ))))
19065 } else {
19066 Ok(e)
19067 }
19068 }
19069 Action::EpochConvert => {
19070 if let Expression::Epoch(f) = e {
19071 let arg = f.this;
19072 let name = match target {
19073 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
19074 "UNIX_TIMESTAMP"
19075 }
19076 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
19077 DialectType::BigQuery => "TIME_TO_UNIX",
19078 _ => "EPOCH",
19079 };
19080 Ok(Expression::Function(Box::new(Function::new(
19081 name.to_string(),
19082 vec![arg],
19083 ))))
19084 } else {
19085 Ok(e)
19086 }
19087 }
Action::EpochMsConvert => {
    // EPOCH_MS(x): interpret x as milliseconds since the Unix epoch and
    // build a timestamp using the target dialect's native construct.
    use crate::expressions::{BinaryOp, Cast};
    if let Expression::EpochMs(f) = e {
        let arg = f.this;
        match target {
            // Spark/Databricks and BigQuery both expose TIMESTAMP_MILLIS(x).
            DialectType::Spark | DialectType::Databricks => {
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMP_MILLIS".to_string(),
                    vec![arg],
                ))))
            }
            DialectType::BigQuery => Ok(Expression::Function(Box::new(
                Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
            ))),
            DialectType::Presto | DialectType::Trino => {
                // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
                // FROM_UNIXTIME takes seconds; the CAST to DOUBLE forces
                // floating-point division so sub-second precision survives.
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Double {
                        precision: None,
                        scale: None,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                let div = Expression::Div(Box::new(BinaryOp::new(
                    cast_arg,
                    Expression::Function(Box::new(Function::new(
                        "POW".to_string(),
                        vec![Expression::number(10), Expression::number(3)],
                    ))),
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "FROM_UNIXTIME".to_string(),
                    vec![div],
                ))))
            }
            DialectType::MySQL => {
                // FROM_UNIXTIME(x / POWER(10, 3))
                // MySQL `/` already performs decimal division, so no CAST.
                let div = Expression::Div(Box::new(BinaryOp::new(
                    arg,
                    Expression::Function(Box::new(Function::new(
                        "POWER".to_string(),
                        vec![Expression::number(10), Expression::number(3)],
                    ))),
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "FROM_UNIXTIME".to_string(),
                    vec![div],
                ))))
            }
            DialectType::PostgreSQL | DialectType::Redshift => {
                // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
                // DOUBLE PRECISION is spelled via DataType::Custom because it
                // is a two-word type name.
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Custom {
                        name: "DOUBLE PRECISION".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                let div = Expression::Div(Box::new(BinaryOp::new(
                    cast_arg,
                    Expression::Function(Box::new(Function::new(
                        "POWER".to_string(),
                        vec![Expression::number(10), Expression::number(3)],
                    ))),
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "TO_TIMESTAMP".to_string(),
                    vec![div],
                ))))
            }
            DialectType::ClickHouse => {
                // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
                // ClickHouse has a dedicated millisecond constructor; the
                // Nullable(Int64) cast shapes the argument for it.
                let cast_arg = Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::Nullable {
                        inner: Box::new(DataType::BigInt { length: None }),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                Ok(Expression::Function(Box::new(Function::new(
                    "fromUnixTimestamp64Milli".to_string(),
                    vec![cast_arg],
                ))))
            }
            // All other targets (e.g. DuckDB) keep the EPOCH_MS spelling.
            _ => Ok(Expression::Function(Box::new(Function::new(
                "EPOCH_MS".to_string(),
                vec![arg],
            )))),
        }
    } else {
        Ok(e)
    }
}
19194 Action::TSQLTypeNormalize => {
19195 if let Expression::DataType(dt) = e {
19196 let new_dt = match &dt {
19197 DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
19198 DataType::Decimal {
19199 precision: Some(15),
19200 scale: Some(4),
19201 }
19202 }
19203 DataType::Custom { name }
19204 if name.eq_ignore_ascii_case("SMALLMONEY") =>
19205 {
19206 DataType::Decimal {
19207 precision: Some(6),
19208 scale: Some(4),
19209 }
19210 }
19211 DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
19212 DataType::Timestamp {
19213 timezone: false,
19214 precision: None,
19215 }
19216 }
19217 DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
19218 DataType::Float {
19219 precision: None,
19220 scale: None,
19221 real_spelling: false,
19222 }
19223 }
19224 DataType::Float {
19225 real_spelling: true,
19226 ..
19227 } => DataType::Float {
19228 precision: None,
19229 scale: None,
19230 real_spelling: false,
19231 },
19232 DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
19233 DataType::Custom {
19234 name: "BLOB".to_string(),
19235 }
19236 }
19237 DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
19238 DataType::Boolean
19239 }
19240 DataType::Custom { name }
19241 if name.eq_ignore_ascii_case("ROWVERSION") =>
19242 {
19243 DataType::Custom {
19244 name: "BINARY".to_string(),
19245 }
19246 }
19247 DataType::Custom { name }
19248 if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
19249 {
19250 match target {
19251 DialectType::Spark
19252 | DialectType::Databricks
19253 | DialectType::Hive => DataType::Custom {
19254 name: "STRING".to_string(),
19255 },
19256 _ => DataType::VarChar {
19257 length: Some(36),
19258 parenthesized_length: true,
19259 },
19260 }
19261 }
19262 DataType::Custom { name }
19263 if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
19264 {
19265 match target {
19266 DialectType::Spark
19267 | DialectType::Databricks
19268 | DialectType::Hive => DataType::Timestamp {
19269 timezone: false,
19270 precision: None,
19271 },
19272 _ => DataType::Timestamp {
19273 timezone: true,
19274 precision: None,
19275 },
19276 }
19277 }
19278 DataType::Custom { ref name }
19279 if name.len() >= 10 && name[..10].eq_ignore_ascii_case("DATETIME2(") =>
19280 {
19281 // DATETIME2(n) -> TIMESTAMP
19282 DataType::Timestamp {
19283 timezone: false,
19284 precision: None,
19285 }
19286 }
19287 DataType::Custom { ref name }
19288 if name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME(") =>
19289 {
19290 // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
19291 match target {
19292 DialectType::Spark
19293 | DialectType::Databricks
19294 | DialectType::Hive => DataType::Timestamp {
19295 timezone: false,
19296 precision: None,
19297 },
19298 _ => return Ok(Expression::DataType(dt)),
19299 }
19300 }
19301 DataType::Custom { ref name }
19302 if name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC") =>
19303 {
19304 // Parse NUMERIC(p,s) back to Decimal(p,s)
19305 let upper = name.to_ascii_uppercase();
19306 if let Some(inner) = upper
19307 .strip_prefix("NUMERIC(")
19308 .and_then(|s| s.strip_suffix(')'))
19309 {
19310 let parts: Vec<&str> = inner.split(',').collect();
19311 let precision =
19312 parts.first().and_then(|s| s.trim().parse::<u32>().ok());
19313 let scale =
19314 parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
19315 DataType::Decimal { precision, scale }
19316 } else if upper == "NUMERIC" {
19317 DataType::Decimal {
19318 precision: None,
19319 scale: None,
19320 }
19321 } else {
19322 return Ok(Expression::DataType(dt));
19323 }
19324 }
19325 DataType::Float {
19326 precision: Some(p), ..
19327 } => {
19328 // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
19329 // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
19330 let boundary = match target {
19331 DialectType::Hive
19332 | DialectType::Spark
19333 | DialectType::Databricks => 32,
19334 _ => 24,
19335 };
19336 if *p <= boundary {
19337 DataType::Float {
19338 precision: None,
19339 scale: None,
19340 real_spelling: false,
19341 }
19342 } else {
19343 DataType::Double {
19344 precision: None,
19345 scale: None,
19346 }
19347 }
19348 }
19349 DataType::TinyInt { .. } => match target {
19350 DialectType::DuckDB => DataType::Custom {
19351 name: "UTINYINT".to_string(),
19352 },
19353 DialectType::Hive
19354 | DialectType::Spark
19355 | DialectType::Databricks => DataType::SmallInt { length: None },
19356 _ => return Ok(Expression::DataType(dt)),
19357 },
19358 // INTEGER -> INT for Spark/Databricks
19359 DataType::Int {
19360 length,
19361 integer_spelling: true,
19362 } => DataType::Int {
19363 length: *length,
19364 integer_spelling: false,
19365 },
19366 _ => return Ok(Expression::DataType(dt)),
19367 };
19368 Ok(Expression::DataType(new_dt))
19369 } else {
19370 Ok(e)
19371 }
19372 }
Action::MySQLSafeDivide => {
    // Emulate MySQL division semantics (x / 0 yields NULL, and `/` is
    // non-integer) when rendering a Div node for other targets.
    use crate::expressions::{BinaryOp, Cast};
    if let Expression::Div(op) = e {
        let left = op.left;
        let right = op.right;
        // For SQLite: CAST left as REAL but NO NULLIF wrapping
        // (SQLite already returns NULL on division by zero).
        if matches!(target, DialectType::SQLite) {
            let new_left = Expression::Cast(Box::new(Cast {
                this: left,
                to: DataType::Float {
                    precision: None,
                    scale: None,
                    real_spelling: true,
                },
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
        }
        // Wrap right in NULLIF(right, 0) so a zero divisor produces NULL
        // instead of an error.
        let nullif_right = Expression::Function(Box::new(Function::new(
            "NULLIF".to_string(),
            vec![right, Expression::number(0)],
        )));
        // For some dialects, also CAST the left side so the division is
        // floating-point rather than truncating integer division.
        let new_left = match target {
            // Dialects that spell the type as the two-word DOUBLE PRECISION.
            DialectType::PostgreSQL
            | DialectType::Redshift
            | DialectType::Teradata
            | DialectType::Materialize
            | DialectType::RisingWave => Expression::Cast(Box::new(Cast {
                this: left,
                to: DataType::Custom {
                    name: "DOUBLE PRECISION".to_string(),
                },
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })),
            // Dialects with a native DOUBLE type.
            DialectType::Drill
            | DialectType::Trino
            | DialectType::Presto
            | DialectType::Athena => Expression::Cast(Box::new(Cast {
                this: left,
                to: DataType::Double {
                    precision: None,
                    scale: None,
                },
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })),
            // TSQL: FLOAT (without the REAL spelling) is its double type.
            DialectType::TSQL => Expression::Cast(Box::new(Cast {
                this: left,
                to: DataType::Float {
                    precision: None,
                    scale: None,
                    real_spelling: false,
                },
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })),
            // Other targets: leave the numerator untouched.
            _ => left,
        };
        Ok(Expression::Div(Box::new(BinaryOp::new(
            new_left,
            nullif_right,
        ))))
    } else {
        Ok(e)
    }
}
19455 Action::AlterTableRenameStripSchema => {
19456 if let Expression::AlterTable(mut at) = e {
19457 if let Some(crate::expressions::AlterTableAction::RenameTable(
19458 ref mut new_tbl,
19459 )) = at.actions.first_mut()
19460 {
19461 new_tbl.schema = None;
19462 new_tbl.catalog = None;
19463 }
19464 Ok(Expression::AlterTable(at))
19465 } else {
19466 Ok(e)
19467 }
19468 }
19469 Action::NullsOrdering => {
19470 // Fill in the source dialect's implied null ordering default.
19471 // This makes implicit null ordering explicit so the target generator
19472 // can correctly strip or keep it.
19473 //
19474 // Dialect null ordering categories:
19475 // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
19476 // ASC -> NULLS LAST, DESC -> NULLS FIRST
19477 // nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, ClickHouse, etc.):
19478 // ASC -> NULLS FIRST, DESC -> NULLS LAST
19479 // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena):
19480 // NULLS LAST always (both ASC and DESC)
19481 if let Expression::Ordered(mut o) = e {
19482 let is_asc = !o.desc;
19483
19484 let is_source_nulls_large = matches!(
19485 source,
19486 DialectType::Oracle
19487 | DialectType::PostgreSQL
19488 | DialectType::Redshift
19489 | DialectType::Snowflake
19490 );
19491 let is_source_nulls_last = matches!(
19492 source,
19493 DialectType::DuckDB
19494 | DialectType::Presto
19495 | DialectType::Trino
19496 | DialectType::Dremio
19497 | DialectType::Athena
19498 | DialectType::ClickHouse
19499 | DialectType::Drill
19500 | DialectType::Exasol
19501 | DialectType::DataFusion
19502 );
19503
19504 // Determine target category to check if default matches
19505 let is_target_nulls_large = matches!(
19506 target,
19507 DialectType::Oracle
19508 | DialectType::PostgreSQL
19509 | DialectType::Redshift
19510 | DialectType::Snowflake
19511 );
19512 let is_target_nulls_last = matches!(
19513 target,
19514 DialectType::DuckDB
19515 | DialectType::Presto
19516 | DialectType::Trino
19517 | DialectType::Dremio
19518 | DialectType::Athena
19519 | DialectType::ClickHouse
19520 | DialectType::Drill
19521 | DialectType::Exasol
19522 | DialectType::DataFusion
19523 );
19524
19525 // Compute the implied nulls_first for source
19526 let source_nulls_first = if is_source_nulls_large {
19527 !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
19528 } else if is_source_nulls_last {
19529 false // NULLS LAST always
19530 } else {
19531 is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
19532 };
19533
19534 // Compute the target's default
19535 let target_nulls_first = if is_target_nulls_large {
19536 !is_asc
19537 } else if is_target_nulls_last {
19538 false
19539 } else {
19540 is_asc
19541 };
19542
19543 // Only add explicit nulls ordering if source and target defaults differ
19544 if source_nulls_first != target_nulls_first {
19545 o.nulls_first = Some(source_nulls_first);
19546 }
19547 // If they match, leave nulls_first as None so the generator won't output it
19548
19549 Ok(Expression::Ordered(o))
19550 } else {
19551 Ok(e)
19552 }
19553 }
Action::StringAggConvert => {
    // Normalize the various STRING_AGG shapes (bare and WITHIN GROUP)
    // into the aggregate each target dialect prefers.
    match e {
        Expression::WithinGroup(wg) => {
            // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
            // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
            let (x_opt, sep_opt, distinct) = match wg.this {
                Expression::AggregateFunction(ref af)
                    if af.name.eq_ignore_ascii_case("STRING_AGG")
                        && af.args.len() >= 2 =>
                {
                    (
                        Some(af.args[0].clone()),
                        Some(af.args[1].clone()),
                        af.distinct,
                    )
                }
                Expression::Function(ref f)
                    if f.name.eq_ignore_ascii_case("STRING_AGG")
                        && f.args.len() >= 2 =>
                {
                    // Plain Function nodes carry no DISTINCT flag.
                    (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
                }
                Expression::StringAgg(ref sa) => {
                    (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
                }
                // Not a STRING_AGG shape this transform recognizes.
                _ => (None, None, false),
            };
            if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
                let order_by = wg.order_by;

                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        // Keep as WithinGroup(StringAgg) for TSQL
                        Ok(Expression::WithinGroup(Box::new(
                            crate::expressions::WithinGroup {
                                this: Expression::StringAgg(Box::new(
                                    crate::expressions::StringAggFunc {
                                        this: x,
                                        separator: Some(sep),
                                        order_by: None, // order_by goes in WithinGroup, not StringAgg
                                        distinct,
                                        filter: None,
                                        limit: None,
                                        inferred_type: None,
                                    },
                                )),
                                order_by,
                            },
                        )))
                    }
                    DialectType::MySQL
                    | DialectType::SingleStore
                    | DialectType::Doris
                    | DialectType::StarRocks => {
                        // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
                        Ok(Expression::GroupConcat(Box::new(
                            crate::expressions::GroupConcatFunc {
                                this: x,
                                separator: Some(sep),
                                order_by: Some(order_by),
                                distinct,
                                filter: None,
                                inferred_type: None,
                            },
                        )))
                    }
                    DialectType::SQLite => {
                        // GROUP_CONCAT(x, sep) - no ORDER BY support
                        Ok(Expression::GroupConcat(Box::new(
                            crate::expressions::GroupConcatFunc {
                                this: x,
                                separator: Some(sep),
                                order_by: None,
                                distinct,
                                filter: None,
                                inferred_type: None,
                            },
                        )))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // STRING_AGG(x, sep ORDER BY z)
                        Ok(Expression::StringAgg(Box::new(
                            crate::expressions::StringAggFunc {
                                this: x,
                                separator: Some(sep),
                                order_by: Some(order_by),
                                distinct,
                                filter: None,
                                limit: None,
                                inferred_type: None,
                            },
                        )))
                    }
                    _ => {
                        // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
                        Ok(Expression::StringAgg(Box::new(
                            crate::expressions::StringAggFunc {
                                this: x,
                                separator: Some(sep),
                                order_by: Some(order_by),
                                distinct,
                                filter: None,
                                limit: None,
                                inferred_type: None,
                            },
                        )))
                    }
                }
            } else {
                // Unrecognized WITHIN GROUP payload: pass through unchanged.
                Ok(Expression::WithinGroup(wg))
            }
        }
        Expression::StringAgg(sa) => {
            match target {
                DialectType::MySQL
                | DialectType::SingleStore
                | DialectType::Doris
                | DialectType::StarRocks => {
                    // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
                    Ok(Expression::GroupConcat(Box::new(
                        crate::expressions::GroupConcatFunc {
                            this: sa.this,
                            separator: sa.separator,
                            order_by: sa.order_by,
                            distinct: sa.distinct,
                            filter: sa.filter,
                            inferred_type: None,
                        },
                    )))
                }
                DialectType::SQLite => {
                    // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
                    Ok(Expression::GroupConcat(Box::new(
                        crate::expressions::GroupConcatFunc {
                            this: sa.this,
                            separator: sa.separator,
                            order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                            distinct: sa.distinct,
                            filter: sa.filter,
                            inferred_type: None,
                        },
                    )))
                }
                DialectType::Spark | DialectType::Databricks => {
                    // STRING_AGG(x, sep) -> LISTAGG(x, sep)
                    Ok(Expression::ListAgg(Box::new(
                        crate::expressions::ListAggFunc {
                            this: sa.this,
                            separator: sa.separator,
                            on_overflow: None,
                            order_by: sa.order_by,
                            distinct: sa.distinct,
                            filter: None,
                            inferred_type: None,
                        },
                    )))
                }
                _ => Ok(Expression::StringAgg(sa)),
            }
        }
        _ => Ok(e),
    }
}
Action::GroupConcatConvert => {
    // Rewrite a multi-argument CONCAT(a, b, c) into the chained `||`
    // operator form (a || b || c) used by PostgreSQL/SQLite before it is
    // placed inside a string aggregate. Anything that is not a
    // multi-argument CONCAT call is returned unchanged.
    fn expand_concat_to_dpipe(expr: Expression) -> Expression {
        match expr {
            Expression::Function(call)
                if call.name.eq_ignore_ascii_case("CONCAT")
                    && call.args.len() > 1 =>
            {
                let mut operands = call.args.into_iter();
                let first = operands.next().expect("len > 1 checked by the guard");
                // Left-fold so the chain associates as ((a || b) || c).
                operands.fold(first, |left, right| {
                    Expression::Concat(Box::new(BinaryOp {
                        left,
                        right,
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    }))
                })
            }
            other => other,
        }
    }
19739 fn expand_concat_to_plus(expr: Expression) -> Expression {
19740 if let Expression::Function(ref f) = expr {
19741 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
19742 let mut result = f.args[0].clone();
19743 for arg in &f.args[1..] {
19744 result = Expression::Add(Box::new(BinaryOp {
19745 left: result,
19746 right: arg.clone(),
19747 left_comments: vec![],
19748 operator_comments: vec![],
19749 trailing_comments: vec![],
19750 inferred_type: None,
19751 }));
19752 }
19753 return result;
19754 }
19755 }
19756 expr
19757 }
19758 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
19759 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
19760 if let Expression::Function(ref f) = expr {
19761 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
19762 let new_args: Vec<Expression> = f
19763 .args
19764 .iter()
19765 .map(|arg| {
19766 Expression::Cast(Box::new(crate::expressions::Cast {
19767 this: arg.clone(),
19768 to: crate::expressions::DataType::VarChar {
19769 length: None,
19770 parenthesized_length: false,
19771 },
19772 trailing_comments: Vec::new(),
19773 double_colon_syntax: false,
19774 format: None,
19775 default: None,
19776 inferred_type: None,
19777 }))
19778 })
19779 .collect();
19780 return Expression::Function(Box::new(
19781 crate::expressions::Function::new(
19782 "CONCAT".to_string(),
19783 new_args,
19784 ),
19785 ));
19786 }
19787 }
19788 expr
19789 }
// Dispatch GROUP_CONCAT to the target's native string aggregate.
if let Expression::GroupConcat(gc) = e {
    match target {
        DialectType::Presto => {
            // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
            let sep = gc.separator.unwrap_or(Expression::string(","));
            // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
            let this = wrap_concat_args_in_varchar_cast(gc.this);
            // Any ORDER BY moves into the ARRAY_AGG call.
            let array_agg =
                Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
                    this,
                    distinct: gc.distinct,
                    filter: gc.filter,
                    order_by: gc.order_by.unwrap_or_default(),
                    name: None,
                    ignore_nulls: None,
                    having_max: None,
                    limit: None,
                    inferred_type: None,
                }));
            Ok(Expression::ArrayJoin(Box::new(
                crate::expressions::ArrayJoinFunc {
                    this: array_agg,
                    separator: sep,
                    null_replacement: None,
                },
            )))
        }
        DialectType::Trino => {
            // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
            let sep = gc.separator.unwrap_or(Expression::string(","));
            // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
            let this = wrap_concat_args_in_varchar_cast(gc.this);
            Ok(Expression::ListAgg(Box::new(
                crate::expressions::ListAggFunc {
                    this,
                    separator: Some(sep),
                    on_overflow: None,
                    order_by: gc.order_by,
                    distinct: gc.distinct,
                    filter: gc.filter,
                    inferred_type: None,
                },
            )))
        }
        DialectType::PostgreSQL
        | DialectType::Redshift
        | DialectType::Snowflake
        | DialectType::DuckDB
        | DialectType::Hive
        | DialectType::ClickHouse => {
            // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
            let sep = gc.separator.unwrap_or(Expression::string(","));
            // Expand CONCAT(a,b,c) -> a || b || c for || dialects
            let this = expand_concat_to_dpipe(gc.this);
            // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
            // (makes MySQL's implied null placement explicit).
            let order_by = if target == DialectType::PostgreSQL {
                gc.order_by.map(|ords| {
                    ords.into_iter()
                        .map(|mut o| {
                            if o.nulls_first.is_none() {
                                if o.desc {
                                    o.nulls_first = Some(false);
                                    // NULLS LAST
                                } else {
                                    o.nulls_first = Some(true);
                                    // NULLS FIRST
                                }
                            }
                            o
                        })
                        .collect()
                })
            } else {
                gc.order_by
            };
            Ok(Expression::StringAgg(Box::new(
                crate::expressions::StringAggFunc {
                    this,
                    separator: Some(sep),
                    order_by,
                    distinct: gc.distinct,
                    filter: gc.filter,
                    limit: None,
                    inferred_type: None,
                },
            )))
        }
        DialectType::TSQL => {
            // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
            // TSQL doesn't support DISTINCT in STRING_AGG
            let sep = gc.separator.unwrap_or(Expression::string(","));
            // Expand CONCAT(a,b,c) -> a + b + c for TSQL
            let this = expand_concat_to_plus(gc.this);
            Ok(Expression::StringAgg(Box::new(
                crate::expressions::StringAggFunc {
                    this,
                    separator: Some(sep),
                    order_by: gc.order_by,
                    distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
                    filter: gc.filter,
                    limit: None,
                    inferred_type: None,
                },
            )))
        }
        DialectType::SQLite => {
            // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
            // SQLite GROUP_CONCAT doesn't support ORDER BY
            // Expand CONCAT(a,b,c) -> a || b || c
            let this = expand_concat_to_dpipe(gc.this);
            Ok(Expression::GroupConcat(Box::new(
                crate::expressions::GroupConcatFunc {
                    this,
                    separator: gc.separator,
                    order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                    distinct: gc.distinct,
                    filter: gc.filter,
                    inferred_type: None,
                },
            )))
        }
        DialectType::Spark | DialectType::Databricks => {
            // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
            let sep = gc.separator.unwrap_or(Expression::string(","));
            Ok(Expression::ListAgg(Box::new(
                crate::expressions::ListAggFunc {
                    this: gc.this,
                    separator: Some(sep),
                    on_overflow: None,
                    order_by: gc.order_by,
                    distinct: gc.distinct,
                    filter: None,
                    inferred_type: None,
                },
            )))
        }
        // NOTE(review): Doris is grouped with the MySQL family in
        // StringAggConvert but is absent here, so it falls through to the
        // default arm below — confirm that is intentional.
        DialectType::MySQL
        | DialectType::SingleStore
        | DialectType::StarRocks => {
            // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
            if gc.separator.is_none() {
                let mut gc = gc;
                gc.separator = Some(Expression::string(","));
                Ok(Expression::GroupConcat(gc))
            } else {
                Ok(Expression::GroupConcat(gc))
            }
        }
        _ => Ok(Expression::GroupConcat(gc)),
    }
} else {
    Ok(e)
}
}
            Action::TempTableHash => {
                // TSQL spells temp tables as `#name`. Convert to standard
                // TEMPORARY tables by stripping the leading '#' wherever a
                // table name can appear (CREATE, references, DROP).
                match e {
                    Expression::CreateTable(mut ct) => {
                        // TSQL #table -> TEMPORARY TABLE with # stripped from name
                        let name = &ct.name.name.name;
                        if name.starts_with('#') {
                            ct.name.name.name = name.trim_start_matches('#').to_string();
                        }
                        // Set temporary flag.
                        // NOTE(review): this runs even when the name had no '#'
                        // prefix — assumes the action only fires on temp-table
                        // statements; confirm at the action-selection site.
                        ct.temporary = true;
                        Ok(Expression::CreateTable(ct))
                    }
                    Expression::Table(mut tr) => {
                        // Strip # from table references
                        let name = &tr.name.name;
                        if name.starts_with('#') {
                            tr.name.name = name.trim_start_matches('#').to_string();
                        }
                        Ok(Expression::Table(tr))
                    }
                    Expression::DropTable(mut dt) => {
                        // Strip # from every name in a (possibly multi-table) DROP
                        for table_ref in &mut dt.names {
                            if table_ref.name.name.starts_with('#') {
                                table_ref.name.name =
                                    table_ref.name.name.trim_start_matches('#').to_string();
                            }
                        }
                        Ok(Expression::DropTable(dt))
                    }
                    // Any other node is left untouched.
                    _ => Ok(e),
                }
            }
            Action::NvlClearOriginal => {
                // Clear the recorded source spelling so NVL is generated under
                // the target dialect's canonical name rather than echoing the
                // original function name back out.
                if let Expression::Nvl(mut f) = e {
                    f.original_name = None;
                    Ok(Expression::Nvl(f))
                } else {
                    Ok(e)
                }
            }
            Action::HiveCastToTryCast => {
                // Hive/Spark CAST returns NULL on conversion failure, which
                // matches TRY_CAST semantics on targets that support it, so
                // rewrite CAST -> TRY_CAST (with two type fix-ups on the way).
                if let Expression::Cast(mut c) = e {
                    // Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ, because
                    // Spark's TIMESTAMP is always timezone-aware.
                    if matches!(target, DialectType::DuckDB)
                        && matches!(source, DialectType::Spark | DialectType::Databricks)
                        && matches!(
                            c.to,
                            DataType::Timestamp {
                                timezone: false,
                                ..
                            }
                        )
                    {
                        c.to = DataType::Custom {
                            name: "TIMESTAMPTZ".to_string(),
                        };
                    }
                    // Spark-family source -> Databricks/Spark target:
                    // VARCHAR(n)/CHAR(n) are parsed as TEXT by Spark, so
                    // normalize them to STRING.
                    if matches!(target, DialectType::Databricks | DialectType::Spark)
                        && matches!(
                            source,
                            DialectType::Spark | DialectType::Databricks | DialectType::Hive
                        )
                        && Self::has_varchar_char_type(&c.to)
                    {
                        c.to = Self::normalize_varchar_to_string(c.to);
                    }
                    Ok(Expression::TryCast(c))
                } else {
                    Ok(e)
                }
            }
            Action::XorExpand => {
                // Expand XOR for dialects lacking an XOR operator:
                //   a XOR b  ->  (a AND (NOT b)) OR ((NOT a) AND b)
                // Snowflake instead uses its builtin BOOLXOR(a, b).
                if let Expression::Xor(xor) = e {
                    // Flatten `this`, `expression`, and any extra operands into
                    // one ordered list (XOR nodes may carry 2+ operands).
                    let mut operands = Vec::new();
                    if let Some(this) = xor.this {
                        operands.push(*this);
                    }
                    if let Some(expr) = xor.expression {
                        operands.push(*expr);
                    }
                    operands.extend(xor.expressions);

                    // Snowflake fast path: BOOLXOR(a, b).
                    // NOTE(review): only taken for exactly two operands; 3+
                    // operands fall through to the AND/OR expansion — confirm.
                    if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
                        let a = operands.remove(0);
                        let b = operands.remove(0);
                        return Ok(Expression::Function(Box::new(Function::new(
                            "BOOLXOR".to_string(),
                            vec![a, b],
                        ))));
                    }

                    // Builds (a AND (NOT b)) OR ((NOT a) AND b); operands are
                    // cloned because each appears twice in the expansion.
                    let make_xor = |a: Expression, b: Expression| -> Expression {
                        let not_b = Expression::Not(Box::new(
                            crate::expressions::UnaryOp::new(b.clone()),
                        ));
                        let not_a = Expression::Not(Box::new(
                            crate::expressions::UnaryOp::new(a.clone()),
                        ));
                        let left_and = Expression::And(Box::new(BinaryOp {
                            left: a,
                            right: Expression::Paren(Box::new(Paren {
                                this: not_b,
                                trailing_comments: Vec::new(),
                            })),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        let right_and = Expression::And(Box::new(BinaryOp {
                            left: Expression::Paren(Box::new(Paren {
                                this: not_a,
                                trailing_comments: Vec::new(),
                            })),
                            right: b,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        Expression::Or(Box::new(BinaryOp {
                            left: Expression::Paren(Box::new(Paren {
                                this: left_and,
                                trailing_comments: Vec::new(),
                            })),
                            right: Expression::Paren(Box::new(Paren {
                                this: right_and,
                                trailing_comments: Vec::new(),
                            })),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }))
                    };

                    if operands.len() >= 2 {
                        // Left-fold: ((a XOR b) XOR c) XOR ...
                        let mut result = make_xor(operands.remove(0), operands.remove(0));
                        for operand in operands {
                            result = make_xor(result, operand);
                        }
                        Ok(result)
                    } else if operands.len() == 1 {
                        // Degenerate single-operand XOR reduces to the operand.
                        Ok(operands.remove(0))
                    } else {
                        // No operands - return FALSE (shouldn't happen)
                        Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                            value: false,
                        }))
                    }
                } else {
                    Ok(e)
                }
            }
            Action::DatePartUnquote => {
                // DATE_PART('month', x) -> DATE_PART(month, x): Snowflake takes
                // the date part as a bare keyword, so the quoted first argument
                // is rewritten as an unquoted lowercase column/identifier.
                if let Expression::Function(mut f) = e {
                    if let Some(Expression::Literal(crate::expressions::Literal::String(s))) =
                        f.args.first()
                    {
                        // Lowercase so e.g. 'MONTH' generates as `month`.
                        let bare_name = s.to_ascii_lowercase();
                        f.args[0] = Expression::Column(Box::new(crate::expressions::Column {
                            name: Identifier::new(bare_name),
                            table: None,
                            join_mark: false,
                            trailing_comments: Vec::new(),
                            span: None,
                            inferred_type: None,
                        }));
                    }
                    // Non-literal first args are left untouched.
                    Ok(Expression::Function(f))
                } else {
                    Ok(e)
                }
            }
20130 Action::ArrayLengthConvert => {
20131 // Extract the argument from the expression
20132 let arg = match e {
20133 Expression::Cardinality(ref f) => f.this.clone(),
20134 Expression::ArrayLength(ref f) => f.this.clone(),
20135 Expression::ArraySize(ref f) => f.this.clone(),
20136 _ => return Ok(e),
20137 };
20138 match target {
20139 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
20140 Ok(Expression::Function(Box::new(Function::new(
20141 "SIZE".to_string(),
20142 vec![arg],
20143 ))))
20144 }
20145 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20146 Ok(Expression::Cardinality(Box::new(
20147 crate::expressions::UnaryFunc::new(arg),
20148 )))
20149 }
20150 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
20151 crate::expressions::UnaryFunc::new(arg),
20152 ))),
20153 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
20154 crate::expressions::UnaryFunc::new(arg),
20155 ))),
20156 DialectType::PostgreSQL | DialectType::Redshift => {
20157 // PostgreSQL ARRAY_LENGTH requires dimension arg
20158 Ok(Expression::Function(Box::new(Function::new(
20159 "ARRAY_LENGTH".to_string(),
20160 vec![arg, Expression::number(1)],
20161 ))))
20162 }
20163 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
20164 crate::expressions::UnaryFunc::new(arg),
20165 ))),
20166 _ => Ok(e), // Keep original
20167 }
20168 }
20169
            Action::JsonExtractToArrow => {
                // JSON_EXTRACT(x, path) -> `x -> path` for SQLite/DuckDB by
                // flipping the node's arrow_syntax flag; the generator emits
                // the operator form.
                if let Expression::JsonExtract(mut f) = e {
                    f.arrow_syntax = true;
                    // Path normalization (string-literal paths only):
                    // SQLite additionally strips `*` wildcards; both targets
                    // get bracket notation folded into dot notation.
                    if let Expression::Literal(Literal::String(ref s)) = f.path {
                        let mut transformed = s.clone();
                        if matches!(target, DialectType::SQLite) {
                            transformed = Self::strip_json_wildcards(&transformed);
                        }
                        transformed = Self::bracket_to_dot_notation(&transformed);
                        // Only replace the literal if anything changed.
                        if transformed != *s {
                            f.path = Expression::string(&transformed);
                        }
                    }
                    Ok(Expression::JsonExtract(f))
                } else {
                    Ok(e)
                }
            }
20191
            Action::JsonExtractToGetJsonObject => {
                // JSON_EXTRACT rewrites for two target families:
                //   PostgreSQL/Redshift: JSON_EXTRACT_PATH[_TEXT](x, 'k1', 'k2', ...)
                //   everything else here: GET_JSON_OBJECT(x, '$.path') (Hive/Spark style)
                if let Expression::JsonExtract(f) = e {
                    if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
                        // Decompose '$.a.b[0]' into individual path keys;
                        // decompose_json_path handles bracket segments.
                        let keys: Vec<Expression> =
                            if let Expression::Literal(Literal::String(ref s)) = f.path {
                                let parts = Self::decompose_json_path(s);
                                parts.into_iter().map(|k| Expression::string(&k)).collect()
                            } else {
                                // Non-literal path: pass through as a single arg.
                                vec![f.path]
                            };
                        // Redshift's variant returns text.
                        let func_name = if matches!(target, DialectType::Redshift) {
                            "JSON_EXTRACT_PATH_TEXT"
                        } else {
                            "JSON_EXTRACT_PATH"
                        };
                        let mut args = vec![f.this];
                        args.extend(keys);
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            args,
                        ))))
                    } else {
                        // GET_JSON_OBJECT(x, '$.path') for Hive/Spark.
                        // Bracket segments with double quotes are normalized to
                        // single quotes, which is what Hive expects.
                        let path = if let Expression::Literal(Literal::String(ref s)) = f.path {
                            let normalized = Self::bracket_to_single_quotes(s);
                            if normalized != *s {
                                Expression::string(&normalized)
                            } else {
                                f.path
                            }
                        } else {
                            f.path
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_JSON_OBJECT".to_string(),
                            vec![f.this, path],
                        ))))
                    }
                } else {
                    Ok(e)
                }
            }
20237
            Action::JsonExtractScalarToGetJsonObject => {
                // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path')
                // for Hive/Spark. The path is passed through verbatim.
                // NOTE(review): unlike JsonExtractToGetJsonObject above, this
                // does not run bracket_to_single_quotes on the path — confirm
                // scalar paths never carry bracket notation.
                if let Expression::JsonExtractScalar(f) = e {
                    Ok(Expression::Function(Box::new(Function::new(
                        "GET_JSON_OBJECT".to_string(),
                        vec![f.this, f.path],
                    ))))
                } else {
                    Ok(e)
                }
            }
20249
            Action::JsonExtractToTsql => {
                // TSQL has no single JSON_EXTRACT: JSON_QUERY returns
                // objects/arrays, JSON_VALUE returns scalars. Emit
                // ISNULL(JSON_QUERY(x, p), JSON_VALUE(x, p)) so whichever form
                // matches the data wins.
                let (this, path) = match e {
                    Expression::JsonExtract(f) => (f.this, f.path),
                    Expression::JsonExtractScalar(f) => (f.this, f.path),
                    _ => return Ok(e),
                };
                // Literal paths: strip wildcards (unsupported in TSQL) and
                // fold bracket notation into dot notation.
                let transformed_path = if let Expression::Literal(Literal::String(ref s)) = path
                {
                    let stripped = Self::strip_json_wildcards(s);
                    let dotted = Self::bracket_to_dot_notation(&stripped);
                    Expression::string(&dotted)
                } else {
                    path
                };
                let json_query = Expression::Function(Box::new(Function::new(
                    "JSON_QUERY".to_string(),
                    vec![this.clone(), transformed_path.clone()],
                )));
                let json_value = Expression::Function(Box::new(Function::new(
                    "JSON_VALUE".to_string(),
                    vec![this, transformed_path],
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "ISNULL".to_string(),
                    vec![json_query, json_value],
                ))))
            }
20279
            Action::JsonExtractToClickHouse => {
                // JSON_EXTRACT[_SCALAR](x, '$.a[0].b') ->
                // JSONExtractString(x, 'a', 1, 'b') for ClickHouse: the path is
                // decomposed into per-segment arguments.
                let (this, path) = match e {
                    Expression::JsonExtract(f) => (f.this, f.path),
                    Expression::JsonExtractScalar(f) => (f.this, f.path),
                    _ => return Ok(e),
                };
                let args: Vec<Expression> =
                    if let Expression::Literal(Literal::String(ref s)) = path {
                        let parts = Self::decompose_json_path(s);
                        let mut result = vec![this];
                        for part in parts {
                            // Numeric segments are array indices; ClickHouse is
                            // 1-based while JSON paths are 0-based, hence +1.
                            if let Ok(idx) = part.parse::<i64>() {
                                result.push(Expression::number(idx + 1));
                            } else {
                                result.push(Expression::string(&part));
                            }
                        }
                        result
                    } else {
                        // Non-literal path: pass through as-is.
                        vec![this, path]
                    };
                Ok(Expression::Function(Box::new(Function::new(
                    "JSONExtractString".to_string(),
                    args,
                ))))
            }
20308
            Action::JsonExtractScalarConvert => {
                // JSON_EXTRACT_SCALAR -> target-specific scalar extraction.
                if let Expression::JsonExtractScalar(f) = e {
                    match target {
                        DialectType::PostgreSQL | DialectType::Redshift => {
                            // JSON_EXTRACT_PATH_TEXT(x, 'k1', 'k2', ...) with the
                            // path decomposed into individual keys.
                            let keys: Vec<Expression> =
                                if let Expression::Literal(Literal::String(ref s)) = f.path {
                                    let parts = Self::decompose_json_path(s);
                                    parts.into_iter().map(|k| Expression::string(&k)).collect()
                                } else {
                                    vec![f.path]
                                };
                            let mut args = vec![f.this];
                            args.extend(keys);
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_EXTRACT_PATH_TEXT".to_string(),
                                args,
                            ))))
                        }
                        DialectType::Snowflake => {
                            // Snowflake takes the whole path as one argument with
                            // the leading '$.' removed.
                            let stripped_path =
                                if let Expression::Literal(Literal::String(ref s)) = f.path {
                                    let stripped = Self::strip_json_dollar_prefix(s);
                                    Expression::string(&stripped)
                                } else {
                                    f.path
                                };
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_EXTRACT_PATH_TEXT".to_string(),
                                vec![f.this, stripped_path],
                            ))))
                        }
                        DialectType::SQLite | DialectType::DuckDB => {
                            // Operator form: x ->> '$.path' (arrow_syntax=true on
                            // a scalar-extract node).
                            Ok(Expression::JsonExtractScalar(Box::new(
                                crate::expressions::JsonExtractFunc {
                                    this: f.this,
                                    path: f.path,
                                    returning: f.returning,
                                    arrow_syntax: true,
                                    hash_arrow_syntax: false,
                                    wrapper_option: None,
                                    quotes_option: None,
                                    on_scalar_string: false,
                                    on_error: None,
                                },
                            )))
                        }
                        // Other targets keep the node unchanged.
                        _ => Ok(Expression::JsonExtractScalar(f)),
                    }
                } else {
                    Ok(e)
                }
            }
20365
            Action::JsonPathNormalize => {
                // Normalize a literal JSON path string to the target dialect's
                // preferred format without changing the node type.
                if let Expression::JsonExtract(mut f) = e {
                    if let Expression::Literal(Literal::String(ref s)) = f.path {
                        let mut normalized = s.clone();
                        match target {
                            DialectType::BigQuery => {
                                // BigQuery: no wildcards; bracket segments use
                                // single quotes.
                                normalized = Self::strip_json_wildcards(&normalized);
                                normalized = Self::bracket_to_single_quotes(&normalized);
                            }
                            DialectType::MySQL => {
                                // MySQL: wildcards are kept; brackets become dot
                                // notation.
                                normalized = Self::bracket_to_dot_notation(&normalized);
                            }
                            // Other targets: leave the path untouched.
                            _ => {}
                        }
                        // Only allocate a new literal if something changed.
                        if normalized != *s {
                            f.path = Expression::string(&normalized);
                        }
                    }
                    Ok(Expression::JsonExtract(f))
                } else {
                    Ok(e)
                }
            }
20393
            Action::JsonQueryValueConvert => {
                // JSON_QUERY / JSON_VALUE -> target-specific extraction form.
                // `is_query` remembers which node we started from for the
                // default (keep-as-function) arm.
                let (f, is_query) = match e {
                    Expression::JsonQuery(f) => (f, true),
                    Expression::JsonValue(f) => (f, false),
                    _ => return Ok(e),
                };
                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        // TSQL splits object/array vs scalar extraction; emit
                        // ISNULL(JSON_QUERY(...), JSON_VALUE(...)) to cover both.
                        let json_query = Expression::Function(Box::new(Function::new(
                            "JSON_QUERY".to_string(),
                            vec![f.this.clone(), f.path.clone()],
                        )));
                        let json_value = Expression::Function(Box::new(Function::new(
                            "JSON_VALUE".to_string(),
                            vec![f.this, f.path],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "ISNULL".to_string(),
                            vec![json_query, json_value],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_JSON_OBJECT".to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // NOTE(review): path is passed whole here, not
                        // decomposed into keys as in JsonExtractToGetJsonObject
                        // — confirm intended.
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_EXTRACT_PATH_TEXT".to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                    DialectType::DuckDB | DialectType::SQLite => {
                        // Operator form: json -> path (arrow syntax), keeping
                        // the node's JSON options intact.
                        Ok(Expression::JsonExtract(Box::new(
                            crate::expressions::JsonExtractFunc {
                                this: f.this,
                                path: f.path,
                                returning: f.returning,
                                arrow_syntax: true,
                                hash_arrow_syntax: false,
                                wrapper_option: f.wrapper_option,
                                quotes_option: f.quotes_option,
                                on_scalar_string: f.on_scalar_string,
                                on_error: f.on_error,
                            },
                        )))
                    }
                    DialectType::Snowflake => {
                        // GET_PATH(PARSE_JSON(json), 'path') with the '$.'
                        // prefix stripped from the path. The input is wrapped
                        // in PARSE_JSON only if it is not already one (either a
                        // literal PARSE_JSON(...) call or a ParseJson node).
                        let json_expr = match &f.this {
                            Expression::Function(ref inner_f)
                                if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                            {
                                f.this
                            }
                            Expression::ParseJson(_) => {
                                // Already a ParseJson expression, which generates as PARSE_JSON(...)
                                f.this
                            }
                            _ => Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                vec![f.this],
                            ))),
                        };
                        let path_str = match &f.path {
                            Expression::Literal(Literal::String(s)) => {
                                let stripped = s.strip_prefix("$.").unwrap_or(s);
                                Expression::Literal(Literal::String(stripped.to_string()))
                            }
                            other => other.clone(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_PATH".to_string(),
                            vec![json_expr, path_str],
                        ))))
                    }
                    _ => {
                        // Default: re-emit as a plain JSON_QUERY/JSON_VALUE call.
                        let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                }
            }
20486
            Action::JsonLiteralToJsonParse => {
                // CAST('x' AS JSON) -> JSON_PARSE('x') (Presto style) or
                // PARSE_JSON('x') for Snowflake. The cast's target type is
                // discarded; only the operand survives.
                if let Expression::Cast(c) = e {
                    let func_name = if matches!(target, DialectType::Snowflake) {
                        "PARSE_JSON"
                    } else {
                        "JSON_PARSE"
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        func_name.to_string(),
                        vec![c.this],
                    ))))
                } else {
                    Ok(e)
                }
            }
20503
            Action::AtTimeZoneConvert => {
                // `expr AT TIME ZONE 'zone'` -> target-specific function call.
                if let Expression::AtTimeZone(atz) = e {
                    match target {
                        // Presto family: AT_TIMEZONE(expr, zone)
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            Ok(Expression::Function(Box::new(Function::new(
                                "AT_TIMEZONE".to_string(),
                                vec![atz.this, atz.zone],
                            ))))
                        }
                        // Spark: FROM_UTC_TIMESTAMP(expr, zone)
                        DialectType::Spark | DialectType::Databricks => {
                            Ok(Expression::Function(Box::new(Function::new(
                                "FROM_UTC_TIMESTAMP".to_string(),
                                vec![atz.this, atz.zone],
                            ))))
                        }
                        DialectType::Snowflake => {
                            // CONVERT_TIMEZONE('zone', expr) — note the argument
                            // order is zone first.
                            Ok(Expression::Function(Box::new(Function::new(
                                "CONVERT_TIMEZONE".to_string(),
                                vec![atz.zone, atz.this],
                            ))))
                        }
                        DialectType::BigQuery => {
                            // TIMESTAMP(DATETIME(expr, 'zone')) — nested calls.
                            let datetime_call = Expression::Function(Box::new(Function::new(
                                "DATETIME".to_string(),
                                vec![atz.this, atz.zone],
                            )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMESTAMP".to_string(),
                                vec![datetime_call],
                            ))))
                        }
                        // Fallback: Presto-style AT_TIMEZONE.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "AT_TIMEZONE".to_string(),
                            vec![atz.this, atz.zone],
                        )))),
                    }
                } else {
                    Ok(e)
                }
            }
20547
            Action::DayOfWeekConvert => {
                // DAY_OF_WEEK -> ISODOW(x) for DuckDB, or the arithmetic
                // ((DAYOFWEEK(x) % 7) + 1) for Spark to shift Spark's
                // Sunday-based numbering onto the source's convention.
                if let Expression::DayOfWeek(f) = e {
                    match target {
                        DialectType::DuckDB => Ok(Expression::Function(Box::new(
                            Function::new("ISODOW".to_string(), vec![f.this]),
                        ))),
                        DialectType::Spark | DialectType::Databricks => {
                            // Build ((DAYOFWEEK(x) % 7) + 1) with explicit parens
                            // so generation preserves the evaluation order.
                            let dayofweek = Expression::Function(Box::new(Function::new(
                                "DAYOFWEEK".to_string(),
                                vec![f.this],
                            )));
                            let modulo = Expression::Mod(Box::new(BinaryOp {
                                left: dayofweek,
                                right: Expression::number(7),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            let paren_mod = Expression::Paren(Box::new(Paren {
                                this: modulo,
                                trailing_comments: Vec::new(),
                            }));
                            let add_one = Expression::Add(Box::new(BinaryOp {
                                left: paren_mod,
                                right: Expression::number(1),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            Ok(Expression::Paren(Box::new(Paren {
                                this: add_one,
                                trailing_comments: Vec::new(),
                            })))
                        }
                        // Other targets keep the DayOfWeek node.
                        _ => Ok(Expression::DayOfWeek(f)),
                    }
                } else {
                    Ok(e)
                }
            }
20592
            Action::MaxByMinByConvert => {
                // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse, ARG_MAX/ARG_MIN
                // for DuckDB; ClickHouse and Spark also drop any 3rd argument.
                // Handles both plain Function and AggregateFunction nodes.
                // NOTE(review): only MAX_BY is tested — any other name (not
                // just MIN_BY) is treated as "min"; assumes the action is only
                // attached to MAX_BY/MIN_BY calls — confirm.
                let (is_max, args) = match &e {
                    Expression::Function(f) => {
                        (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
                    }
                    Expression::AggregateFunction(af) => {
                        (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
                    }
                    _ => return Ok(e),
                };
                match target {
                    DialectType::ClickHouse => {
                        let name = if is_max { "argMax" } else { "argMin" };
                        // ClickHouse argMax/argMin take exactly two args.
                        let mut args = args;
                        args.truncate(2);
                        Ok(Expression::Function(Box::new(Function::new(
                            name.to_string(),
                            args,
                        ))))
                    }
                    DialectType::DuckDB => {
                        // DuckDB keeps all args (ARG_MAX supports an N variant).
                        let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
                        Ok(Expression::Function(Box::new(Function::new(
                            name.to_string(),
                            args,
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark MAX_BY/MIN_BY take two args; drop extras.
                        let mut args = args;
                        args.truncate(2);
                        let name = if is_max { "MAX_BY" } else { "MIN_BY" };
                        Ok(Expression::Function(Box::new(Function::new(
                            name.to_string(),
                            args,
                        ))))
                    }
                    _ => Ok(e),
                }
            }
20635
20636 Action::ElementAtConvert => {
20637 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
20638 let (arr, idx) = if let Expression::ElementAt(bf) = e {
20639 (bf.this, bf.expression)
20640 } else if let Expression::Function(ref f) = e {
20641 if f.args.len() >= 2 {
20642 if let Expression::Function(f) = e {
20643 let mut args = f.args;
20644 let arr = args.remove(0);
20645 let idx = args.remove(0);
20646 (arr, idx)
20647 } else {
20648 unreachable!("outer condition already matched Expression::Function")
20649 }
20650 } else {
20651 return Ok(e);
20652 }
20653 } else {
20654 return Ok(e);
20655 };
20656 match target {
20657 DialectType::PostgreSQL => {
20658 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
20659 let arr_expr = Expression::Paren(Box::new(Paren {
20660 this: arr,
20661 trailing_comments: vec![],
20662 }));
20663 Ok(Expression::Subscript(Box::new(
20664 crate::expressions::Subscript {
20665 this: arr_expr,
20666 index: idx,
20667 },
20668 )))
20669 }
20670 DialectType::BigQuery => {
20671 // BigQuery: convert ARRAY[...] to bare [...] for subscript
20672 let arr_expr = match arr {
20673 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
20674 crate::expressions::ArrayConstructor {
20675 expressions: af.expressions,
20676 bracket_notation: true,
20677 use_list_keyword: false,
20678 },
20679 )),
20680 other => other,
20681 };
20682 let safe_ordinal = Expression::Function(Box::new(Function::new(
20683 "SAFE_ORDINAL".to_string(),
20684 vec![idx],
20685 )));
20686 Ok(Expression::Subscript(Box::new(
20687 crate::expressions::Subscript {
20688 this: arr_expr,
20689 index: safe_ordinal,
20690 },
20691 )))
20692 }
20693 _ => Ok(Expression::Function(Box::new(Function::new(
20694 "ELEMENT_AT".to_string(),
20695 vec![arr, idx],
20696 )))),
20697 }
20698 }
20699
            Action::CurrentUserParens => {
                // CURRENT_USER -> CURRENT_USER() for Snowflake (needs parens).
                // NOTE(review): `e` is discarded unconditionally — assumes this
                // action is only ever attached to CURRENT_USER nodes; confirm
                // at the action-selection site.
                Ok(Expression::Function(Box::new(Function::new(
                    "CURRENT_USER".to_string(),
                    vec![],
                ))))
            }
20707
            Action::ArrayAggToCollectList => {
                // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark.
                // Mirrors Python sqlglot's Hive.arrayagg_sql: ORDER BY is
                // stripped only in the "simple" case (no DISTINCT, no IGNORE
                // NULLS, no LIMIT); otherwise it is preserved.
                match e {
                    Expression::AggregateFunction(mut af) => {
                        let is_simple =
                            !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
                        // COLLECT_LIST takes a single argument; extras dropped.
                        let args = if af.args.is_empty() {
                            vec![]
                        } else {
                            vec![af.args[0].clone()]
                        };
                        af.name = "COLLECT_LIST".to_string();
                        af.args = args;
                        if is_simple {
                            af.order_by = Vec::new();
                        }
                        Ok(Expression::AggregateFunction(af))
                    }
                    Expression::ArrayAgg(agg) => {
                        let is_simple =
                            !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
                        // Rebuild as a generic aggregate named COLLECT_LIST,
                        // carrying over DISTINCT/FILTER/LIMIT/IGNORE NULLS.
                        Ok(Expression::AggregateFunction(Box::new(
                            crate::expressions::AggregateFunction {
                                name: "COLLECT_LIST".to_string(),
                                args: vec![agg.this.clone()],
                                distinct: agg.distinct,
                                filter: agg.filter.clone(),
                                order_by: if is_simple {
                                    Vec::new()
                                } else {
                                    agg.order_by.clone()
                                },
                                limit: agg.limit.clone(),
                                ignore_nulls: agg.ignore_nulls,
                                inferred_type: None,
                            },
                        )))
                    }
                    _ => Ok(e),
                }
            }
20751
            Action::ArraySyntaxConvert => {
                // Normalize array-constructor syntax for the target dialect.
                match e {
                    // ARRAY[1, 2] (bracket_notation=false) -> flip the flag so
                    // the generator picks the dialect-specific output
                    // (ARRAY() for Spark, [] for BigQuery).
                    Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
                        Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                            expressions: arr.expressions,
                            bracket_notation: true,
                            use_list_keyword: false,
                        })),
                    ),
                    // ARRAY(y) function-call style -> ArrayFunc node.
                    // bracket_notation=true for targets that render `[...]`,
                    // false for targets that render `ARRAY[...]` (Presto etc.).
                    Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
                        let bracket = matches!(
                            target,
                            DialectType::BigQuery
                                | DialectType::DuckDB
                                | DialectType::Snowflake
                                | DialectType::ClickHouse
                                | DialectType::StarRocks
                        );
                        Ok(Expression::ArrayFunc(Box::new(
                            crate::expressions::ArrayConstructor {
                                expressions: f.args,
                                bracket_notation: bracket,
                                use_list_keyword: false,
                            },
                        )))
                    }
                    _ => Ok(e),
                }
            }
20785
            Action::CastToJsonForSpark => {
                // CAST(x AS JSON) -> TO_JSON(x) for Spark; the cast's target
                // type is discarded.
                if let Expression::Cast(c) = e {
                    Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON".to_string(),
                        vec![c.this],
                    ))))
                } else {
                    Ok(e)
                }
            }
20797
            Action::CastJsonToFromJson => {
                // CAST(PARSE_JSON(literal) AS ARRAY/MAP/STRUCT)
                //   -> FROM_JSON(literal, 'type string') for Spark.
                if let Expression::Cast(c) = e {
                    // Unwrap ParseJson to get the raw JSON literal; a bare
                    // operand is passed through as-is.
                    let literal_expr = if let Expression::ParseJson(pj) = c.this {
                        pj.this
                    } else {
                        c.this
                    };
                    // Render the cast's target DataType in Spark's textual
                    // type-string format (e.g. "array<int>").
                    let type_str = Self::data_type_to_spark_string(&c.to);
                    Ok(Expression::Function(Box::new(Function::new(
                        "FROM_JSON".to_string(),
                        vec![literal_expr, Expression::Literal(Literal::String(type_str))],
                    ))))
                } else {
                    Ok(e)
                }
            }
20817
            Action::ToJsonConvert => {
                // TO_JSON(x) -> target-specific conversion.
                if let Expression::ToJson(f) = e {
                    let arg = f.this;
                    match target {
                        DialectType::Presto | DialectType::Trino => {
                            // JSON_FORMAT(CAST(x AS JSON)): Presto has no
                            // TO_JSON; format the value after casting to JSON.
                            let cast_json = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Custom {
                                    name: "JSON".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_FORMAT".to_string(),
                                vec![cast_json],
                            ))))
                        }
                        // BigQuery: TO_JSON_STRING(x) returns a string directly.
                        DialectType::BigQuery => Ok(Expression::Function(Box::new(
                            Function::new("TO_JSON_STRING".to_string(), vec![arg]),
                        ))),
                        DialectType::DuckDB => {
                            // CAST(TO_JSON(x) AS TEXT): DuckDB's TO_JSON yields
                            // a JSON value; cast to text for string output.
                            let to_json =
                                Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
                                    this: arg,
                                    original_name: None,
                                    inferred_type: None,
                                }));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: to_json,
                                to: DataType::Text,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        // Other targets: rebuild the ToJson node (dropping any
                        // recorded original name and inferred type).
                        _ => Ok(Expression::ToJson(Box::new(
                            crate::expressions::UnaryFunc {
                                this: arg,
                                original_name: None,
                                inferred_type: None,
                            },
                        ))),
                    }
                } else {
                    Ok(e)
                }
            }
20874
            Action::VarianceToClickHouse => {
                // VARIANCE(x) -> varSamp(x): ClickHouse's sample-variance
                // function (names are case-sensitive in ClickHouse).
                if let Expression::Variance(f) = e {
                    Ok(Expression::Function(Box::new(Function::new(
                        "varSamp".to_string(),
                        vec![f.this],
                    ))))
                } else {
                    Ok(e)
                }
            }

            Action::StddevToClickHouse => {
                // STDDEV(x) -> stddevSamp(x): ClickHouse's sample standard
                // deviation function.
                if let Expression::Stddev(f) = e {
                    Ok(Expression::Function(Box::new(Function::new(
                        "stddevSamp".to_string(),
                        vec![f.this],
                    ))))
                } else {
                    Ok(e)
                }
            }
20896
            Action::ApproxQuantileConvert => {
                // APPROX_QUANTILE(x[, q]) -> APPROX_PERCENTILE(x[, q]); the
                // quantile argument is optional and only appended when present.
                if let Expression::ApproxQuantile(aq) = e {
                    let mut args = vec![*aq.this];
                    if let Some(q) = aq.quantile {
                        args.push(*q);
                    }
                    Ok(Expression::Function(Box::new(Function::new(
                        "APPROX_PERCENTILE".to_string(),
                        args,
                    ))))
                } else {
                    Ok(e)
                }
            }
20911
20912 Action::DollarParamConvert => {
20913 if let Expression::Parameter(p) = e {
20914 Ok(Expression::Parameter(Box::new(
20915 crate::expressions::Parameter {
20916 name: p.name,
20917 index: p.index,
20918 style: crate::expressions::ParameterStyle::At,
20919 quoted: p.quoted,
20920 string_quoted: p.string_quoted,
20921 expression: p.expression,
20922 },
20923 )))
20924 } else {
20925 Ok(e)
20926 }
20927 }
20928
20929 Action::EscapeStringNormalize => {
20930 if let Expression::Literal(Literal::EscapeString(s)) = e {
20931 // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
20932 let stripped = if s.starts_with("e:") || s.starts_with("E:") {
20933 s[2..].to_string()
20934 } else {
20935 s
20936 };
20937 let normalized = stripped
20938 .replace('\n', "\\n")
20939 .replace('\r', "\\r")
20940 .replace('\t', "\\t");
20941 match target {
20942 DialectType::BigQuery => {
20943 // BigQuery: e'...' -> CAST(b'...' AS STRING)
20944 // Use Raw for the b'...' part to avoid double-escaping
20945 let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
20946 Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
20947 }
20948 _ => Ok(Expression::Literal(Literal::EscapeString(normalized))),
20949 }
20950 } else {
20951 Ok(e)
20952 }
20953 }
20954
            Action::StraightJoinCase => {
                // `straight_join` handling: lowercase the identifier for
                // DuckDB; additionally quote it for MySQL where it is a
                // reserved keyword.
                // NOTE(review): the comparison is exact-uppercase
                // "STRAIGHT_JOIN" — assumes upstream normalization; confirm
                // mixed-case input cannot reach here.
                if let Expression::Column(col) = e {
                    if col.name.name == "STRAIGHT_JOIN" {
                        let mut new_col = col;
                        new_col.name.name = "straight_join".to_string();
                        if matches!(target, DialectType::MySQL) {
                            // MySQL: needs quoting since it's a reserved keyword
                            new_col.name.quoted = true;
                        }
                        Ok(Expression::Column(new_col))
                    } else {
                        Ok(Expression::Column(col))
                    }
                } else {
                    Ok(e)
                }
            }
20973
            Action::TablesampleReservoir => {
                // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB: force the
                // sampling method to RESERVOIR and mark it explicit so the
                // generator prints the method keyword.
                if let Expression::TableSample(mut ts) = e {
                    if let Some(ref mut sample) = ts.sample {
                        sample.method = crate::expressions::SampleMethod::Reservoir;
                        sample.explicit_method = true;
                    }
                    Ok(Expression::TableSample(ts))
                } else {
                    Ok(e)
                }
            }

            Action::TablesampleSnowflakeStrip => {
                // Snowflake target from a non-Snowflake source: suppress the
                // sampling method keyword and the PERCENT unit, on both
                // standalone TableSample nodes and samples attached to tables.
                match e {
                    Expression::TableSample(mut ts) => {
                        if let Some(ref mut sample) = ts.sample {
                            sample.suppress_method_output = true;
                            sample.unit_after_size = false;
                            sample.is_percent = false;
                        }
                        Ok(Expression::TableSample(ts))
                    }
                    Expression::Table(mut t) => {
                        if let Some(ref mut sample) = t.table_sample {
                            sample.suppress_method_output = true;
                            sample.unit_after_size = false;
                            sample.is_percent = false;
                        }
                        Ok(Expression::Table(t))
                    }
                    _ => Ok(e),
                }
            }
21009
21010 Action::FirstToAnyValue => {
21011 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
21012 if let Expression::First(mut agg) = e {
21013 agg.ignore_nulls = None;
21014 agg.name = Some("ANY_VALUE".to_string());
21015 Ok(Expression::AnyValue(agg))
21016 } else {
21017 Ok(e)
21018 }
21019 }
21020
21021 Action::ArrayIndexConvert => {
21022 // Subscript index: 1-based to 0-based for BigQuery
21023 if let Expression::Subscript(mut sub) = e {
21024 if let Expression::Literal(Literal::Number(ref n)) = sub.index {
21025 if let Ok(val) = n.parse::<i64>() {
21026 sub.index =
21027 Expression::Literal(Literal::Number((val - 1).to_string()));
21028 }
21029 }
21030 Ok(Expression::Subscript(sub))
21031 } else {
21032 Ok(e)
21033 }
21034 }
21035
21036 Action::AnyValueIgnoreNulls => {
21037 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
21038 if let Expression::AnyValue(mut av) = e {
21039 if av.ignore_nulls.is_none() {
21040 av.ignore_nulls = Some(true);
21041 }
21042 Ok(Expression::AnyValue(av))
21043 } else {
21044 Ok(e)
21045 }
21046 }
21047
21048 Action::BigQueryNullsOrdering => {
21049 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
21050 if let Expression::WindowFunction(mut wf) = e {
21051 for o in &mut wf.over.order_by {
21052 o.nulls_first = None;
21053 }
21054 Ok(Expression::WindowFunction(wf))
21055 } else if let Expression::Ordered(mut o) = e {
21056 o.nulls_first = None;
21057 Ok(Expression::Ordered(o))
21058 } else {
21059 Ok(e)
21060 }
21061 }
21062
21063 Action::SnowflakeFloatProtect => {
21064 // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
21065 // Snowflake's target transform from converting it to DOUBLE.
21066 // Non-Snowflake sources should keep their FLOAT spelling.
21067 if let Expression::DataType(DataType::Float { .. }) = e {
21068 Ok(Expression::DataType(DataType::Custom {
21069 name: "FLOAT".to_string(),
21070 }))
21071 } else {
21072 Ok(e)
21073 }
21074 }
21075
21076 Action::MysqlNullsOrdering => {
21077 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
21078 if let Expression::Ordered(mut o) = e {
21079 let nulls_last = o.nulls_first == Some(false);
21080 let desc = o.desc;
21081 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
21082 // If requested ordering matches default, just strip NULLS clause
21083 let matches_default = if desc {
21084 // DESC default is NULLS FIRST, so nulls_first=true matches
21085 o.nulls_first == Some(true)
21086 } else {
21087 // ASC default is NULLS LAST, so nulls_first=false matches
21088 nulls_last
21089 };
21090 if matches_default {
21091 o.nulls_first = None;
21092 Ok(Expression::Ordered(o))
21093 } else {
21094 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
21095 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
21096 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
21097 let null_val = if desc { 1 } else { 0 };
21098 let non_null_val = if desc { 0 } else { 1 };
21099 let _case_expr = Expression::Case(Box::new(Case {
21100 operand: None,
21101 whens: vec![(
21102 Expression::IsNull(Box::new(crate::expressions::IsNull {
21103 this: o.this.clone(),
21104 not: false,
21105 postfix_form: false,
21106 })),
21107 Expression::number(null_val),
21108 )],
21109 else_: Some(Expression::number(non_null_val)),
21110 comments: Vec::new(),
21111 inferred_type: None,
21112 }));
21113 o.nulls_first = None;
21114 // Return a tuple of [case_expr, ordered_expr]
21115 // We need to return both as part of the ORDER BY
21116 // But since transform_recursive processes individual expressions,
21117 // we can't easily add extra ORDER BY items here.
21118 // Instead, strip the nulls_first
21119 o.nulls_first = None;
21120 Ok(Expression::Ordered(o))
21121 }
21122 } else {
21123 Ok(e)
21124 }
21125 }
21126
            Action::MysqlNullsLastRewrite => {
                // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
                // to simulate NULLS LAST for ASC ordering.
                // The window's ORDER BY list is rebuilt: ascending keys get a
                // synthetic 0/1 NULL-flag key inserted in front of them so NULL
                // rows sort after non-NULL rows; descending keys pass through.
                if let Expression::WindowFunction(mut wf) = e {
                    let mut new_order_by = Vec::new();
                    for o in wf.over.order_by {
                        if !o.desc {
                            // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
                            // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
                            // (1 for NULL rows pushes them last when sorted ASC).
                            let case_expr = Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(
                                    Expression::IsNull(Box::new(crate::expressions::IsNull {
                                        this: o.this.clone(),
                                        not: false,
                                        postfix_form: false,
                                    })),
                                    Expression::Literal(Literal::Number("1".to_string())),
                                )],
                                else_: Some(Expression::Literal(Literal::Number(
                                    "0".to_string(),
                                ))),
                                comments: Vec::new(),
                                inferred_type: None,
                            }));
                            // Synthetic key first, then the original key with its
                            // (unsupported) NULLS modifier stripped.
                            new_order_by.push(crate::expressions::Ordered {
                                this: case_expr,
                                desc: false,
                                nulls_first: None,
                                explicit_asc: false,
                                with_fill: None,
                            });
                            let mut ordered = o;
                            ordered.nulls_first = None;
                            new_order_by.push(ordered);
                        } else {
                            // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
                            // No change needed — only the modifier is stripped.
                            let mut ordered = o;
                            ordered.nulls_first = None;
                            new_order_by.push(ordered);
                        }
                    }
                    wf.over.order_by = new_order_by;
                    Ok(Expression::WindowFunction(wf))
                } else {
                    Ok(e)
                }
            }
21176
21177 Action::RespectNullsConvert => {
21178 // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
21179 if let Expression::WindowFunction(mut wf) = e {
21180 match &mut wf.this {
21181 Expression::FirstValue(ref mut vf) => {
21182 if vf.ignore_nulls == Some(false) {
21183 vf.ignore_nulls = None;
21184 // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
21185 // but that's handled by the generator's NULLS ordering
21186 }
21187 }
21188 Expression::LastValue(ref mut vf) => {
21189 if vf.ignore_nulls == Some(false) {
21190 vf.ignore_nulls = None;
21191 }
21192 }
21193 _ => {}
21194 }
21195 Ok(Expression::WindowFunction(wf))
21196 } else {
21197 Ok(e)
21198 }
21199 }
21200
21201 Action::CreateTableStripComment => {
21202 // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
21203 if let Expression::CreateTable(mut ct) = e {
21204 for col in &mut ct.columns {
21205 col.comment = None;
21206 col.constraints.retain(|c| {
21207 !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
21208 });
21209 // Also remove Comment from constraint_order
21210 col.constraint_order.retain(|c| {
21211 !matches!(c, crate::expressions::ConstraintType::Comment)
21212 });
21213 }
21214 // Strip properties (USING, PARTITIONED BY, etc.)
21215 ct.properties.clear();
21216 Ok(Expression::CreateTable(ct))
21217 } else {
21218 Ok(e)
21219 }
21220 }
21221
21222 Action::AlterTableToSpRename => {
21223 // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
21224 if let Expression::AlterTable(ref at) = e {
21225 if let Some(crate::expressions::AlterTableAction::RenameTable(
21226 ref new_tbl,
21227 )) = at.actions.first()
21228 {
21229 // Build the old table name using TSQL bracket quoting
21230 let old_name = if let Some(ref schema) = at.name.schema {
21231 if at.name.name.quoted || schema.quoted {
21232 format!("[{}].[{}]", schema.name, at.name.name.name)
21233 } else {
21234 format!("{}.{}", schema.name, at.name.name.name)
21235 }
21236 } else {
21237 if at.name.name.quoted {
21238 format!("[{}]", at.name.name.name)
21239 } else {
21240 at.name.name.name.clone()
21241 }
21242 };
21243 let new_name = new_tbl.name.name.clone();
21244 // EXEC sp_rename 'old_name', 'new_name'
21245 let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
21246 Ok(Expression::Raw(crate::expressions::Raw { sql }))
21247 } else {
21248 Ok(e)
21249 }
21250 } else {
21251 Ok(e)
21252 }
21253 }
21254
            Action::SnowflakeIntervalFormat => {
                // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake:
                // the unit is folded into the string literal and the structured
                // unit field is cleared so the generator prints only the literal.
                if let Expression::Interval(mut iv) = e {
                    // Only applies when the value is a plain string literal AND a
                    // unit spec is present; anything else passes through unchanged.
                    if let (
                        Some(Expression::Literal(Literal::String(ref val))),
                        Some(ref unit_spec),
                    ) = (&iv.this, &iv.unit)
                    {
                        // Map the structured unit to its SQL keyword. Compound /
                        // non-Simple specs yield "" and are deliberately left alone.
                        let unit_str = match unit_spec {
                            crate::expressions::IntervalUnitSpec::Simple { unit, .. } => {
                                match unit {
                                    crate::expressions::IntervalUnit::Year => "YEAR",
                                    crate::expressions::IntervalUnit::Quarter => "QUARTER",
                                    crate::expressions::IntervalUnit::Month => "MONTH",
                                    crate::expressions::IntervalUnit::Week => "WEEK",
                                    crate::expressions::IntervalUnit::Day => "DAY",
                                    crate::expressions::IntervalUnit::Hour => "HOUR",
                                    crate::expressions::IntervalUnit::Minute => "MINUTE",
                                    crate::expressions::IntervalUnit::Second => "SECOND",
                                    crate::expressions::IntervalUnit::Millisecond => {
                                        "MILLISECOND"
                                    }
                                    crate::expressions::IntervalUnit::Microsecond => {
                                        "MICROSECOND"
                                    }
                                    crate::expressions::IntervalUnit::Nanosecond => {
                                        "NANOSECOND"
                                    }
                                }
                            }
                            _ => "",
                        };
                        if !unit_str.is_empty() {
                            // '2' + HOUR -> '2 HOUR'; drop the structured unit so it
                            // is not rendered a second time.
                            let combined = format!("{} {}", val, unit_str);
                            iv.this = Some(Expression::Literal(Literal::String(combined)));
                            iv.unit = None;
                        }
                    }
                    Ok(Expression::Interval(iv))
                } else {
                    Ok(e)
                }
            }
21298
21299 Action::ArrayConcatBracketConvert => {
21300 // Expression::Array/ArrayFunc -> target-specific
21301 // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
21302 // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
21303 match e {
21304 Expression::Array(arr) => {
21305 if matches!(target, DialectType::Redshift) {
21306 Ok(Expression::Function(Box::new(Function::new(
21307 "ARRAY".to_string(),
21308 arr.expressions,
21309 ))))
21310 } else {
21311 Ok(Expression::ArrayFunc(Box::new(
21312 crate::expressions::ArrayConstructor {
21313 expressions: arr.expressions,
21314 bracket_notation: false,
21315 use_list_keyword: false,
21316 },
21317 )))
21318 }
21319 }
21320 Expression::ArrayFunc(arr) => {
21321 // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
21322 if matches!(target, DialectType::Redshift) {
21323 Ok(Expression::Function(Box::new(Function::new(
21324 "ARRAY".to_string(),
21325 arr.expressions,
21326 ))))
21327 } else {
21328 Ok(Expression::ArrayFunc(arr))
21329 }
21330 }
21331 _ => Ok(e),
21332 }
21333 }
21334
21335 Action::BitAggFloatCast => {
21336 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
21337 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
21338 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
21339 let int_type = DataType::Int {
21340 length: None,
21341 integer_spelling: false,
21342 };
21343 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
21344 if let Expression::Cast(c) = agg_this {
21345 match &c.to {
21346 DataType::Float { .. }
21347 | DataType::Double { .. }
21348 | DataType::Custom { .. } => {
21349 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
21350 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
21351 let inner_type = match &c.to {
21352 DataType::Float {
21353 precision, scale, ..
21354 } => DataType::Float {
21355 precision: *precision,
21356 scale: *scale,
21357 real_spelling: true,
21358 },
21359 other => other.clone(),
21360 };
21361 let inner_cast =
21362 Expression::Cast(Box::new(crate::expressions::Cast {
21363 this: c.this.clone(),
21364 to: inner_type,
21365 trailing_comments: Vec::new(),
21366 double_colon_syntax: false,
21367 format: None,
21368 default: None,
21369 inferred_type: None,
21370 }));
21371 let rounded = Expression::Function(Box::new(Function::new(
21372 "ROUND".to_string(),
21373 vec![inner_cast],
21374 )));
21375 Expression::Cast(Box::new(crate::expressions::Cast {
21376 this: rounded,
21377 to: int_dt,
21378 trailing_comments: Vec::new(),
21379 double_colon_syntax: false,
21380 format: None,
21381 default: None,
21382 inferred_type: None,
21383 }))
21384 }
21385 DataType::Decimal { .. } => {
21386 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
21387 Expression::Cast(Box::new(crate::expressions::Cast {
21388 this: Expression::Cast(c),
21389 to: int_dt,
21390 trailing_comments: Vec::new(),
21391 double_colon_syntax: false,
21392 format: None,
21393 default: None,
21394 inferred_type: None,
21395 }))
21396 }
21397 _ => Expression::Cast(c),
21398 }
21399 } else {
21400 agg_this
21401 }
21402 };
21403 match e {
21404 Expression::BitwiseOrAgg(mut f) => {
21405 f.this = wrap_agg(f.this, int_type);
21406 Ok(Expression::BitwiseOrAgg(f))
21407 }
21408 Expression::BitwiseAndAgg(mut f) => {
21409 let int_type = DataType::Int {
21410 length: None,
21411 integer_spelling: false,
21412 };
21413 f.this = wrap_agg(f.this, int_type);
21414 Ok(Expression::BitwiseAndAgg(f))
21415 }
21416 Expression::BitwiseXorAgg(mut f) => {
21417 let int_type = DataType::Int {
21418 length: None,
21419 integer_spelling: false,
21420 };
21421 f.this = wrap_agg(f.this, int_type);
21422 Ok(Expression::BitwiseXorAgg(f))
21423 }
21424 _ => Ok(e),
21425 }
21426 }
21427
21428 Action::BitAggSnowflakeRename => {
21429 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
21430 match e {
21431 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
21432 Function::new("BITORAGG".to_string(), vec![f.this]),
21433 ))),
21434 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
21435 Function::new("BITANDAGG".to_string(), vec![f.this]),
21436 ))),
21437 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
21438 Function::new("BITXORAGG".to_string(), vec![f.this]),
21439 ))),
21440 _ => Ok(e),
21441 }
21442 }
21443
21444 Action::StrftimeCastTimestamp => {
21445 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
21446 if let Expression::Cast(mut c) = e {
21447 if matches!(
21448 c.to,
21449 DataType::Timestamp {
21450 timezone: false,
21451 ..
21452 }
21453 ) {
21454 c.to = DataType::Custom {
21455 name: "TIMESTAMP_NTZ".to_string(),
21456 };
21457 }
21458 Ok(Expression::Cast(c))
21459 } else {
21460 Ok(e)
21461 }
21462 }
21463
21464 Action::DecimalDefaultPrecision => {
21465 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
21466 if let Expression::Cast(mut c) = e {
21467 if matches!(
21468 c.to,
21469 DataType::Decimal {
21470 precision: None,
21471 ..
21472 }
21473 ) {
21474 c.to = DataType::Decimal {
21475 precision: Some(18),
21476 scale: Some(3),
21477 };
21478 }
21479 Ok(Expression::Cast(c))
21480 } else {
21481 Ok(e)
21482 }
21483 }
21484
            Action::FilterToIff => {
                // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL)).
                // Targets without FILTER support get the condition folded into the
                // aggregate argument via IFF; NULLs are ignored by the aggregates,
                // which reproduces the filtering effect.
                if let Expression::Filter(f) = e {
                    let condition = *f.expression;
                    let agg = *f.this;
                    // Strip WHERE from condition
                    let cond = match condition {
                        Expression::Where(w) => w.this,
                        other => other,
                    };
                    // Extract the aggregate function and its argument
                    // We want AVG(IFF(condition, x, NULL))
                    match agg {
                        Expression::Function(mut func) => {
                            if !func.args.is_empty() {
                                // Generic call: wrap the FIRST argument only.
                                let orig_arg = func.args[0].clone();
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![cond, orig_arg, Expression::Null(Null)],
                                )));
                                func.args[0] = iff_call;
                                Ok(Expression::Function(func))
                            } else {
                                // Zero-arg call: nothing to wrap, keep the Filter node.
                                Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                                    this: Box::new(Expression::Function(func)),
                                    expression: Box::new(cond),
                                })))
                            }
                        }
                        Expression::Avg(mut avg) => {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![cond, avg.this.clone(), Expression::Null(Null)],
                            )));
                            avg.this = iff_call;
                            Ok(Expression::Avg(avg))
                        }
                        Expression::Sum(mut s) => {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![cond, s.this.clone(), Expression::Null(Null)],
                            )));
                            s.this = iff_call;
                            Ok(Expression::Sum(s))
                        }
                        Expression::Count(mut c) => {
                            // COUNT(*) has no argument (this == None); in that case the
                            // condition is silently dropped. NOTE(review): a bare
                            // COUNT(*) FILTER would lose its filter here — confirm
                            // upstream guarantees an argument is present.
                            if let Some(ref this_expr) = c.this {
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![cond, this_expr.clone(), Expression::Null(Null)],
                                )));
                                c.this = Some(iff_call);
                            }
                            Ok(Expression::Count(c))
                        }
                        other => {
                            // Fallback: keep as Filter
                            Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                                this: Box::new(other),
                                expression: Box::new(cond),
                            })))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
21552
            Action::AggFilterToIff => {
                // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
                // Unlike Action::FilterToIff (which unwraps a Filter node), this
                // variant handles aggregates that carry their filter inline on the
                // AggFunc payload itself.
                // Helper macro to handle the common AggFunc case: take() detaches
                // the filter so it is not rendered again, then the argument is
                // wrapped in IFF(cond, arg, NULL).
                macro_rules! handle_agg_filter_to_iff {
                    ($variant:ident, $agg:expr) => {{
                        let mut agg = $agg;
                        if let Some(filter_cond) = agg.filter.take() {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
                            )));
                            agg.this = iff_call;
                        }
                        Ok(Expression::$variant(agg))
                    }};
                }

                match e {
                    Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
                    Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
                    Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
                    Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
                    Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
                    Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
                    Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
                    Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
                    Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
                    Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
                    Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
                    Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
                    Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
                    Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
                    Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
                    Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
                    Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
                    Expression::ApproxDistinct(agg) => {
                        handle_agg_filter_to_iff!(ApproxDistinct, agg)
                    }
                    // Count is special-cased: its argument is Option-al (COUNT(*)),
                    // and a filter on an argument-less COUNT is dropped.
                    Expression::Count(mut c) => {
                        if let Some(filter_cond) = c.filter.take() {
                            if let Some(ref this_expr) = c.this {
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![
                                        filter_cond,
                                        this_expr.clone(),
                                        Expression::Null(Null),
                                    ],
                                )));
                                c.this = Some(iff_call);
                            }
                        }
                        Ok(Expression::Count(c))
                    }
                    other => Ok(other),
                }
            }
21610
            Action::JsonToGetPath => {
                // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key')
                if let Expression::JsonExtract(je) = e {
                    // Convert to PARSE_JSON() wrapper:
                    // - JSON(x) -> PARSE_JSON(x)
                    // - PARSE_JSON(x) -> keep as-is
                    // - anything else -> wrap in PARSE_JSON()
                    let this = match &je.this {
                        Expression::Function(f)
                            if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
                        {
                            Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                f.args.clone(),
                            )))
                        }
                        Expression::Function(f)
                            if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                        {
                            je.this.clone()
                        }
                        // GET_PATH result is already JSON, don't wrap
                        // (this happens on nested extraction chains).
                        Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
                            je.this.clone()
                        }
                        other => {
                            // Wrap non-JSON expressions in PARSE_JSON()
                            Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                vec![other.clone()],
                            )))
                        }
                    };
                    // Convert path: extract key from JSONPath or strip $. prefix from string
                    let path = match &je.path {
                        Expression::JSONPath(jp) => {
                            // Extract the key from JSONPath: $root.key -> 'key'
                            // Multiple keys are joined with '.' ($.a.b -> 'a.b');
                            // non-key segments (indices, wildcards) are skipped.
                            let mut key_parts = Vec::new();
                            for expr in &jp.expressions {
                                match expr {
                                    Expression::JSONPathRoot(_) => {} // skip root
                                    Expression::JSONPathKey(k) => {
                                        if let Expression::Literal(Literal::String(s)) =
                                            &*k.this
                                        {
                                            key_parts.push(s.clone());
                                        }
                                    }
                                    _ => {}
                                }
                            }
                            if !key_parts.is_empty() {
                                Expression::Literal(Literal::String(key_parts.join(".")))
                            } else {
                                // No usable keys: keep the original path untouched.
                                je.path.clone()
                            }
                        }
                        // String paths: drop the leading '$.' or bare '$', then run
                        // strip_json_wildcards (presumably removes [*]-style
                        // segments — TODO confirm against its definition).
                        Expression::Literal(Literal::String(s)) if s.starts_with("$.") => {
                            let stripped = Self::strip_json_wildcards(&s[2..].to_string());
                            Expression::Literal(Literal::String(stripped))
                        }
                        Expression::Literal(Literal::String(s)) if s.starts_with('$') => {
                            let stripped = Self::strip_json_wildcards(&s[1..].to_string());
                            Expression::Literal(Literal::String(stripped))
                        }
                        _ => je.path.clone(),
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        "GET_PATH".to_string(),
                        vec![this, path],
                    ))))
                } else {
                    Ok(e)
                }
            }
21686
21687 Action::StructToRow => {
21688 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
21689 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
21690
21691 // Extract key-value pairs from either Struct or MapFunc
21692 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
21693 Expression::Struct(s) => Some(
21694 s.fields
21695 .iter()
21696 .map(|(opt_name, field_expr)| {
21697 if let Some(name) = opt_name {
21698 (name.clone(), field_expr.clone())
21699 } else if let Expression::NamedArgument(na) = field_expr {
21700 (na.name.name.clone(), na.value.clone())
21701 } else {
21702 (String::new(), field_expr.clone())
21703 }
21704 })
21705 .collect(),
21706 ),
21707 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
21708 m.keys
21709 .iter()
21710 .zip(m.values.iter())
21711 .map(|(key, value)| {
21712 let key_name = match key {
21713 Expression::Literal(Literal::String(s)) => s.clone(),
21714 Expression::Identifier(id) => id.name.clone(),
21715 _ => String::new(),
21716 };
21717 (key_name, value.clone())
21718 })
21719 .collect(),
21720 ),
21721 _ => None,
21722 };
21723
21724 if let Some(pairs) = kv_pairs {
21725 let mut named_args = Vec::new();
21726 for (key_name, value) in pairs {
21727 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
21728 named_args.push(Expression::Alias(Box::new(
21729 crate::expressions::Alias::new(
21730 value,
21731 Identifier::new(key_name),
21732 ),
21733 )));
21734 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
21735 named_args.push(value);
21736 } else {
21737 named_args.push(value);
21738 }
21739 }
21740
21741 if matches!(target, DialectType::BigQuery) {
21742 Ok(Expression::Function(Box::new(Function::new(
21743 "STRUCT".to_string(),
21744 named_args,
21745 ))))
21746 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
21747 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
21748 let row_func = Expression::Function(Box::new(Function::new(
21749 "ROW".to_string(),
21750 named_args,
21751 )));
21752
21753 // Try to infer types for each pair
21754 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
21755 Expression::Struct(s) => Some(
21756 s.fields
21757 .iter()
21758 .map(|(opt_name, field_expr)| {
21759 if let Some(name) = opt_name {
21760 (name.clone(), field_expr.clone())
21761 } else if let Expression::NamedArgument(na) = field_expr
21762 {
21763 (na.name.name.clone(), na.value.clone())
21764 } else {
21765 (String::new(), field_expr.clone())
21766 }
21767 })
21768 .collect(),
21769 ),
21770 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
21771 m.keys
21772 .iter()
21773 .zip(m.values.iter())
21774 .map(|(key, value)| {
21775 let key_name = match key {
21776 Expression::Literal(Literal::String(s)) => {
21777 s.clone()
21778 }
21779 Expression::Identifier(id) => id.name.clone(),
21780 _ => String::new(),
21781 };
21782 (key_name, value.clone())
21783 })
21784 .collect(),
21785 ),
21786 _ => None,
21787 };
21788
21789 if let Some(pairs) = kv_pairs_again {
21790 // Infer types for all values
21791 let mut all_inferred = true;
21792 let mut fields = Vec::new();
21793 for (name, value) in &pairs {
21794 let inferred_type = match value {
21795 Expression::Literal(Literal::Number(n)) => {
21796 if n.contains('.') {
21797 Some(DataType::Double {
21798 precision: None,
21799 scale: None,
21800 })
21801 } else {
21802 Some(DataType::Int {
21803 length: None,
21804 integer_spelling: true,
21805 })
21806 }
21807 }
21808 Expression::Literal(Literal::String(_)) => {
21809 Some(DataType::VarChar {
21810 length: None,
21811 parenthesized_length: false,
21812 })
21813 }
21814 Expression::Boolean(_) => Some(DataType::Boolean),
21815 _ => None,
21816 };
21817 if let Some(dt) = inferred_type {
21818 fields.push(crate::expressions::StructField::new(
21819 name.clone(),
21820 dt,
21821 ));
21822 } else {
21823 all_inferred = false;
21824 break;
21825 }
21826 }
21827
21828 if all_inferred && !fields.is_empty() {
21829 let row_type = DataType::Struct {
21830 fields,
21831 nested: true,
21832 };
21833 Ok(Expression::Cast(Box::new(Cast {
21834 this: row_func,
21835 to: row_type,
21836 trailing_comments: Vec::new(),
21837 double_colon_syntax: false,
21838 format: None,
21839 default: None,
21840 inferred_type: None,
21841 })))
21842 } else {
21843 Ok(row_func)
21844 }
21845 } else {
21846 Ok(row_func)
21847 }
21848 } else {
21849 Ok(Expression::Function(Box::new(Function::new(
21850 "ROW".to_string(),
21851 named_args,
21852 ))))
21853 }
21854 } else {
21855 Ok(e)
21856 }
21857 }
21858
            Action::SparkStructConvert => {
                // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
                // or DuckDB {'name': val, ...}
                if let Expression::Function(f) = e {
                    // Extract name-value pairs from aliased args; unaliased args
                    // keep an empty name.
                    let mut pairs: Vec<(String, Expression)> = Vec::new();
                    for arg in &f.args {
                        match arg {
                            Expression::Alias(a) => {
                                pairs.push((a.alias.name.clone(), a.this.clone()));
                            }
                            _ => {
                                pairs.push((String::new(), arg.clone()));
                            }
                        }
                    }

                    match target {
                        DialectType::DuckDB => {
                            // Convert to DuckDB struct literal {'name': value, ...}
                            let mut keys = Vec::new();
                            let mut values = Vec::new();
                            for (name, value) in &pairs {
                                keys.push(Expression::Literal(Literal::String(name.clone())));
                                values.push(value.clone());
                            }
                            Ok(Expression::MapFunc(Box::new(
                                crate::expressions::MapConstructor {
                                    keys,
                                    values,
                                    curly_brace_syntax: true,
                                    with_map_keyword: false,
                                },
                            )))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
                            let row_args: Vec<Expression> =
                                pairs.iter().map(|(_, v)| v.clone()).collect();
                            let row_func = Expression::Function(Box::new(Function::new(
                                "ROW".to_string(),
                                row_args,
                            )));

                            // Infer types from simple literals only; any
                            // non-literal field aborts inference and falls back
                            // to the bare ROW call (same scheme as StructToRow).
                            let mut all_inferred = true;
                            let mut fields = Vec::new();
                            for (name, value) in &pairs {
                                let inferred_type = match value {
                                    Expression::Literal(Literal::Number(n)) => {
                                        // '.' distinguishes DOUBLE from INTEGER.
                                        if n.contains('.') {
                                            Some(DataType::Double {
                                                precision: None,
                                                scale: None,
                                            })
                                        } else {
                                            Some(DataType::Int {
                                                length: None,
                                                integer_spelling: true,
                                            })
                                        }
                                    }
                                    Expression::Literal(Literal::String(_)) => {
                                        Some(DataType::VarChar {
                                            length: None,
                                            parenthesized_length: false,
                                        })
                                    }
                                    Expression::Boolean(_) => Some(DataType::Boolean),
                                    _ => None,
                                };
                                if let Some(dt) = inferred_type {
                                    fields.push(crate::expressions::StructField::new(
                                        name.clone(),
                                        dt,
                                    ));
                                } else {
                                    all_inferred = false;
                                    break;
                                }
                            }

                            if all_inferred && !fields.is_empty() {
                                let row_type = DataType::Struct {
                                    fields,
                                    nested: true,
                                };
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: row_func,
                                    to: row_type,
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            } else {
                                Ok(row_func)
                            }
                        }
                        // Other targets keep the original STRUCT(...) call.
                        _ => Ok(Expression::Function(f)),
                    }
                } else {
                    Ok(e)
                }
            }
21965
21966 Action::ApproxCountDistinctToApproxDistinct => {
21967 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
21968 if let Expression::ApproxCountDistinct(f) = e {
21969 Ok(Expression::ApproxDistinct(f))
21970 } else {
21971 Ok(e)
21972 }
21973 }
21974
21975 Action::CollectListToArrayAgg => {
21976 // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
21977 if let Expression::AggregateFunction(f) = e {
21978 let filter_expr = if !f.args.is_empty() {
21979 let arg = f.args[0].clone();
21980 Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
21981 this: arg,
21982 not: true,
21983 postfix_form: false,
21984 })))
21985 } else {
21986 None
21987 };
21988 let agg = crate::expressions::AggFunc {
21989 this: if f.args.is_empty() {
21990 Expression::Null(crate::expressions::Null)
21991 } else {
21992 f.args[0].clone()
21993 },
21994 distinct: f.distinct,
21995 order_by: f.order_by.clone(),
21996 filter: filter_expr,
21997 ignore_nulls: None,
21998 name: None,
21999 having_max: None,
22000 limit: None,
22001 inferred_type: None,
22002 };
22003 Ok(Expression::ArrayAgg(Box::new(agg)))
22004 } else {
22005 Ok(e)
22006 }
22007 }
22008
            Action::CollectSetConvert => {
                // COLLECT_SET(x) -> target-specific:
                //   Presto:        SET_AGG(x)
                //   Snowflake:     ARRAY_UNIQUE_AGG(x)
                //   Trino/DuckDB:  ARRAY_AGG(DISTINCT x)
                if let Expression::AggregateFunction(f) = e {
                    match target {
                        // Presto's SET_AGG deduplicates by itself, so the DISTINCT
                        // flag is forced off; other modifiers are carried over.
                        DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
                            crate::expressions::AggregateFunction {
                                name: "SET_AGG".to_string(),
                                args: f.args,
                                distinct: false,
                                order_by: f.order_by,
                                filter: f.filter,
                                limit: f.limit,
                                ignore_nulls: f.ignore_nulls,
                                inferred_type: None,
                            },
                        ))),
                        // Same reasoning for Snowflake's ARRAY_UNIQUE_AGG.
                        DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
                            crate::expressions::AggregateFunction {
                                name: "ARRAY_UNIQUE_AGG".to_string(),
                                args: f.args,
                                distinct: false,
                                order_by: f.order_by,
                                filter: f.filter,
                                limit: f.limit,
                                ignore_nulls: f.ignore_nulls,
                                inferred_type: None,
                            },
                        ))),
                        DialectType::Trino | DialectType::DuckDB => {
                            // ARRAY_AGG(DISTINCT x); argument-less calls fall back
                            // to aggregating NULL.
                            // NOTE(review): order_by and filter from the source
                            // call are dropped on this path (unlike the two arms
                            // above) — confirm that is intentional.
                            let agg = crate::expressions::AggFunc {
                                this: if f.args.is_empty() {
                                    Expression::Null(crate::expressions::Null)
                                } else {
                                    f.args[0].clone()
                                },
                                distinct: true,
                                order_by: Vec::new(),
                                filter: None,
                                ignore_nulls: None,
                                name: None,
                                having_max: None,
                                limit: None,
                                inferred_type: None,
                            };
                            Ok(Expression::ArrayAgg(Box::new(agg)))
                        }
                        // Any other target keeps COLLECT_SET as-is.
                        _ => Ok(Expression::AggregateFunction(f)),
                    }
                } else {
                    Ok(e)
                }
            }
22061
22062 Action::PercentileConvert => {
22063 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
22064 if let Expression::AggregateFunction(f) = e {
22065 let name = match target {
22066 DialectType::DuckDB => "QUANTILE",
22067 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
22068 _ => "PERCENTILE",
22069 };
22070 Ok(Expression::AggregateFunction(Box::new(
22071 crate::expressions::AggregateFunction {
22072 name: name.to_string(),
22073 args: f.args,
22074 distinct: f.distinct,
22075 order_by: f.order_by,
22076 filter: f.filter,
22077 limit: f.limit,
22078 ignore_nulls: f.ignore_nulls,
22079 inferred_type: None,
22080 },
22081 )))
22082 } else {
22083 Ok(e)
22084 }
22085 }
22086
22087 Action::CorrIsnanWrap => {
22088 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
22089 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
22090 let corr_clone = e.clone();
22091 let isnan = Expression::Function(Box::new(Function::new(
22092 "ISNAN".to_string(),
22093 vec![corr_clone.clone()],
22094 )));
22095 let case_expr = Expression::Case(Box::new(Case {
22096 operand: None,
22097 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
22098 else_: Some(corr_clone),
22099 comments: Vec::new(),
22100 inferred_type: None,
22101 }));
22102 Ok(case_expr)
22103 }
22104
22105 Action::TruncToDateTrunc => {
22106 // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
22107 if let Expression::Function(f) = e {
22108 if f.args.len() == 2 {
22109 let timestamp = f.args[0].clone();
22110 let unit_expr = f.args[1].clone();
22111
22112 if matches!(target, DialectType::ClickHouse) {
22113 // For ClickHouse, produce Expression::DateTrunc which the generator
22114 // outputs as DATE_TRUNC(...) without going through the ClickHouse
22115 // target transform that would convert it to dateTrunc
22116 let unit_str = Self::get_unit_str_static(&unit_expr);
22117 let dt_field = match unit_str.as_str() {
22118 "YEAR" => DateTimeField::Year,
22119 "MONTH" => DateTimeField::Month,
22120 "DAY" => DateTimeField::Day,
22121 "HOUR" => DateTimeField::Hour,
22122 "MINUTE" => DateTimeField::Minute,
22123 "SECOND" => DateTimeField::Second,
22124 "WEEK" => DateTimeField::Week,
22125 "QUARTER" => DateTimeField::Quarter,
22126 _ => DateTimeField::Custom(unit_str),
22127 };
22128 Ok(Expression::DateTrunc(Box::new(
22129 crate::expressions::DateTruncFunc {
22130 this: timestamp,
22131 unit: dt_field,
22132 },
22133 )))
22134 } else {
22135 let new_args = vec![unit_expr, timestamp];
22136 Ok(Expression::Function(Box::new(Function::new(
22137 "DATE_TRUNC".to_string(),
22138 new_args,
22139 ))))
22140 }
22141 } else {
22142 Ok(Expression::Function(f))
22143 }
22144 } else {
22145 Ok(e)
22146 }
22147 }
22148
Action::ArrayContainsConvert => {
    // Rewrites ArrayContains per target; unhandled targets keep the node.
    if let Expression::ArrayContains(f) = e {
        match target {
            DialectType::Presto | DialectType::Trino => {
                // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
                // Same argument order, only the function name changes.
                Ok(Expression::Function(Box::new(Function::new(
                    "CONTAINS".to_string(),
                    vec![f.this, f.expression],
                ))))
            }
            DialectType::Snowflake => {
                // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
                // Snowflake takes the value FIRST and compares VARIANTs,
                // hence the explicit CAST and the swapped argument order.
                let cast_val =
                    Expression::Cast(Box::new(crate::expressions::Cast {
                        this: f.expression,
                        to: crate::expressions::DataType::Custom {
                            name: "VARIANT".to_string(),
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_CONTAINS".to_string(),
                    vec![cast_val, f.this],
                ))))
            }
            // Other targets: leave the ArrayContains node for the generator.
            _ => Ok(Expression::ArrayContains(f)),
        }
    } else {
        Ok(e)
    }
}
22184
Action::ArrayExceptConvert => {
    // ARRAY_EXCEPT(source, exclude): remove from `source` the elements that
    // occur in `exclude`, rewritten per target dialect.
    if let Expression::ArrayExcept(f) = e {
        let source_arr = f.this;
        let exclude_arr = f.expression;
        match target {
            DialectType::DuckDB => {
                // ARRAY_EXCEPT(source, exclude) -> complex CASE expression for DuckDB:
                // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
                //   ELSE LIST_TRANSFORM(LIST_FILTER(LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source))),
                //     pair -> (LENGTH(LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1]))
                //       > LENGTH(LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])))),
                //     pair -> pair[1])
                //   END
                // NOTE(review): the pair/count construction looks like
                // multiset semantics (keep the i-th occurrence of a value
                // only while its running count exceeds its count in
                // `exclude`) — confirm against the source dialect's
                // ARRAY_EXCEPT behavior.

                // Build: source IS NULL
                let source_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
                    this: source_arr.clone(),
                    not: false,
                    postfix_form: false,
                }));
                // Build: exclude IS NULL
                let exclude_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
                    this: exclude_arr.clone(),
                    not: false,
                    postfix_form: false,
                }));
                // source IS NULL OR exclude IS NULL
                let null_check = Expression::Or(Box::new(crate::expressions::BinaryOp {
                    left: source_is_null,
                    right: exclude_is_null,
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                }));

                // GENERATE_SERIES(1, LENGTH(source)) — 1-based positions
                let length_source = Expression::Function(Box::new(Function::new(
                    "LENGTH".to_string(),
                    vec![source_arr.clone()],
                )));
                let gen_series = Expression::Function(Box::new(Function::new(
                    "GENERATE_SERIES".to_string(),
                    vec![Expression::number(1), length_source],
                )));

                // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source)))
                // pairs each element with its 1-based index
                let list_zip = Expression::Function(Box::new(Function::new(
                    "LIST_ZIP".to_string(),
                    vec![source_arr.clone(), gen_series],
                )));

                // pair[1] - first element of pair (the value)
                let pair_col = Expression::column("pair");
                let pair_1 = Expression::Subscript(Box::new(crate::expressions::Subscript {
                    this: pair_col.clone(),
                    index: Expression::number(1),
                }));
                // pair[2] - second element of pair (the index)
                let pair_2 = Expression::Subscript(Box::new(crate::expressions::Subscript {
                    this: pair_col.clone(),
                    index: Expression::number(2),
                }));

                // source[1:pair[2]] - prefix slice up to the current index
                let source_slice = Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
                    this: source_arr.clone(),
                    start: Some(Expression::number(1)),
                    end: Some(pair_2.clone()),
                }));

                // `e` is the bound variable of the inner lambdas
                let e_col = Expression::column("e");

                // e IS NOT DISTINCT FROM pair[1] (for source slice filter)
                // NULL-safe equality so NULL elements compare equal to NULL
                let is_not_distinct_1 = Expression::NullSafeEq(Box::new(crate::expressions::BinaryOp {
                    left: e_col.clone(),
                    right: pair_1.clone(),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                }));

                // LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
                let lambda_1 = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                    parameters: vec![crate::expressions::Identifier::new("e")],
                    body: is_not_distinct_1,
                    colon: false,
                    parameter_types: vec![],
                }));
                let list_filter_source_slice = Expression::Function(Box::new(Function::new(
                    "LIST_FILTER".to_string(),
                    vec![source_slice, lambda_1],
                )));
                // LENGTH(LIST_FILTER(source[1:pair[2]], e -> ...))
                // = running count of this value within the prefix
                let len_source_slice = Expression::Function(Box::new(Function::new(
                    "LENGTH".to_string(),
                    vec![list_filter_source_slice],
                )));

                // e IS NOT DISTINCT FROM pair[1] (for exclude filter)
                let is_not_distinct_2 = Expression::NullSafeEq(Box::new(crate::expressions::BinaryOp {
                    left: e_col.clone(),
                    right: pair_1.clone(),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                }));

                // LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])
                let lambda_2 = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                    parameters: vec![crate::expressions::Identifier::new("e")],
                    body: is_not_distinct_2,
                    colon: false,
                    parameter_types: vec![],
                }));
                let list_filter_exclude = Expression::Function(Box::new(Function::new(
                    "LIST_FILTER".to_string(),
                    vec![exclude_arr.clone(), lambda_2],
                )));
                // LENGTH(LIST_FILTER(exclude, e -> ...))
                // = count of this value within `exclude`
                let len_exclude = Expression::Function(Box::new(Function::new(
                    "LENGTH".to_string(),
                    vec![list_filter_exclude],
                )));

                // LENGTH(...) > LENGTH(...): keep an occurrence only when the
                // running count exceeds the exclude count
                let gt_expr = Expression::Gt(Box::new(crate::expressions::BinaryOp {
                    left: len_source_slice,
                    right: len_exclude,
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                }));

                // Wrap in parens for the lambda body
                let gt_paren = Expression::Paren(Box::new(crate::expressions::Paren {
                    this: gt_expr,
                    trailing_comments: vec![],
                }));

                // pair -> (LENGTH(...) > LENGTH(...))
                let filter_lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                    parameters: vec![crate::expressions::Identifier::new("pair")],
                    body: gt_paren,
                    colon: false,
                    parameter_types: vec![],
                }));

                // LIST_FILTER(LIST_ZIP(...), pair -> (...))
                let list_filter_outer = Expression::Function(Box::new(Function::new(
                    "LIST_FILTER".to_string(),
                    vec![list_zip, filter_lambda],
                )));

                // pair -> pair[1] (for LIST_TRANSFORM): project back to values
                let transform_lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                    parameters: vec![crate::expressions::Identifier::new("pair")],
                    body: pair_1.clone(),
                    colon: false,
                    parameter_types: vec![],
                }));

                // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
                let list_transform = Expression::Function(Box::new(Function::new(
                    "LIST_TRANSFORM".to_string(),
                    vec![list_filter_outer, transform_lambda],
                )));

                // CASE WHEN ... IS NULL ... THEN NULL ELSE LIST_TRANSFORM(...) END
                Ok(Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(
                        null_check,
                        Expression::Null(Null),
                    )],
                    else_: Some(list_transform),
                    comments: Vec::new(),
                    inferred_type: None,
                })))
            }
            DialectType::Snowflake => {
                // Snowflake: ARRAY_EXCEPT(source, exclude) - keep as-is
                Ok(Expression::ArrayExcept(Box::new(crate::expressions::BinaryFunc {
                    this: source_arr,
                    expression: exclude_arr,
                    original_name: None,
                    inferred_type: None,
                })))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // Presto/Trino: ARRAY_EXCEPT(source, exclude) - keep function name, array syntax already converted
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_EXCEPT".to_string(),
                    vec![source_arr, exclude_arr],
                ))))
            }
            // Default: keep the ArrayExcept node for the target generator.
            _ => Ok(Expression::ArrayExcept(Box::new(crate::expressions::BinaryFunc {
                this: source_arr,
                expression: exclude_arr,
                original_name: None,
                inferred_type: None,
            }))),
        }
    } else {
        Ok(e)
    }
}
22396
Action::ArrayDistinctConvert => {
    // ARRAY_DISTINCT(arr) -> DuckDB NULL-aware CASE:
    // CASE WHEN ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
    //      THEN LIST_APPEND(LIST_DISTINCT(LIST_FILTER(arr, _u -> NOT _u IS NULL)), NULL)
    //      ELSE LIST_DISTINCT(arr)
    // END
    // NOTE(review): the <> test presumably detects a NULL element (assuming
    // DuckDB's LIST_COUNT counts only non-NULL elements while ARRAY_LENGTH
    // counts all) so that exactly one NULL survives deduplication — confirm
    // against DuckDB documentation.
    if let Expression::ArrayDistinct(f) = e {
        let arr = f.this;

        // ARRAY_LENGTH(arr)
        let array_length = Expression::Function(Box::new(Function::new(
            "ARRAY_LENGTH".to_string(),
            vec![arr.clone()],
        )));
        // LIST_COUNT(arr)
        let list_count = Expression::Function(Box::new(Function::new(
            "LIST_COUNT".to_string(),
            vec![arr.clone()],
        )));
        // ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
        let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
            left: array_length,
            right: list_count,
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // `_u` is the lambda-bound element variable
        let u_col = Expression::column("_u");
        // NOT _u IS NULL
        let u_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
            this: u_col.clone(),
            not: false,
            postfix_form: false,
        }));
        let not_u_is_null = Expression::Not(Box::new(crate::expressions::UnaryOp {
            this: u_is_null,
            inferred_type: None,
        }));
        // _u -> NOT _u IS NULL
        let filter_lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
            parameters: vec![crate::expressions::Identifier::new("_u")],
            body: not_u_is_null,
            colon: false,
            parameter_types: vec![],
        }));
        // LIST_FILTER(arr, _u -> NOT _u IS NULL): strip NULLs before dedup
        let list_filter = Expression::Function(Box::new(Function::new(
            "LIST_FILTER".to_string(),
            vec![arr.clone(), filter_lambda],
        )));
        // LIST_DISTINCT(LIST_FILTER(arr, ...))
        let list_distinct_filtered = Expression::Function(Box::new(Function::new(
            "LIST_DISTINCT".to_string(),
            vec![list_filter],
        )));
        // LIST_APPEND(LIST_DISTINCT(LIST_FILTER(...)), NULL): re-add one NULL
        let list_append = Expression::Function(Box::new(Function::new(
            "LIST_APPEND".to_string(),
            vec![list_distinct_filtered, Expression::Null(Null)],
        )));

        // LIST_DISTINCT(arr): the no-NULLs fast path
        let list_distinct = Expression::Function(Box::new(Function::new(
            "LIST_DISTINCT".to_string(),
            vec![arr],
        )));

        // CASE WHEN neq THEN list_append ELSE list_distinct END
        Ok(Expression::Case(Box::new(Case {
            operand: None,
            whens: vec![(neq, list_append)],
            else_: Some(list_distinct),
            comments: Vec::new(),
            inferred_type: None,
        })))
    } else {
        Ok(e)
    }
}
22479
Action::ArrayContainsDuckDBConvert => {
    // Snowflake ARRAY_CONTAINS(value, array) -> DuckDB NULL-aware:
    // CASE WHEN value IS NULL
    //      THEN NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
    //      ELSE ARRAY_CONTAINS(array, value)
    // END
    // When the probe value is NULL, the result is TRUE if the array holds a
    // NULL element and NULL otherwise (NULLIF maps FALSE to NULL).
    // Note: In Rust AST from Snowflake parse, this=value (first arg), expression=array (second arg)
    if let Expression::ArrayContains(f) = e {
        let value = f.this;
        let array = f.expression;

        // value IS NULL
        let value_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
            this: value.clone(),
            not: false,
            postfix_form: false,
        }));

        // ARRAY_LENGTH(array)
        let array_length = Expression::Function(Box::new(Function::new(
            "ARRAY_LENGTH".to_string(),
            vec![array.clone()],
        )));
        // LIST_COUNT(array)
        let list_count = Expression::Function(Box::new(Function::new(
            "LIST_COUNT".to_string(),
            vec![array.clone()],
        )));
        // ARRAY_LENGTH(array) <> LIST_COUNT(array)
        // NOTE(review): presumably TRUE exactly when the array contains a
        // NULL element (LIST_COUNT skipping NULLs) — confirm against DuckDB.
        let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
            left: array_length,
            right: list_count,
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));
        // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
        let nullif = Expression::Nullif(Box::new(crate::expressions::Nullif {
            this: Box::new(neq),
            expression: Box::new(Expression::Boolean(crate::expressions::BooleanLiteral { value: false })),
        }));

        // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
        let array_contains = Expression::Function(Box::new(Function::new(
            "ARRAY_CONTAINS".to_string(),
            vec![array, value],
        )));

        // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
        Ok(Expression::Case(Box::new(Case {
            operand: None,
            whens: vec![(value_is_null, nullif)],
            else_: Some(array_contains),
            comments: Vec::new(),
            inferred_type: None,
        })))
    } else {
        Ok(e)
    }
}
22541
Action::StrPositionExpand => {
    // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
    // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
    // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
    // NOTE(review): `occurrence` is silently dropped on the Presto/DuckDB
    // paths (it is only round-tripped in the fallback) — presumably this
    // action only fires when no occurrence argument was supplied; confirm
    // at the dispatch site.
    if let Expression::StrPosition(sp) = e {
        let crate::expressions::StrPosition {
            this,
            substr,
            position,
            occurrence,
        } = *sp;
        let string = *this;
        // Missing substring degrades to NULL so the expansion still emits a
        // well-formed call.
        let substr_expr = match substr {
            Some(s) => *s,
            None => Expression::Null(Null),
        };
        // Start position defaults to 1 (search from the beginning).
        let pos = match position {
            Some(p) => *p,
            None => Expression::number(1),
        };

        // SUBSTRING(string, pos)
        let substring_call = Expression::Function(Box::new(Function::new(
            "SUBSTRING".to_string(),
            vec![string.clone(), pos.clone()],
        )));
        // STRPOS(SUBSTRING(string, pos), substr)
        let strpos_call = Expression::Function(Box::new(Function::new(
            "STRPOS".to_string(),
            vec![substring_call, substr_expr.clone()],
        )));
        // STRPOS(...) + pos - 1: translate the match offset within the
        // suffix back into an offset within the original string
        let pos_adjusted =
            Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                    strpos_call.clone(),
                    pos.clone(),
                ))),
                Expression::number(1),
            )));
        // STRPOS(...) = 0: "not found" sentinel check
        let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
            strpos_call.clone(),
            Expression::number(0),
        )));

        match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                Ok(Expression::Function(Box::new(Function::new(
                    "IF".to_string(),
                    vec![is_zero, Expression::number(0), pos_adjusted],
                ))))
            }
            DialectType::DuckDB => {
                // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                Ok(Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(is_zero, Expression::number(0))],
                    else_: Some(pos_adjusted),
                    comments: Vec::new(),
                    inferred_type: None,
                })))
            }
            _ => {
                // Reconstruct StrPosition for other targets.
                // NOTE(review): an originally-absent substr comes back as
                // Some(NULL) here rather than None — verify it round-trips.
                Ok(Expression::StrPosition(Box::new(
                    crate::expressions::StrPosition {
                        this: Box::new(string),
                        substr: Some(Box::new(substr_expr)),
                        position: Some(Box::new(pos)),
                        occurrence,
                    },
                )))
            }
        }
    } else {
        Ok(e)
    }
}
22622
Action::MonthsBetweenConvert => {
    // MONTHS_BETWEEN(end_date, start_date) -> target-specific rewrite.
    // The AST stores end_date in `this` and start_date in `expression`.
    if let Expression::MonthsBetween(mb) = e {
        let crate::expressions::BinaryFunc {
            this: end_date,
            expression: start_date,
            ..
        } = *mb;
        match target {
            DialectType::DuckDB => {
                // Emulate the fractional-month semantics:
                //   DATE_DIFF('MONTH', start, end)
                //   + CASE WHEN both dates are the last day of their month
                //          THEN 0
                //          ELSE (DAY(end) - DAY(start)) / 31.0
                //     END
                // NOTE(review): this mirrors Oracle-style MONTHS_BETWEEN
                // (whole months when both dates fall on month-end, else a
                // 31-day-denominator fraction) — confirm against the source
                // dialect's definition.
                let cast_end = Self::ensure_cast_date(end_date);
                let cast_start = Self::ensure_cast_date(start_date);
                let dd = Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![
                        Expression::string("MONTH"),
                        cast_start.clone(),
                        cast_end.clone(),
                    ],
                )));
                let day_end = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_end.clone()],
                )));
                let day_start = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_start.clone()],
                )));
                let last_day_end = Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_end.clone()],
                )));
                let last_day_start = Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_start.clone()],
                )));
                let day_last_end = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![last_day_end],
                )));
                let day_last_start = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![last_day_start],
                )));
                // DAY(end) = DAY(LAST_DAY(end)): end is a month-end date
                let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                    day_end.clone(),
                    day_last_end,
                )));
                // DAY(start) = DAY(LAST_DAY(start)): start is a month-end date
                let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                    day_start.clone(),
                    day_last_start,
                )));
                let both_cond =
                    Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                // (DAY(end) - DAY(start)) / 31.0 — the fractional part
                let day_diff =
                    Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
                let day_diff_paren =
                    Expression::Paren(Box::new(crate::expressions::Paren {
                        this: day_diff,
                        trailing_comments: Vec::new(),
                    }));
                let frac = Expression::Div(Box::new(BinaryOp::new(
                    day_diff_paren,
                    Expression::Literal(Literal::Number("31.0".to_string())),
                )));
                let case_expr = Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(both_cond, Expression::number(0))],
                    else_: Some(frac),
                    comments: Vec::new(),
                    inferred_type: None,
                }));
                Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
            }
            DialectType::Snowflake | DialectType::Redshift => {
                // DATEDIFF(MONTH, start, end) — whole-month count only
                let unit = Expression::Identifier(Identifier::new("MONTH"));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEDIFF".to_string(),
                    vec![unit, start_date, end_date],
                ))))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // DATE_DIFF('MONTH', start, end) — unit passed as a string
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string("MONTH"), start_date, end_date],
                ))))
            }
            // Default: rebuild the MonthsBetween node unchanged.
            _ => Ok(Expression::MonthsBetween(Box::new(
                crate::expressions::BinaryFunc {
                    this: end_date,
                    expression: start_date,
                    original_name: None,
                    inferred_type: None,
                },
            ))),
        }
    } else {
        Ok(e)
    }
}
22722
22723 Action::AddMonthsConvert => {
22724 if let Expression::AddMonths(am) = e {
22725 let date = am.this;
22726 let val = am.expression;
22727 match target {
22728 DialectType::TSQL | DialectType::Fabric => {
22729 let cast_date = Self::ensure_cast_datetime2(date);
22730 Ok(Expression::Function(Box::new(Function::new(
22731 "DATEADD".to_string(),
22732 vec![
22733 Expression::Identifier(Identifier::new("MONTH")),
22734 val,
22735 cast_date,
22736 ],
22737 ))))
22738 }
22739 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
22740 // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
22741 // Optionally wrapped in CAST(... AS type) if the input had a specific type
22742
22743 // Determine the cast type from the date expression
22744 let (cast_date, return_type) = match &date {
22745 Expression::Literal(Literal::String(_)) => {
22746 // String literal: CAST(str AS TIMESTAMP), no outer CAST
22747 (
22748 Expression::Cast(Box::new(Cast {
22749 this: date.clone(),
22750 to: DataType::Timestamp {
22751 precision: None,
22752 timezone: false,
22753 },
22754 trailing_comments: Vec::new(),
22755 double_colon_syntax: false,
22756 format: None,
22757 default: None,
22758 inferred_type: None,
22759 })),
22760 None,
22761 )
22762 }
22763 Expression::Cast(c) => {
22764 // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
22765 (date.clone(), Some(c.to.clone()))
22766 }
22767 _ => {
22768 // Expression or NULL::TYPE - keep as-is, check for cast type
22769 if let Expression::Cast(c) = &date {
22770 (date.clone(), Some(c.to.clone()))
22771 } else {
22772 (date.clone(), None)
22773 }
22774 }
22775 };
22776
22777 // Build the interval expression
22778 // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
22779 // For integer values, use INTERVAL val MONTH
22780 let is_non_integer_val = match &val {
22781 Expression::Literal(Literal::Number(n)) => n.contains('.'),
22782 Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
22783 Expression::Neg(n) => {
22784 if let Expression::Literal(Literal::Number(s)) = &n.this {
22785 s.contains('.')
22786 } else {
22787 false
22788 }
22789 }
22790 _ => false,
22791 };
22792
22793 let add_interval = if is_non_integer_val {
22794 // TO_MONTHS(CAST(ROUND(val) AS INT))
22795 let round_val = Expression::Function(Box::new(Function::new(
22796 "ROUND".to_string(),
22797 vec![val.clone()],
22798 )));
22799 let cast_int = Expression::Cast(Box::new(Cast {
22800 this: round_val,
22801 to: DataType::Int {
22802 length: None,
22803 integer_spelling: false,
22804 },
22805 trailing_comments: Vec::new(),
22806 double_colon_syntax: false,
22807 format: None,
22808 default: None,
22809 inferred_type: None,
22810 }));
22811 Expression::Function(Box::new(Function::new(
22812 "TO_MONTHS".to_string(),
22813 vec![cast_int],
22814 )))
22815 } else {
22816 // INTERVAL val MONTH
22817 // For negative numbers, wrap in parens
22818 let interval_val = match &val {
22819 Expression::Literal(Literal::Number(n))
22820 if n.starts_with('-') =>
22821 {
22822 Expression::Paren(Box::new(Paren {
22823 this: val.clone(),
22824 trailing_comments: Vec::new(),
22825 }))
22826 }
22827 Expression::Neg(_) => Expression::Paren(Box::new(Paren {
22828 this: val.clone(),
22829 trailing_comments: Vec::new(),
22830 })),
22831 Expression::Null(_) => Expression::Paren(Box::new(Paren {
22832 this: val.clone(),
22833 trailing_comments: Vec::new(),
22834 })),
22835 _ => val.clone(),
22836 };
22837 Expression::Interval(Box::new(crate::expressions::Interval {
22838 this: Some(interval_val),
22839 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
22840 unit: crate::expressions::IntervalUnit::Month,
22841 use_plural: false,
22842 }),
22843 }))
22844 };
22845
22846 // Build: date + interval
22847 let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
22848 cast_date.clone(),
22849 add_interval.clone(),
22850 )));
22851
22852 // Build LAST_DAY(date)
22853 let last_day_date = Expression::Function(Box::new(Function::new(
22854 "LAST_DAY".to_string(),
22855 vec![cast_date.clone()],
22856 )));
22857
22858 // Build LAST_DAY(date + interval)
22859 let last_day_date_plus =
22860 Expression::Function(Box::new(Function::new(
22861 "LAST_DAY".to_string(),
22862 vec![date_plus_interval.clone()],
22863 )));
22864
22865 // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
22866 let case_expr = Expression::Case(Box::new(Case {
22867 operand: None,
22868 whens: vec![(
22869 Expression::Eq(Box::new(BinaryOp::new(
22870 last_day_date,
22871 cast_date.clone(),
22872 ))),
22873 last_day_date_plus,
22874 )],
22875 else_: Some(date_plus_interval),
22876 comments: Vec::new(),
22877 inferred_type: None,
22878 }));
22879
22880 // Wrap in CAST(... AS type) if needed
22881 if let Some(dt) = return_type {
22882 Ok(Expression::Cast(Box::new(Cast {
22883 this: case_expr,
22884 to: dt,
22885 trailing_comments: Vec::new(),
22886 double_colon_syntax: false,
22887 format: None,
22888 default: None,
22889 inferred_type: None,
22890 })))
22891 } else {
22892 Ok(case_expr)
22893 }
22894 }
22895 DialectType::DuckDB => {
22896 // Non-Snowflake source: simple date + INTERVAL
22897 let cast_date =
22898 if matches!(&date, Expression::Literal(Literal::String(_))) {
22899 Expression::Cast(Box::new(Cast {
22900 this: date,
22901 to: DataType::Timestamp {
22902 precision: None,
22903 timezone: false,
22904 },
22905 trailing_comments: Vec::new(),
22906 double_colon_syntax: false,
22907 format: None,
22908 default: None,
22909 inferred_type: None,
22910 }))
22911 } else {
22912 date
22913 };
22914 let interval =
22915 Expression::Interval(Box::new(crate::expressions::Interval {
22916 this: Some(val),
22917 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
22918 unit: crate::expressions::IntervalUnit::Month,
22919 use_plural: false,
22920 }),
22921 }));
22922 Ok(Expression::Add(Box::new(BinaryOp::new(
22923 cast_date, interval,
22924 ))))
22925 }
22926 DialectType::Snowflake => {
22927 // Keep ADD_MONTHS when source is also Snowflake
22928 if matches!(source, DialectType::Snowflake) {
22929 Ok(Expression::Function(Box::new(Function::new(
22930 "ADD_MONTHS".to_string(),
22931 vec![date, val],
22932 ))))
22933 } else {
22934 Ok(Expression::Function(Box::new(Function::new(
22935 "DATEADD".to_string(),
22936 vec![
22937 Expression::Identifier(Identifier::new("MONTH")),
22938 val,
22939 date,
22940 ],
22941 ))))
22942 }
22943 }
22944 DialectType::Redshift => {
22945 Ok(Expression::Function(Box::new(Function::new(
22946 "DATEADD".to_string(),
22947 vec![
22948 Expression::Identifier(Identifier::new("MONTH")),
22949 val,
22950 date,
22951 ],
22952 ))))
22953 }
22954 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
22955 let cast_date =
22956 if matches!(&date, Expression::Literal(Literal::String(_))) {
22957 Expression::Cast(Box::new(Cast {
22958 this: date,
22959 to: DataType::Timestamp {
22960 precision: None,
22961 timezone: false,
22962 },
22963 trailing_comments: Vec::new(),
22964 double_colon_syntax: false,
22965 format: None,
22966 default: None,
22967 inferred_type: None,
22968 }))
22969 } else {
22970 date
22971 };
22972 Ok(Expression::Function(Box::new(Function::new(
22973 "DATE_ADD".to_string(),
22974 vec![Expression::string("MONTH"), val, cast_date],
22975 ))))
22976 }
22977 DialectType::BigQuery => {
22978 let interval =
22979 Expression::Interval(Box::new(crate::expressions::Interval {
22980 this: Some(val),
22981 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
22982 unit: crate::expressions::IntervalUnit::Month,
22983 use_plural: false,
22984 }),
22985 }));
22986 let cast_date =
22987 if matches!(&date, Expression::Literal(Literal::String(_))) {
22988 Expression::Cast(Box::new(Cast {
22989 this: date,
22990 to: DataType::Custom {
22991 name: "DATETIME".to_string(),
22992 },
22993 trailing_comments: Vec::new(),
22994 double_colon_syntax: false,
22995 format: None,
22996 default: None,
22997 inferred_type: None,
22998 }))
22999 } else {
23000 date
23001 };
23002 Ok(Expression::Function(Box::new(Function::new(
23003 "DATE_ADD".to_string(),
23004 vec![cast_date, interval],
23005 ))))
23006 }
23007 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
23008 Ok(Expression::Function(Box::new(Function::new(
23009 "ADD_MONTHS".to_string(),
23010 vec![date, val],
23011 ))))
23012 }
23013 _ => {
23014 // Default: keep as AddMonths expression
23015 Ok(Expression::AddMonths(Box::new(
23016 crate::expressions::BinaryFunc {
23017 this: date,
23018 expression: val,
23019 original_name: None,
23020 inferred_type: None,
23021 },
23022 )))
23023 }
23024 }
23025 } else {
23026 Ok(e)
23027 }
23028 }
23029
            Action::PercentileContConvert => {
                // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
                // Presto/Trino: APPROX_PERCENTILE(col, p)
                // Spark/Databricks: PERCENTILE_APPROX(col, p)
                if let Expression::WithinGroup(wg) = e {
                    // Extract percentile value and order by column.
                    // `_is_disc` records PERCENTILE_DISC vs PERCENTILE_CONT but is
                    // currently unused: both map to the same approximate function.
                    let (percentile, _is_disc) = match &wg.this {
                        Expression::Function(f) => {
                            let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                            // Default to the median (0.5) when no percentile argument exists.
                            let pct = f.args.first().cloned().unwrap_or(Expression::Literal(
                                Literal::Number("0.5".to_string()),
                            ));
                            (pct, is_disc)
                        }
                        Expression::AggregateFunction(af) => {
                            let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                            let pct = af.args.first().cloned().unwrap_or(Expression::Literal(
                                Literal::Number("0.5".to_string()),
                            ));
                            (pct, is_disc)
                        }
                        Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
                        // Unknown inner expression: leave the WITHIN GROUP untouched.
                        _ => return Ok(Expression::WithinGroup(wg)),
                    };
                    // The aggregated column comes from the first ORDER BY key; fall
                    // back to the literal 1 when the ORDER BY list is empty.
                    let col = wg
                        .order_by
                        .first()
                        .map(|o| o.this.clone())
                        .unwrap_or(Expression::Literal(Literal::Number("1".to_string())));

                    let func_name = match target {
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            "APPROX_PERCENTILE"
                        }
                        _ => "PERCENTILE_APPROX", // Spark, Databricks
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        func_name.to_string(),
                        vec![col, percentile],
                    ))))
                } else {
                    Ok(e)
                }
            }
23074
23075 Action::CurrentUserSparkParens => {
23076 // CURRENT_USER -> CURRENT_USER() for Spark
23077 if let Expression::CurrentUser(_) = e {
23078 Ok(Expression::Function(Box::new(Function::new(
23079 "CURRENT_USER".to_string(),
23080 vec![],
23081 ))))
23082 } else {
23083 Ok(e)
23084 }
23085 }
23086
23087 Action::SparkDateFuncCast => {
23088 // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
23089 let cast_arg = |arg: Expression| -> Expression {
23090 match target {
23091 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23092 Self::double_cast_timestamp_date(arg)
23093 }
23094 _ => {
23095 // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
23096 Self::ensure_cast_date(arg)
23097 }
23098 }
23099 };
23100 match e {
23101 Expression::Month(f) => Ok(Expression::Month(Box::new(
23102 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
23103 ))),
23104 Expression::Year(f) => Ok(Expression::Year(Box::new(
23105 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
23106 ))),
23107 Expression::Day(f) => Ok(Expression::Day(Box::new(
23108 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
23109 ))),
23110 other => Ok(other),
23111 }
23112 }
23113
23114 Action::MapFromArraysConvert => {
23115 // Expression::MapFromArrays -> target-specific
23116 if let Expression::MapFromArrays(mfa) = e {
23117 let keys = mfa.this;
23118 let values = mfa.expression;
23119 match target {
23120 DialectType::Snowflake => Ok(Expression::Function(Box::new(
23121 Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
23122 ))),
23123 _ => {
23124 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
23125 Ok(Expression::Function(Box::new(Function::new(
23126 "MAP".to_string(),
23127 vec![keys, values],
23128 ))))
23129 }
23130 }
23131 } else {
23132 Ok(e)
23133 }
23134 }
23135
            Action::AnyToExists => {
                // expr op ANY(array) -> EXISTS(array, x -> expr op x): a
                // lambda-based rewrite for engines with higher-order functions.
                if let Expression::Any(q) = e {
                    if let Some(op) = q.op.clone() {
                        // Fresh lambda parameter `x`, used as the right-hand side
                        // of the comparison inside the lambda body.
                        let lambda_param = crate::expressions::Identifier::new("x");
                        let rhs = Expression::Identifier(lambda_param.clone());
                        let body = match op {
                            crate::expressions::QuantifiedOp::Eq => {
                                Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Neq => {
                                Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Lt => {
                                Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Lte => {
                                Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Gt => {
                                Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                            crate::expressions::QuantifiedOp::Gte => {
                                Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
                            }
                        };
                        let lambda =
                            Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                parameters: vec![lambda_param],
                                body,
                                colon: false,
                                parameter_types: Vec::new(),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "EXISTS".to_string(),
                            vec![q.subquery, lambda],
                        ))))
                    } else {
                        // No comparison operator recorded: leave the ANY untouched.
                        Ok(Expression::Any(q))
                    }
                } else {
                    Ok(e)
                }
            }
23179
            Action::GenerateSeriesConvert => {
                // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
                // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
                // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
                if let Expression::Function(f) = e {
                    if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
                        let start = f.args[0].clone();
                        let end = f.args[1].clone();
                        let step = f.args.get(2).cloned();

                        // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
                        let step = step.map(|s| Self::normalize_interval_string(s, target));

                        // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
                        // so the series bounds carry an explicit timestamp type.
                        let maybe_cast_timestamp = |arg: Expression| -> Expression {
                            if matches!(
                                target,
                                DialectType::Presto
                                    | DialectType::Trino
                                    | DialectType::Athena
                                    | DialectType::Spark
                                    | DialectType::Databricks
                                    | DialectType::Hive
                            ) {
                                match &arg {
                                    Expression::CurrentTimestamp(_) => {
                                        Expression::Cast(Box::new(Cast {
                                            this: arg,
                                            to: DataType::Timestamp {
                                                precision: None,
                                                timezone: false,
                                            },
                                            trailing_comments: Vec::new(),
                                            double_colon_syntax: false,
                                            format: None,
                                            default: None,
                                            inferred_type: None,
                                        }))
                                    }
                                    _ => arg,
                                }
                            } else {
                                arg
                            }
                        };

                        let start = maybe_cast_timestamp(start);
                        let end = maybe_cast_timestamp(end);

                        // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
                        if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
                            let mut gs_args = vec![start, end];
                            if let Some(step) = step {
                                gs_args.push(step);
                            }
                            return Ok(Expression::Function(Box::new(Function::new(
                                "GENERATE_SERIES".to_string(),
                                gs_args,
                            ))));
                        }

                        // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
                        if matches!(target, DialectType::DuckDB) {
                            let mut gs_args = vec![start, end];
                            if let Some(step) = step {
                                gs_args.push(step);
                            }
                            let gs = Expression::Function(Box::new(Function::new(
                                "GENERATE_SERIES".to_string(),
                                gs_args,
                            )));
                            return Ok(Expression::Function(Box::new(Function::new(
                                "UNNEST".to_string(),
                                vec![gs],
                            ))));
                        }

                        // Remaining targets build SEQUENCE(start, end[, step]) ...
                        let mut seq_args = vec![start, end];
                        if let Some(step) = step {
                            seq_args.push(step);
                        }

                        let seq = Expression::Function(Box::new(Function::new(
                            "SEQUENCE".to_string(),
                            seq_args,
                        )));

                        // ... wrapped in the target's array-expansion function.
                        match target {
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // Wrap in UNNEST
                                Ok(Expression::Function(Box::new(Function::new(
                                    "UNNEST".to_string(),
                                    vec![seq],
                                ))))
                            }
                            DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Hive => {
                                // Wrap in EXPLODE
                                Ok(Expression::Function(Box::new(Function::new(
                                    "EXPLODE".to_string(),
                                    vec![seq],
                                ))))
                            }
                            _ => {
                                // Just SEQUENCE for others
                                Ok(seq)
                            }
                        }
                    } else {
                        // Not GENERATE_SERIES (or fewer than two args): unchanged.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
23296
23297 Action::ConcatCoalesceWrap => {
23298 // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
23299 // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
23300 if let Expression::Function(f) = e {
23301 if f.name.eq_ignore_ascii_case("CONCAT") {
23302 let new_args: Vec<Expression> = f
23303 .args
23304 .into_iter()
23305 .map(|arg| {
23306 let cast_arg = if matches!(
23307 target,
23308 DialectType::Presto
23309 | DialectType::Trino
23310 | DialectType::Athena
23311 ) {
23312 Expression::Cast(Box::new(Cast {
23313 this: arg,
23314 to: DataType::VarChar {
23315 length: None,
23316 parenthesized_length: false,
23317 },
23318 trailing_comments: Vec::new(),
23319 double_colon_syntax: false,
23320 format: None,
23321 default: None,
23322 inferred_type: None,
23323 }))
23324 } else {
23325 arg
23326 };
23327 Expression::Function(Box::new(Function::new(
23328 "COALESCE".to_string(),
23329 vec![cast_arg, Expression::string("")],
23330 )))
23331 })
23332 .collect();
23333 Ok(Expression::Function(Box::new(Function::new(
23334 "CONCAT".to_string(),
23335 new_args,
23336 ))))
23337 } else {
23338 Ok(Expression::Function(f))
23339 }
23340 } else {
23341 Ok(e)
23342 }
23343 }
23344
23345 Action::PipeConcatToConcat => {
23346 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
23347 if let Expression::Concat(op) = e {
23348 let cast_left = Expression::Cast(Box::new(Cast {
23349 this: op.left,
23350 to: DataType::VarChar {
23351 length: None,
23352 parenthesized_length: false,
23353 },
23354 trailing_comments: Vec::new(),
23355 double_colon_syntax: false,
23356 format: None,
23357 default: None,
23358 inferred_type: None,
23359 }));
23360 let cast_right = Expression::Cast(Box::new(Cast {
23361 this: op.right,
23362 to: DataType::VarChar {
23363 length: None,
23364 parenthesized_length: false,
23365 },
23366 trailing_comments: Vec::new(),
23367 double_colon_syntax: false,
23368 format: None,
23369 default: None,
23370 inferred_type: None,
23371 }));
23372 Ok(Expression::Function(Box::new(Function::new(
23373 "CONCAT".to_string(),
23374 vec![cast_left, cast_right],
23375 ))))
23376 } else {
23377 Ok(e)
23378 }
23379 }
23380
            Action::DivFuncConvert => {
                // DIV(a, b) -> target-specific integer division. The casts below
                // reproduce the numeric result type each target's DIV would yield.
                if let Expression::Function(f) = e {
                    if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
                        let a = f.args[0].clone();
                        let b = f.args[1].clone();
                        match target {
                            DialectType::DuckDB => {
                                // DIV(a, b) -> CAST(a // b AS DECIMAL)
                                let int_div = Expression::IntDiv(Box::new(
                                    crate::expressions::BinaryFunc {
                                        this: a,
                                        expression: b,
                                        original_name: None,
                                        inferred_type: None,
                                    },
                                ));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: int_div,
                                    to: DataType::Decimal {
                                        precision: None,
                                        scale: None,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                            DialectType::BigQuery => {
                                // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
                                let div_func = Expression::Function(Box::new(Function::new(
                                    "DIV".to_string(),
                                    vec![a, b],
                                )));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: div_func,
                                    to: DataType::Custom {
                                        name: "NUMERIC".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                            DialectType::SQLite => {
                                // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
                                // Force REAL division first, truncate via INTEGER,
                                // then convert back to REAL for the result type.
                                let cast_a = Expression::Cast(Box::new(Cast {
                                    this: a,
                                    to: DataType::Custom {
                                        name: "REAL".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
                                let cast_int = Expression::Cast(Box::new(Cast {
                                    this: div,
                                    to: DataType::Int {
                                        length: None,
                                        integer_spelling: true,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: cast_int,
                                    to: DataType::Custom {
                                        name: "REAL".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                            // Other targets keep DIV(a, b) unchanged.
                            _ => Ok(Expression::Function(f)),
                        }
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
23476
23477 Action::JsonObjectAggConvert => {
23478 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
23479 match e {
23480 Expression::Function(f) => Ok(Expression::Function(Box::new(
23481 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
23482 ))),
23483 Expression::AggregateFunction(af) => {
23484 // AggregateFunction stores all args in the `args` vec
23485 Ok(Expression::Function(Box::new(Function::new(
23486 "JSON_GROUP_OBJECT".to_string(),
23487 af.args,
23488 ))))
23489 }
23490 other => Ok(other),
23491 }
23492 }
23493
23494 Action::JsonbExistsConvert => {
23495 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
23496 if let Expression::Function(f) = e {
23497 if f.args.len() == 2 {
23498 let json_expr = f.args[0].clone();
23499 let key = match &f.args[1] {
23500 Expression::Literal(crate::expressions::Literal::String(s)) => {
23501 format!("$.{}", s)
23502 }
23503 _ => return Ok(Expression::Function(f)),
23504 };
23505 Ok(Expression::Function(Box::new(Function::new(
23506 "JSON_EXISTS".to_string(),
23507 vec![json_expr, Expression::string(&key)],
23508 ))))
23509 } else {
23510 Ok(Expression::Function(f))
23511 }
23512 } else {
23513 Ok(e)
23514 }
23515 }
23516
23517 Action::DateBinConvert => {
23518 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
23519 if let Expression::Function(f) = e {
23520 Ok(Expression::Function(Box::new(Function::new(
23521 "TIME_BUCKET".to_string(),
23522 f.args,
23523 ))))
23524 } else {
23525 Ok(e)
23526 }
23527 }
23528
23529 Action::MysqlCastCharToText => {
23530 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
23531 if let Expression::Cast(mut c) = e {
23532 c.to = DataType::Text;
23533 Ok(Expression::Cast(c))
23534 } else {
23535 Ok(e)
23536 }
23537 }
23538
23539 Action::SparkCastVarcharToString => {
23540 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
23541 match e {
23542 Expression::Cast(mut c) => {
23543 c.to = Self::normalize_varchar_to_string(c.to);
23544 Ok(Expression::Cast(c))
23545 }
23546 Expression::TryCast(mut c) => {
23547 c.to = Self::normalize_varchar_to_string(c.to);
23548 Ok(Expression::TryCast(c))
23549 }
23550 _ => Ok(e),
23551 }
23552 }
23553
23554 Action::MinMaxToLeastGreatest => {
23555 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
23556 if let Expression::Function(f) = e {
23557 let new_name = if f.name.eq_ignore_ascii_case("MIN") {
23558 "LEAST"
23559 } else if f.name.eq_ignore_ascii_case("MAX") {
23560 "GREATEST"
23561 } else {
23562 return Ok(Expression::Function(f));
23563 };
23564 Ok(Expression::Function(Box::new(Function::new(
23565 new_name.to_string(),
23566 f.args,
23567 ))))
23568 } else {
23569 Ok(e)
23570 }
23571 }
23572
23573 Action::ClickHouseUniqToApproxCountDistinct => {
23574 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
23575 if let Expression::Function(f) = e {
23576 Ok(Expression::Function(Box::new(Function::new(
23577 "APPROX_COUNT_DISTINCT".to_string(),
23578 f.args,
23579 ))))
23580 } else {
23581 Ok(e)
23582 }
23583 }
23584
23585 Action::ClickHouseAnyToAnyValue => {
23586 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
23587 if let Expression::Function(f) = e {
23588 Ok(Expression::Function(Box::new(Function::new(
23589 "ANY_VALUE".to_string(),
23590 f.args,
23591 ))))
23592 } else {
23593 Ok(e)
23594 }
23595 }
23596
23597 Action::OracleVarchar2ToVarchar => {
23598 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
23599 if let Expression::DataType(DataType::Custom { ref name }) = e {
23600 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
23601 let starts_varchar2 = name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2(");
23602 let starts_nvarchar2 = name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2(");
23603 let inner =
23604 if starts_varchar2 || starts_nvarchar2 {
23605 let start = if starts_nvarchar2 { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
23606 let end = name.len() - 1; // skip trailing ")"
23607 Some(&name[start..end])
23608 } else {
23609 Option::None
23610 };
23611 if let Some(inner_str) = inner {
23612 // Parse the number part, ignoring BYTE/CHAR qualifier
23613 let num_str = inner_str.split_whitespace().next().unwrap_or("");
23614 if let Ok(n) = num_str.parse::<u32>() {
23615 Ok(Expression::DataType(DataType::VarChar {
23616 length: Some(n),
23617 parenthesized_length: false,
23618 }))
23619 } else {
23620 Ok(e)
23621 }
23622 } else {
23623 // Plain VARCHAR2 / NVARCHAR2 without parens
23624 Ok(Expression::DataType(DataType::VarChar {
23625 length: Option::None,
23626 parenthesized_length: false,
23627 }))
23628 }
23629 } else {
23630 Ok(e)
23631 }
23632 }
23633
            Action::Nvl2Expand => {
                // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
                // But keep as NVL2 for dialects that support it natively
                let nvl2_native = matches!(
                    target,
                    DialectType::Oracle
                        | DialectType::Snowflake
                        | DialectType::Redshift
                        | DialectType::Teradata
                        | DialectType::Spark
                        | DialectType::Databricks
                );
                // Pull (a, b, c) out of either a typed Nvl2 node or a plain
                // NVL2(...) function call; return early for native targets.
                let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
                    if nvl2_native {
                        return Ok(Expression::Nvl2(nvl2));
                    }
                    (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
                } else if let Expression::Function(f) = e {
                    if nvl2_native {
                        return Ok(Expression::Function(Box::new(Function::new(
                            "NVL2".to_string(),
                            f.args,
                        ))));
                    }
                    if f.args.len() < 2 {
                        return Ok(Expression::Function(f));
                    }
                    let mut args = f.args;
                    let a = args.remove(0);
                    let b = args.remove(0);
                    // The ELSE value is optional in the function-call form.
                    let c = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };
                    (a, b, c)
                } else {
                    return Ok(e);
                };
                // Build: NOT (a IS NULL)
                let is_null = Expression::IsNull(Box::new(IsNull {
                    this: a,
                    not: false,
                    postfix_form: false,
                }));
                let not_null = Expression::Not(Box::new(crate::expressions::UnaryOp {
                    this: is_null,
                    inferred_type: None,
                }));
                Ok(Expression::Case(Box::new(Case {
                    operand: Option::None,
                    whens: vec![(not_null, b)],
                    else_: c,
                    comments: Vec::new(),
                    inferred_type: None,
                })))
            }
23691
23692 Action::IfnullToCoalesce => {
23693 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
23694 if let Expression::Coalesce(mut cf) = e {
23695 cf.original_name = Option::None;
23696 Ok(Expression::Coalesce(cf))
23697 } else if let Expression::Function(f) = e {
23698 Ok(Expression::Function(Box::new(Function::new(
23699 "COALESCE".to_string(),
23700 f.args,
23701 ))))
23702 } else {
23703 Ok(e)
23704 }
23705 }
23706
23707 Action::IsAsciiConvert => {
23708 // IS_ASCII(x) -> dialect-specific ASCII check
23709 if let Expression::Function(f) = e {
23710 let arg = f.args.into_iter().next().unwrap();
23711 match target {
23712 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
23713 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
23714 Ok(Expression::Function(Box::new(Function::new(
23715 "REGEXP_LIKE".to_string(),
23716 vec![
23717 arg,
23718 Expression::Literal(Literal::String(
23719 "^[[:ascii:]]*$".to_string(),
23720 )),
23721 ],
23722 ))))
23723 }
23724 DialectType::PostgreSQL
23725 | DialectType::Redshift
23726 | DialectType::Materialize
23727 | DialectType::RisingWave => {
23728 // (x ~ '^[[:ascii:]]*$')
23729 Ok(Expression::Paren(Box::new(Paren {
23730 this: Expression::RegexpLike(Box::new(
23731 crate::expressions::RegexpFunc {
23732 this: arg,
23733 pattern: Expression::Literal(Literal::String(
23734 "^[[:ascii:]]*$".to_string(),
23735 )),
23736 flags: Option::None,
23737 },
23738 )),
23739 trailing_comments: Vec::new(),
23740 })))
23741 }
23742 DialectType::SQLite => {
23743 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
23744 let hex_lit = Expression::Literal(Literal::HexString(
23745 "2a5b5e012d7f5d2a".to_string(),
23746 ));
23747 let cast_expr = Expression::Cast(Box::new(Cast {
23748 this: hex_lit,
23749 to: DataType::Text,
23750 trailing_comments: Vec::new(),
23751 double_colon_syntax: false,
23752 format: Option::None,
23753 default: Option::None,
23754 inferred_type: None,
23755 }));
23756 let glob = Expression::Glob(Box::new(BinaryOp {
23757 left: arg,
23758 right: cast_expr,
23759 left_comments: Vec::new(),
23760 operator_comments: Vec::new(),
23761 trailing_comments: Vec::new(),
23762 inferred_type: None,
23763 }));
23764 Ok(Expression::Paren(Box::new(Paren {
23765 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
23766 this: glob,
23767 inferred_type: None,
23768 })),
23769 trailing_comments: Vec::new(),
23770 })))
23771 }
23772 DialectType::TSQL | DialectType::Fabric => {
23773 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
23774 let hex_lit = Expression::Literal(Literal::HexNumber(
23775 "255b5e002d7f5d25".to_string(),
23776 ));
23777 let convert_expr = Expression::Convert(Box::new(
23778 crate::expressions::ConvertFunc {
23779 this: hex_lit,
23780 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
23781 style: None,
23782 },
23783 ));
23784 let collated = Expression::Collation(Box::new(
23785 crate::expressions::CollationExpr {
23786 this: convert_expr,
23787 collation: "Latin1_General_BIN".to_string(),
23788 quoted: false,
23789 double_quoted: false,
23790 },
23791 ));
23792 let patindex = Expression::Function(Box::new(Function::new(
23793 "PATINDEX".to_string(),
23794 vec![collated, arg],
23795 )));
23796 let zero = Expression::Literal(Literal::Number("0".to_string()));
23797 let eq_zero = Expression::Eq(Box::new(BinaryOp {
23798 left: patindex,
23799 right: zero,
23800 left_comments: Vec::new(),
23801 operator_comments: Vec::new(),
23802 trailing_comments: Vec::new(),
23803 inferred_type: None,
23804 }));
23805 Ok(Expression::Paren(Box::new(Paren {
23806 this: eq_zero,
23807 trailing_comments: Vec::new(),
23808 })))
23809 }
23810 DialectType::Oracle => {
23811 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
23812 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
23813 let s1 = Expression::Literal(Literal::String("^[".to_string()));
23814 let chr1 = Expression::Function(Box::new(Function::new(
23815 "CHR".to_string(),
23816 vec![Expression::Literal(Literal::Number("1".to_string()))],
23817 )));
23818 let dash = Expression::Literal(Literal::String("-".to_string()));
23819 let chr127 = Expression::Function(Box::new(Function::new(
23820 "CHR".to_string(),
23821 vec![Expression::Literal(Literal::Number("127".to_string()))],
23822 )));
23823 let s2 = Expression::Literal(Literal::String("]*$".to_string()));
23824 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
23825 let concat1 =
23826 Expression::DPipe(Box::new(crate::expressions::DPipe {
23827 this: Box::new(s1),
23828 expression: Box::new(chr1),
23829 safe: None,
23830 }));
23831 let concat2 =
23832 Expression::DPipe(Box::new(crate::expressions::DPipe {
23833 this: Box::new(concat1),
23834 expression: Box::new(dash),
23835 safe: None,
23836 }));
23837 let concat3 =
23838 Expression::DPipe(Box::new(crate::expressions::DPipe {
23839 this: Box::new(concat2),
23840 expression: Box::new(chr127),
23841 safe: None,
23842 }));
23843 let concat4 =
23844 Expression::DPipe(Box::new(crate::expressions::DPipe {
23845 this: Box::new(concat3),
23846 expression: Box::new(s2),
23847 safe: None,
23848 }));
23849 let regexp_like = Expression::Function(Box::new(Function::new(
23850 "REGEXP_LIKE".to_string(),
23851 vec![arg, concat4],
23852 )));
23853 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
23854 let true_expr = Expression::Column(Box::new(crate::expressions::Column {
23855 name: Identifier {
23856 name: "TRUE".to_string(),
23857 quoted: false,
23858 trailing_comments: Vec::new(),
23859 span: None,
23860 },
23861 table: None,
23862 join_mark: false,
23863 trailing_comments: Vec::new(),
23864 span: None,
23865 inferred_type: None,
23866 }));
23867 let nvl = Expression::Function(Box::new(Function::new(
23868 "NVL".to_string(),
23869 vec![regexp_like, true_expr],
23870 )));
23871 Ok(nvl)
23872 }
23873 _ => Ok(Expression::Function(Box::new(Function::new(
23874 "IS_ASCII".to_string(),
23875 vec![arg],
23876 )))),
23877 }
23878 } else {
23879 Ok(e)
23880 }
23881 }
23882
            Action::StrPositionConvert => {
                // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
                if let Expression::Function(f) = e {
                    if f.args.len() < 2 {
                        return Ok(Expression::Function(f));
                    }
                    let mut args = f.args;

                    // Positional arguments: required haystack/needle, then an
                    // optional start position and an optional occurrence count.
                    let haystack = args.remove(0);
                    let needle = args.remove(0);
                    let position = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };
                    let occurrence = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };

                    // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
                    // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
                    fn build_position_expansion(
                        haystack: Expression,
                        needle: Expression,
                        pos: Expression,
                        occurrence: Option<Expression>,
                        inner_func: &str,
                        wrapper: &str, // "CASE", "IF", "IIF"
                    ) -> Expression {
                        let substr = Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![haystack, pos.clone()],
                        )));
                        let mut inner_args = vec![substr, needle];
                        if let Some(occ) = occurrence {
                            inner_args.push(occ);
                        }
                        let inner_call = Expression::Function(Box::new(Function::new(
                            inner_func.to_string(),
                            inner_args,
                        )));
                        let zero = Expression::Literal(Literal::Number("0".to_string()));
                        let one = Expression::Literal(Literal::Number("1".to_string()));
                        // func(...) = 0 means "not found": the overall result is 0.
                        let eq_zero = Expression::Eq(Box::new(BinaryOp {
                            left: inner_call.clone(),
                            right: zero.clone(),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        // Otherwise re-base the index into the full string:
                        // func(...) + pos - 1.
                        let add_pos = Expression::Add(Box::new(BinaryOp {
                            left: inner_call,
                            right: pos,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        let sub_one = Expression::Sub(Box::new(BinaryOp {
                            left: add_pos,
                            right: one,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));

                        match wrapper {
                            "CASE" => Expression::Case(Box::new(Case {
                                operand: Option::None,
                                whens: vec![(eq_zero, zero)],
                                else_: Some(sub_one),
                                comments: Vec::new(),
                                inferred_type: None,
                            })),
                            "IIF" => Expression::Function(Box::new(Function::new(
                                "IIF".to_string(),
                                vec![eq_zero, zero, sub_one],
                            ))),
                            _ => Expression::Function(Box::new(Function::new(
                                "IF".to_string(),
                                vec![eq_zero, zero, sub_one],
                            ))),
                        }
                    }

                    match target {
                        // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
                        DialectType::Athena
                        | DialectType::DuckDB
                        | DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Drill => {
                            if let Some(pos) = position {
                                let wrapper = if matches!(target, DialectType::DuckDB) {
                                    "CASE"
                                } else {
                                    "IF"
                                };
                                let result = build_position_expansion(
                                    haystack, needle, pos, occurrence, "STRPOS", wrapper,
                                );
                                if matches!(target, DialectType::Drill) {
                                    // Drill uses backtick-quoted `IF`
                                    if let Expression::Function(mut f) = result {
                                        f.name = "`IF`".to_string();
                                        Ok(Expression::Function(f))
                                    } else {
                                        Ok(result)
                                    }
                                } else {
                                    Ok(result)
                                }
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "STRPOS".to_string(),
                                    vec![haystack, needle],
                                ))))
                            }
                        }
                        // SQLite: IIF wrapper
                        DialectType::SQLite => {
                            if let Some(pos) = position {
                                Ok(build_position_expansion(
                                    haystack, needle, pos, occurrence, "INSTR", "IIF",
                                ))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "INSTR".to_string(),
                                    vec![haystack, needle],
                                ))))
                            }
                        }
                        // INSTR group: Teradata, BigQuery, Oracle
                        // (INSTR accepts position and occurrence natively.)
                        DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            if let Some(occ) = occurrence {
                                a.push(occ);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "INSTR".to_string(),
                                a,
                            ))))
                        }
                        // CHARINDEX group: Snowflake, TSQL
                        // NOTE(review): CHARINDEX has no occurrence parameter, so a
                        // fourth STR_POSITION argument is dropped for these targets.
                        DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
                            let mut a = vec![needle, haystack];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "CHARINDEX".to_string(),
                                a,
                            ))))
                        }
                        // POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
                        DialectType::PostgreSQL
                        | DialectType::Materialize
                        | DialectType::RisingWave
                        | DialectType::Redshift => {
                            if let Some(pos) = position {
                                // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
                                //   ELSE POSITION(...) + pos - 1 END
                                let substr = Expression::Substring(Box::new(
                                    crate::expressions::SubstringFunc {
                                        this: haystack,
                                        start: pos.clone(),
                                        length: Option::None,
                                        from_for_syntax: true,
                                    },
                                ));
                                let pos_in = Expression::StrPosition(Box::new(
                                    crate::expressions::StrPosition {
                                        this: Box::new(substr),
                                        substr: Some(Box::new(needle)),
                                        position: Option::None,
                                        occurrence: Option::None,
                                    },
                                ));
                                let zero =
                                    Expression::Literal(Literal::Number("0".to_string()));
                                let one = Expression::Literal(Literal::Number("1".to_string()));
                                let eq_zero = Expression::Eq(Box::new(BinaryOp {
                                    left: pos_in.clone(),
                                    right: zero.clone(),
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                                let add_pos = Expression::Add(Box::new(BinaryOp {
                                    left: pos_in,
                                    right: pos,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                                let sub_one = Expression::Sub(Box::new(BinaryOp {
                                    left: add_pos,
                                    right: one,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                                Ok(Expression::Case(Box::new(Case {
                                    operand: Option::None,
                                    whens: vec![(eq_zero, zero)],
                                    else_: Some(sub_one),
                                    comments: Vec::new(),
                                    inferred_type: None,
                                })))
                            } else {
                                Ok(Expression::StrPosition(Box::new(
                                    crate::expressions::StrPosition {
                                        this: Box::new(haystack),
                                        substr: Some(Box::new(needle)),
                                        position: Option::None,
                                        occurrence: Option::None,
                                    },
                                )))
                            }
                        }
                        // LOCATE group: MySQL, Hive, Spark, Databricks, Doris
                        DialectType::MySQL
                        | DialectType::SingleStore
                        | DialectType::TiDB
                        | DialectType::Hive
                        | DialectType::Spark
                        | DialectType::Databricks
                        | DialectType::Doris
                        | DialectType::StarRocks => {
                            let mut a = vec![needle, haystack];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "LOCATE".to_string(),
                                a,
                            ))))
                        }
                        // ClickHouse: POSITION(haystack, needle[, position])
                        DialectType::ClickHouse => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "POSITION".to_string(),
                                a,
                            ))))
                        }
                        // Everything else: keep STR_POSITION with all given args.
                        _ => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            if let Some(occ) = occurrence {
                                a.push(occ);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "STR_POSITION".to_string(),
                                a,
                            ))))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
24160
24161 Action::ArraySumConvert => {
24162 // ARRAY_SUM(arr) -> dialect-specific
24163 if let Expression::Function(f) = e {
24164 let args = f.args;
24165 match target {
24166 DialectType::DuckDB => Ok(Expression::Function(Box::new(
24167 Function::new("LIST_SUM".to_string(), args),
24168 ))),
24169 DialectType::Spark | DialectType::Databricks => {
24170 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
24171 let arr = args.into_iter().next().unwrap();
24172 let zero = Expression::Literal(Literal::Number("0".to_string()));
24173 let acc_id = Identifier::new("acc");
24174 let x_id = Identifier::new("x");
24175 let acc = Expression::Identifier(acc_id.clone());
24176 let x = Expression::Identifier(x_id.clone());
24177 let add = Expression::Add(Box::new(BinaryOp {
24178 left: acc.clone(),
24179 right: x,
24180 left_comments: Vec::new(),
24181 operator_comments: Vec::new(),
24182 trailing_comments: Vec::new(),
24183 inferred_type: None,
24184 }));
24185 let lambda1 =
24186 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24187 parameters: vec![acc_id.clone(), x_id],
24188 body: add,
24189 colon: false,
24190 parameter_types: Vec::new(),
24191 }));
24192 let lambda2 =
24193 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
24194 parameters: vec![acc_id],
24195 body: acc,
24196 colon: false,
24197 parameter_types: Vec::new(),
24198 }));
24199 Ok(Expression::Function(Box::new(Function::new(
24200 "AGGREGATE".to_string(),
24201 vec![arr, zero, lambda1, lambda2],
24202 ))))
24203 }
24204 DialectType::Presto | DialectType::Athena => {
24205 // Presto/Athena keep ARRAY_SUM natively
24206 Ok(Expression::Function(Box::new(Function::new(
24207 "ARRAY_SUM".to_string(),
24208 args,
24209 ))))
24210 }
24211 DialectType::Trino => {
24212 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
24213 if args.len() == 1 {
24214 let arr = args.into_iter().next().unwrap();
24215 let zero =
24216 Expression::Literal(Literal::Number("0".to_string()));
24217 let acc_id = Identifier::new("acc");
24218 let x_id = Identifier::new("x");
24219 let acc = Expression::Identifier(acc_id.clone());
24220 let x = Expression::Identifier(x_id.clone());
24221 let add = Expression::Add(Box::new(BinaryOp {
24222 left: acc.clone(),
24223 right: x,
24224 left_comments: Vec::new(),
24225 operator_comments: Vec::new(),
24226 trailing_comments: Vec::new(),
24227 inferred_type: None,
24228 }));
24229 let lambda1 = Expression::Lambda(Box::new(
24230 crate::expressions::LambdaExpr {
24231 parameters: vec![acc_id.clone(), x_id],
24232 body: add,
24233 colon: false,
24234 parameter_types: Vec::new(),
24235 },
24236 ));
24237 let lambda2 = Expression::Lambda(Box::new(
24238 crate::expressions::LambdaExpr {
24239 parameters: vec![acc_id],
24240 body: acc,
24241 colon: false,
24242 parameter_types: Vec::new(),
24243 },
24244 ));
24245 Ok(Expression::Function(Box::new(Function::new(
24246 "REDUCE".to_string(),
24247 vec![arr, zero, lambda1, lambda2],
24248 ))))
24249 } else {
24250 Ok(Expression::Function(Box::new(Function::new(
24251 "ARRAY_SUM".to_string(),
24252 args,
24253 ))))
24254 }
24255 }
24256 DialectType::ClickHouse => {
24257 // arraySum(lambda, arr) or arraySum(arr)
24258 Ok(Expression::Function(Box::new(Function::new(
24259 "arraySum".to_string(),
24260 args,
24261 ))))
24262 }
24263 _ => Ok(Expression::Function(Box::new(Function::new(
24264 "ARRAY_SUM".to_string(),
24265 args,
24266 )))),
24267 }
24268 } else {
24269 Ok(e)
24270 }
24271 }
24272
24273 Action::ArraySizeConvert => {
24274 if let Expression::Function(f) = e {
24275 Ok(Expression::Function(Box::new(Function::new(
24276 "REPEATED_COUNT".to_string(),
24277 f.args,
24278 ))))
24279 } else {
24280 Ok(e)
24281 }
24282 }
24283
            Action::ArrayAnyConvert => {
                // ARRAY_ANY(arr, lambda) -> dialect-specific "does any element
                // satisfy the predicate" rewrite. Dialects with a native ANY_MATCH
                // use it directly; the others get the pattern
                //   (len(arr) = 0 OR len(filter(arr, pred)) <> 0)
                // which mirrors ARRAY_ANY's convention of returning TRUE for an
                // empty array.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    if args.len() == 2 {
                        let arr = args.remove(0);
                        let lambda = args.remove(0);

                        // Extract lambda parameter name and body.
                        // Falls back to "x" / the raw expression when the second
                        // argument is not actually a lambda.
                        let (param_name, pred_body) =
                            if let Expression::Lambda(ref lam) = lambda {
                                let name = if let Some(p) = lam.parameters.first() {
                                    p.name.clone()
                                } else {
                                    "x".to_string()
                                };
                                (name, lam.body.clone())
                            } else {
                                ("x".to_string(), lambda.clone())
                            };

                        // Helper: build a function call Expression
                        let make_func = |name: &str, args: Vec<Expression>| -> Expression {
                            Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                args,
                            )))
                        };

                        // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren
                        // `len_args_extra` carries trailing arguments for the length
                        // function (e.g. PostgreSQL's ARRAY_LENGTH(arr, 1) dimension).
                        let build_filter_pattern = |len_func: &str,
                                                    len_args_extra: Vec<Expression>,
                                                    filter_expr: Expression|
                         -> Expression {
                            // len_func(arr, ...extra) = 0
                            let mut len_arr_args = vec![arr.clone()];
                            len_arr_args.extend(len_args_extra.clone());
                            let len_arr = make_func(len_func, len_arr_args);
                            let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
                                len_arr,
                                Expression::number(0),
                            )));

                            // len_func(filter_expr, ...extra) <> 0
                            let mut len_filter_args = vec![filter_expr];
                            len_filter_args.extend(len_args_extra);
                            let len_filter = make_func(len_func, len_filter_args);
                            let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
                                len_filter,
                                Expression::number(0),
                            )));

                            // (eq_zero OR neq_zero)
                            let or_expr =
                                Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
                            Expression::Paren(Box::new(Paren {
                                this: or_expr,
                                trailing_comments: Vec::new(),
                            }))
                        };

                        match target {
                            // Native ANY_MATCH(arr, lambda).
                            DialectType::Trino | DialectType::Presto | DialectType::Athena => {
                                Ok(make_func("ANY_MATCH", vec![arr, lambda]))
                            }
                            DialectType::ClickHouse => {
                                // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
                                // ClickHouse arrayFilter takes lambda first, then array
                                let filter_expr =
                                    make_func("arrayFilter", vec![lambda, arr.clone()]);
                                Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
                            }
                            DialectType::Databricks | DialectType::Spark => {
                                // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("SIZE", vec![], filter_expr))
                            }
                            DialectType::DuckDB => {
                                // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("LIST_FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
                            }
                            DialectType::Teradata => {
                                // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
                            }
                            DialectType::BigQuery => {
                                // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
                                // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
                                // The lambda body is reused verbatim as the WHERE
                                // predicate; its parameter becomes the UNNEST alias.
                                let param_col = Expression::column(&param_name);
                                let unnest_expr = Expression::Unnest(Box::new(
                                    crate::expressions::UnnestFunc {
                                        this: arr.clone(),
                                        expressions: vec![],
                                        with_ordinality: false,
                                        alias: Some(Identifier::new(&param_name)),
                                        offset_alias: None,
                                    },
                                ));
                                let mut sel = crate::expressions::Select::default();
                                sel.expressions = vec![param_col];
                                sel.from = Some(crate::expressions::From {
                                    expressions: vec![unnest_expr],
                                });
                                sel.where_clause =
                                    Some(crate::expressions::Where { this: pred_body });
                                let array_subquery =
                                    make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                                Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
                            }
                            DialectType::PostgreSQL => {
                                // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
                                // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
                                let param_col = Expression::column(&param_name);
                                // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
                                let unnest_with_alias =
                                    Expression::Alias(Box::new(crate::expressions::Alias {
                                        this: Expression::Unnest(Box::new(
                                            crate::expressions::UnnestFunc {
                                                this: arr.clone(),
                                                expressions: vec![],
                                                with_ordinality: false,
                                                alias: None,
                                                offset_alias: None,
                                            },
                                        )),
                                        alias: Identifier::new("_t0"),
                                        column_aliases: vec![Identifier::new(&param_name)],
                                        pre_alias_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                let mut sel = crate::expressions::Select::default();
                                sel.expressions = vec![param_col];
                                sel.from = Some(crate::expressions::From {
                                    expressions: vec![unnest_with_alias],
                                });
                                sel.where_clause =
                                    Some(crate::expressions::Where { this: pred_body });
                                let array_subquery =
                                    make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                                // PostgreSQL's ARRAY_LENGTH needs the dimension (1).
                                Ok(build_filter_pattern(
                                    "ARRAY_LENGTH",
                                    vec![Expression::number(1)],
                                    array_subquery,
                                ))
                            }
                            // No known rewrite: keep the normalized ARRAY_ANY call.
                            _ => Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_ANY".to_string(),
                                vec![arr, lambda],
                            )))),
                        }
                    } else {
                        // Unexpected arity: keep the call unchanged.
                        Ok(Expression::Function(Box::new(Function::new(
                            "ARRAY_ANY".to_string(),
                            args,
                        ))))
                    }
                } else {
                    Ok(e)
                }
            }
24449
            Action::DecodeSimplify => {
                // DECODE(x, search1, result1, ..., default) -> CASE WHEN ... THEN result1 ... [ELSE default] END
                // For literal search values: CASE WHEN x = search THEN result
                // For NULL search: CASE WHEN x IS NULL THEN result
                // For non-literal (column, expr): CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
                //
                // The NULL-safe form matters because DECODE treats NULL as equal to
                // NULL, while plain SQL equality yields NULL.

                // True for expressions that can never be NULL (literals, booleans,
                // negated values), in which case the NULL-safe check is unnecessary.
                fn is_decode_literal(e: &Expression) -> bool {
                    matches!(
                        e,
                        Expression::Literal(_) | Expression::Boolean(_) | Expression::Neg(_)
                    )
                }

                // Turns the operand + (search, result) pairs + optional default into
                // a searched CASE expression, applying the rules above per pair.
                let build_decode_case =
                    |this_expr: Expression,
                     pairs: Vec<(Expression, Expression)>,
                     default: Option<Expression>| {
                        let whens: Vec<(Expression, Expression)> = pairs
                            .into_iter()
                            .map(|(search, result)| {
                                if matches!(&search, Expression::Null(_)) {
                                    // NULL search -> IS NULL
                                    let condition = Expression::Is(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: Expression::Null(crate::expressions::Null),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    (condition, result)
                                } else if is_decode_literal(&search)
                                    || is_decode_literal(&this_expr)
                                {
                                    // At least one side is a literal -> simple equality (no NULL check needed)
                                    let eq = Expression::Eq(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: search,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    (eq, result)
                                } else {
                                    // Non-literal -> null-safe comparison
                                    // Comparison expressions must be parenthesized on
                                    // both sides where `search` is embedded, or the
                                    // generated SQL would re-associate.
                                    let needs_paren = matches!(
                                        &search,
                                        Expression::Eq(_)
                                            | Expression::Neq(_)
                                            | Expression::Gt(_)
                                            | Expression::Gte(_)
                                            | Expression::Lt(_)
                                            | Expression::Lte(_)
                                    );
                                    let search_ref = if needs_paren {
                                        Expression::Paren(Box::new(crate::expressions::Paren {
                                            this: search.clone(),
                                            trailing_comments: Vec::new(),
                                        }))
                                    } else {
                                        search.clone()
                                    };
                                    // Build: x = search OR (x IS NULL AND search IS NULL)
                                    let eq = Expression::Eq(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: search_ref,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    // Second, independent copy of `search` for the
                                    // IS NULL side of the condition.
                                    let search_in_null = if needs_paren {
                                        Expression::Paren(Box::new(crate::expressions::Paren {
                                            this: search.clone(),
                                            trailing_comments: Vec::new(),
                                        }))
                                    } else {
                                        search.clone()
                                    };
                                    let x_is_null = Expression::Is(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: Expression::Null(crate::expressions::Null),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    let search_is_null = Expression::Is(Box::new(BinaryOp {
                                        left: search_in_null,
                                        right: Expression::Null(crate::expressions::Null),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    let both_null = Expression::And(Box::new(BinaryOp {
                                        left: x_is_null,
                                        right: search_is_null,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    let condition = Expression::Or(Box::new(BinaryOp {
                                        left: eq,
                                        right: Expression::Paren(Box::new(
                                            crate::expressions::Paren {
                                                this: both_null,
                                                trailing_comments: Vec::new(),
                                            },
                                        )),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    (condition, result)
                                }
                            })
                            .collect();
                        Expression::Case(Box::new(Case {
                            operand: None,
                            whens,
                            else_: default,
                            comments: Vec::new(),
                            inferred_type: None,
                        }))
                    };

                if let Expression::Decode(decode) = e {
                    // Structured DECODE node: fields map directly onto the builder.
                    Ok(build_decode_case(
                        decode.this,
                        decode.search_results,
                        decode.default,
                    ))
                } else if let Expression::DecodeCase(dc) = e {
                    // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
                    let mut exprs = dc.expressions;
                    if exprs.len() < 3 {
                        // Too few elements to form even one (search, result) pair:
                        // rebuild the node unchanged.
                        return Ok(Expression::DecodeCase(Box::new(
                            crate::expressions::DecodeCase { expressions: exprs },
                        )));
                    }
                    let this_expr = exprs.remove(0);
                    let mut pairs = Vec::new();
                    let mut default = None;
                    let mut i = 0;
                    // Consume (search, result) pairs two at a time.
                    while i + 1 < exprs.len() {
                        pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
                        i += 2;
                    }
                    if i < exprs.len() {
                        // Odd remaining element is the default
                        default = Some(exprs[i].clone());
                    }
                    Ok(build_decode_case(this_expr, pairs, default))
                } else {
                    Ok(e)
                }
            }
24610
24611 Action::CreateTableLikeToCtas => {
24612 // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
24613 if let Expression::CreateTable(ct) = e {
24614 let like_source = ct.constraints.iter().find_map(|c| {
24615 if let crate::expressions::TableConstraint::Like { source, .. } = c {
24616 Some(source.clone())
24617 } else {
24618 None
24619 }
24620 });
24621 if let Some(source_table) = like_source {
24622 let mut new_ct = *ct;
24623 new_ct.constraints.clear();
24624 // Build: SELECT * FROM b LIMIT 0
24625 let select = Expression::Select(Box::new(crate::expressions::Select {
24626 expressions: vec![Expression::Star(crate::expressions::Star {
24627 table: None,
24628 except: None,
24629 replace: None,
24630 rename: None,
24631 trailing_comments: Vec::new(),
24632 span: None,
24633 })],
24634 from: Some(crate::expressions::From {
24635 expressions: vec![Expression::Table(Box::new(source_table))],
24636 }),
24637 limit: Some(crate::expressions::Limit {
24638 this: Expression::Literal(Literal::Number("0".to_string())),
24639 percent: false,
24640 comments: Vec::new(),
24641 }),
24642 ..Default::default()
24643 }));
24644 new_ct.as_select = Some(select);
24645 Ok(Expression::CreateTable(Box::new(new_ct)))
24646 } else {
24647 Ok(Expression::CreateTable(ct))
24648 }
24649 } else {
24650 Ok(e)
24651 }
24652 }
24653
24654 Action::CreateTableLikeToSelectInto => {
24655 // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
24656 if let Expression::CreateTable(ct) = e {
24657 let like_source = ct.constraints.iter().find_map(|c| {
24658 if let crate::expressions::TableConstraint::Like { source, .. } = c {
24659 Some(source.clone())
24660 } else {
24661 None
24662 }
24663 });
24664 if let Some(source_table) = like_source {
24665 let mut aliased_source = source_table;
24666 aliased_source.alias = Some(Identifier::new("temp"));
24667 // Build: SELECT TOP 0 * INTO a FROM b AS temp
24668 let select = Expression::Select(Box::new(crate::expressions::Select {
24669 expressions: vec![Expression::Star(crate::expressions::Star {
24670 table: None,
24671 except: None,
24672 replace: None,
24673 rename: None,
24674 trailing_comments: Vec::new(),
24675 span: None,
24676 })],
24677 from: Some(crate::expressions::From {
24678 expressions: vec![Expression::Table(Box::new(aliased_source))],
24679 }),
24680 into: Some(crate::expressions::SelectInto {
24681 this: Expression::Table(Box::new(ct.name.clone())),
24682 temporary: false,
24683 unlogged: false,
24684 bulk_collect: false,
24685 expressions: Vec::new(),
24686 }),
24687 top: Some(crate::expressions::Top {
24688 this: Expression::Literal(Literal::Number("0".to_string())),
24689 percent: false,
24690 with_ties: false,
24691 parenthesized: false,
24692 }),
24693 ..Default::default()
24694 }));
24695 Ok(select)
24696 } else {
24697 Ok(Expression::CreateTable(ct))
24698 }
24699 } else {
24700 Ok(e)
24701 }
24702 }
24703
24704 Action::CreateTableLikeToAs => {
24705 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
24706 if let Expression::CreateTable(ct) = e {
24707 let like_source = ct.constraints.iter().find_map(|c| {
24708 if let crate::expressions::TableConstraint::Like { source, .. } = c {
24709 Some(source.clone())
24710 } else {
24711 None
24712 }
24713 });
24714 if let Some(source_table) = like_source {
24715 let mut new_ct = *ct;
24716 new_ct.constraints.clear();
24717 // AS b (just a table reference, not a SELECT)
24718 new_ct.as_select = Some(Expression::Table(Box::new(source_table)));
24719 Ok(Expression::CreateTable(Box::new(new_ct)))
24720 } else {
24721 Ok(Expression::CreateTable(ct))
24722 }
24723 } else {
24724 Ok(e)
24725 }
24726 }
24727
24728 Action::TsOrDsToDateConvert => {
24729 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
24730 if let Expression::Function(f) = e {
24731 let mut args = f.args;
24732 let this = args.remove(0);
24733 let fmt = if !args.is_empty() {
24734 match &args[0] {
24735 Expression::Literal(Literal::String(s)) => Some(s.clone()),
24736 _ => None,
24737 }
24738 } else {
24739 None
24740 };
24741 Ok(Expression::TsOrDsToDate(Box::new(
24742 crate::expressions::TsOrDsToDate {
24743 this: Box::new(this),
24744 format: fmt,
24745 safe: None,
24746 },
24747 )))
24748 } else {
24749 Ok(e)
24750 }
24751 }
24752
24753 Action::TsOrDsToDateStrConvert => {
24754 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
24755 if let Expression::Function(f) = e {
24756 let arg = f.args.into_iter().next().unwrap();
24757 let str_type = match target {
24758 DialectType::DuckDB
24759 | DialectType::PostgreSQL
24760 | DialectType::Materialize => DataType::Text,
24761 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
24762 DataType::Custom {
24763 name: "STRING".to_string(),
24764 }
24765 }
24766 DialectType::Presto
24767 | DialectType::Trino
24768 | DialectType::Athena
24769 | DialectType::Drill => DataType::VarChar {
24770 length: None,
24771 parenthesized_length: false,
24772 },
24773 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
24774 DataType::Custom {
24775 name: "STRING".to_string(),
24776 }
24777 }
24778 _ => DataType::VarChar {
24779 length: None,
24780 parenthesized_length: false,
24781 },
24782 };
24783 let cast_expr = Expression::Cast(Box::new(Cast {
24784 this: arg,
24785 to: str_type,
24786 double_colon_syntax: false,
24787 trailing_comments: Vec::new(),
24788 format: None,
24789 default: None,
24790 inferred_type: None,
24791 }));
24792 Ok(Expression::Substring(Box::new(
24793 crate::expressions::SubstringFunc {
24794 this: cast_expr,
24795 start: Expression::number(1),
24796 length: Some(Expression::number(10)),
24797 from_for_syntax: false,
24798 },
24799 )))
24800 } else {
24801 Ok(e)
24802 }
24803 }
24804
24805 Action::DateStrToDateConvert => {
24806 // DATE_STR_TO_DATE(x) -> dialect-specific
24807 if let Expression::Function(f) = e {
24808 let arg = f.args.into_iter().next().unwrap();
24809 match target {
24810 DialectType::SQLite => {
24811 // SQLite: just the bare expression (dates are strings)
24812 Ok(arg)
24813 }
24814 _ => Ok(Expression::Cast(Box::new(Cast {
24815 this: arg,
24816 to: DataType::Date,
24817 double_colon_syntax: false,
24818 trailing_comments: Vec::new(),
24819 format: None,
24820 default: None,
24821 inferred_type: None,
24822 }))),
24823 }
24824 } else {
24825 Ok(e)
24826 }
24827 }
24828
24829 Action::TimeStrToDateConvert => {
24830 // TIME_STR_TO_DATE(x) -> dialect-specific
24831 if let Expression::Function(f) = e {
24832 let arg = f.args.into_iter().next().unwrap();
24833 match target {
24834 DialectType::Hive
24835 | DialectType::Doris
24836 | DialectType::StarRocks
24837 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
24838 Function::new("TO_DATE".to_string(), vec![arg]),
24839 ))),
24840 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24841 // Presto: CAST(x AS TIMESTAMP)
24842 Ok(Expression::Cast(Box::new(Cast {
24843 this: arg,
24844 to: DataType::Timestamp {
24845 timezone: false,
24846 precision: None,
24847 },
24848 double_colon_syntax: false,
24849 trailing_comments: Vec::new(),
24850 format: None,
24851 default: None,
24852 inferred_type: None,
24853 })))
24854 }
24855 _ => {
24856 // Default: CAST(x AS DATE)
24857 Ok(Expression::Cast(Box::new(Cast {
24858 this: arg,
24859 to: DataType::Date,
24860 double_colon_syntax: false,
24861 trailing_comments: Vec::new(),
24862 format: None,
24863 default: None,
24864 inferred_type: None,
24865 })))
24866 }
24867 }
24868 } else {
24869 Ok(e)
24870 }
24871 }
24872
24873 Action::TimeStrToTimeConvert => {
24874 // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type
24875 if let Expression::Function(f) = e {
24876 let mut args = f.args;
24877 let this = args.remove(0);
24878 let zone = if !args.is_empty() {
24879 match &args[0] {
24880 Expression::Literal(Literal::String(s)) => Some(s.clone()),
24881 _ => None,
24882 }
24883 } else {
24884 None
24885 };
24886 let has_zone = zone.is_some();
24887
24888 match target {
24889 DialectType::SQLite => {
24890 // SQLite: just the bare expression
24891 Ok(this)
24892 }
24893 DialectType::MySQL => {
24894 if has_zone {
24895 // MySQL with zone: TIMESTAMP(x)
24896 Ok(Expression::Function(Box::new(Function::new(
24897 "TIMESTAMP".to_string(),
24898 vec![this],
24899 ))))
24900 } else {
24901 // MySQL: CAST(x AS DATETIME) or with precision
24902 // Use DataType::Custom to avoid MySQL's transform_cast converting
24903 // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
24904 let precision =
24905 if let Expression::Literal(Literal::String(ref s)) = this {
24906 if let Some(dot_pos) = s.rfind('.') {
24907 let frac = &s[dot_pos + 1..];
24908 let digit_count = frac
24909 .chars()
24910 .take_while(|c| c.is_ascii_digit())
24911 .count();
24912 if digit_count > 0 {
24913 Some(digit_count)
24914 } else {
24915 None
24916 }
24917 } else {
24918 None
24919 }
24920 } else {
24921 None
24922 };
24923 let type_name = match precision {
24924 Some(p) => format!("DATETIME({})", p),
24925 None => "DATETIME".to_string(),
24926 };
24927 Ok(Expression::Cast(Box::new(Cast {
24928 this,
24929 to: DataType::Custom { name: type_name },
24930 double_colon_syntax: false,
24931 trailing_comments: Vec::new(),
24932 format: None,
24933 default: None,
24934 inferred_type: None,
24935 })))
24936 }
24937 }
24938 DialectType::ClickHouse => {
24939 if has_zone {
24940 // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
24941 // We need to strip the timezone offset from the literal if present
24942 let clean_this =
24943 if let Expression::Literal(Literal::String(ref s)) = this {
24944 // Strip timezone offset like "-08:00" or "+00:00"
24945 let re_offset = s.rfind(|c: char| c == '+' || c == '-');
24946 if let Some(offset_pos) = re_offset {
24947 if offset_pos > 10 {
24948 // After the date part
24949 let trimmed = s[..offset_pos].to_string();
24950 Expression::Literal(Literal::String(trimmed))
24951 } else {
24952 this.clone()
24953 }
24954 } else {
24955 this.clone()
24956 }
24957 } else {
24958 this.clone()
24959 };
24960 let zone_str = zone.unwrap();
24961 // Build: CAST(x AS DateTime64(6, 'zone'))
24962 let type_name = format!("DateTime64(6, '{}')", zone_str);
24963 Ok(Expression::Cast(Box::new(Cast {
24964 this: clean_this,
24965 to: DataType::Custom { name: type_name },
24966 double_colon_syntax: false,
24967 trailing_comments: Vec::new(),
24968 format: None,
24969 default: None,
24970 inferred_type: None,
24971 })))
24972 } else {
24973 Ok(Expression::Cast(Box::new(Cast {
24974 this,
24975 to: DataType::Custom {
24976 name: "DateTime64(6)".to_string(),
24977 },
24978 double_colon_syntax: false,
24979 trailing_comments: Vec::new(),
24980 format: None,
24981 default: None,
24982 inferred_type: None,
24983 })))
24984 }
24985 }
24986 DialectType::BigQuery => {
24987 if has_zone {
24988 // BigQuery with zone: CAST(x AS TIMESTAMP)
24989 Ok(Expression::Cast(Box::new(Cast {
24990 this,
24991 to: DataType::Timestamp {
24992 timezone: false,
24993 precision: None,
24994 },
24995 double_colon_syntax: false,
24996 trailing_comments: Vec::new(),
24997 format: None,
24998 default: None,
24999 inferred_type: None,
25000 })))
25001 } else {
25002 // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
25003 Ok(Expression::Cast(Box::new(Cast {
25004 this,
25005 to: DataType::Custom {
25006 name: "DATETIME".to_string(),
25007 },
25008 double_colon_syntax: false,
25009 trailing_comments: Vec::new(),
25010 format: None,
25011 default: None,
25012 inferred_type: None,
25013 })))
25014 }
25015 }
25016 DialectType::Doris => {
25017 // Doris: CAST(x AS DATETIME)
25018 Ok(Expression::Cast(Box::new(Cast {
25019 this,
25020 to: DataType::Custom {
25021 name: "DATETIME".to_string(),
25022 },
25023 double_colon_syntax: false,
25024 trailing_comments: Vec::new(),
25025 format: None,
25026 default: None,
25027 inferred_type: None,
25028 })))
25029 }
25030 DialectType::TSQL | DialectType::Fabric => {
25031 if has_zone {
25032 // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
25033 let cast_expr = Expression::Cast(Box::new(Cast {
25034 this,
25035 to: DataType::Custom {
25036 name: "DATETIMEOFFSET".to_string(),
25037 },
25038 double_colon_syntax: false,
25039 trailing_comments: Vec::new(),
25040 format: None,
25041 default: None,
25042 inferred_type: None,
25043 }));
25044 Ok(Expression::AtTimeZone(Box::new(
25045 crate::expressions::AtTimeZone {
25046 this: cast_expr,
25047 zone: Expression::Literal(Literal::String(
25048 "UTC".to_string(),
25049 )),
25050 },
25051 )))
25052 } else {
25053 // TSQL: CAST(x AS DATETIME2)
25054 Ok(Expression::Cast(Box::new(Cast {
25055 this,
25056 to: DataType::Custom {
25057 name: "DATETIME2".to_string(),
25058 },
25059 double_colon_syntax: false,
25060 trailing_comments: Vec::new(),
25061 format: None,
25062 default: None,
25063 inferred_type: None,
25064 })))
25065 }
25066 }
25067 DialectType::DuckDB => {
25068 if has_zone {
25069 // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
25070 Ok(Expression::Cast(Box::new(Cast {
25071 this,
25072 to: DataType::Timestamp {
25073 timezone: true,
25074 precision: None,
25075 },
25076 double_colon_syntax: false,
25077 trailing_comments: Vec::new(),
25078 format: None,
25079 default: None,
25080 inferred_type: None,
25081 })))
25082 } else {
25083 // DuckDB: CAST(x AS TIMESTAMP)
25084 Ok(Expression::Cast(Box::new(Cast {
25085 this,
25086 to: DataType::Timestamp {
25087 timezone: false,
25088 precision: None,
25089 },
25090 double_colon_syntax: false,
25091 trailing_comments: Vec::new(),
25092 format: None,
25093 default: None,
25094 inferred_type: None,
25095 })))
25096 }
25097 }
25098 DialectType::PostgreSQL
25099 | DialectType::Materialize
25100 | DialectType::RisingWave => {
25101 if has_zone {
25102 // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
25103 Ok(Expression::Cast(Box::new(Cast {
25104 this,
25105 to: DataType::Timestamp {
25106 timezone: true,
25107 precision: None,
25108 },
25109 double_colon_syntax: false,
25110 trailing_comments: Vec::new(),
25111 format: None,
25112 default: None,
25113 inferred_type: None,
25114 })))
25115 } else {
25116 // PostgreSQL: CAST(x AS TIMESTAMP)
25117 Ok(Expression::Cast(Box::new(Cast {
25118 this,
25119 to: DataType::Timestamp {
25120 timezone: false,
25121 precision: None,
25122 },
25123 double_colon_syntax: false,
25124 trailing_comments: Vec::new(),
25125 format: None,
25126 default: None,
25127 inferred_type: None,
25128 })))
25129 }
25130 }
25131 DialectType::Snowflake => {
25132 if has_zone {
25133 // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
25134 Ok(Expression::Cast(Box::new(Cast {
25135 this,
25136 to: DataType::Timestamp {
25137 timezone: true,
25138 precision: None,
25139 },
25140 double_colon_syntax: false,
25141 trailing_comments: Vec::new(),
25142 format: None,
25143 default: None,
25144 inferred_type: None,
25145 })))
25146 } else {
25147 // Snowflake: CAST(x AS TIMESTAMP)
25148 Ok(Expression::Cast(Box::new(Cast {
25149 this,
25150 to: DataType::Timestamp {
25151 timezone: false,
25152 precision: None,
25153 },
25154 double_colon_syntax: false,
25155 trailing_comments: Vec::new(),
25156 format: None,
25157 default: None,
25158 inferred_type: None,
25159 })))
25160 }
25161 }
25162 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25163 if has_zone {
25164 // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
25165 // Check for precision from sub-second digits
25166 let precision =
25167 if let Expression::Literal(Literal::String(ref s)) = this {
25168 if let Some(dot_pos) = s.rfind('.') {
25169 let frac = &s[dot_pos + 1..];
25170 let digit_count = frac
25171 .chars()
25172 .take_while(|c| c.is_ascii_digit())
25173 .count();
25174 if digit_count > 0
25175 && matches!(target, DialectType::Trino)
25176 {
25177 Some(digit_count as u32)
25178 } else {
25179 None
25180 }
25181 } else {
25182 None
25183 }
25184 } else {
25185 None
25186 };
25187 let dt = if let Some(prec) = precision {
25188 DataType::Timestamp {
25189 timezone: true,
25190 precision: Some(prec),
25191 }
25192 } else {
25193 DataType::Timestamp {
25194 timezone: true,
25195 precision: None,
25196 }
25197 };
25198 Ok(Expression::Cast(Box::new(Cast {
25199 this,
25200 to: dt,
25201 double_colon_syntax: false,
25202 trailing_comments: Vec::new(),
25203 format: None,
25204 default: None,
25205 inferred_type: None,
25206 })))
25207 } else {
25208 // Check for sub-second precision for Trino
25209 let precision =
25210 if let Expression::Literal(Literal::String(ref s)) = this {
25211 if let Some(dot_pos) = s.rfind('.') {
25212 let frac = &s[dot_pos + 1..];
25213 let digit_count = frac
25214 .chars()
25215 .take_while(|c| c.is_ascii_digit())
25216 .count();
25217 if digit_count > 0
25218 && matches!(target, DialectType::Trino)
25219 {
25220 Some(digit_count as u32)
25221 } else {
25222 None
25223 }
25224 } else {
25225 None
25226 }
25227 } else {
25228 None
25229 };
25230 let dt = DataType::Timestamp {
25231 timezone: false,
25232 precision,
25233 };
25234 Ok(Expression::Cast(Box::new(Cast {
25235 this,
25236 to: dt,
25237 double_colon_syntax: false,
25238 trailing_comments: Vec::new(),
25239 format: None,
25240 default: None,
25241 inferred_type: None,
25242 })))
25243 }
25244 }
25245 DialectType::Redshift => {
25246 if has_zone {
25247 // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
25248 Ok(Expression::Cast(Box::new(Cast {
25249 this,
25250 to: DataType::Timestamp {
25251 timezone: true,
25252 precision: None,
25253 },
25254 double_colon_syntax: false,
25255 trailing_comments: Vec::new(),
25256 format: None,
25257 default: None,
25258 inferred_type: None,
25259 })))
25260 } else {
25261 // Redshift: CAST(x AS TIMESTAMP)
25262 Ok(Expression::Cast(Box::new(Cast {
25263 this,
25264 to: DataType::Timestamp {
25265 timezone: false,
25266 precision: None,
25267 },
25268 double_colon_syntax: false,
25269 trailing_comments: Vec::new(),
25270 format: None,
25271 default: None,
25272 inferred_type: None,
25273 })))
25274 }
25275 }
25276 _ => {
25277 // Default: CAST(x AS TIMESTAMP)
25278 Ok(Expression::Cast(Box::new(Cast {
25279 this,
25280 to: DataType::Timestamp {
25281 timezone: false,
25282 precision: None,
25283 },
25284 double_colon_syntax: false,
25285 trailing_comments: Vec::new(),
25286 format: None,
25287 default: None,
25288 inferred_type: None,
25289 })))
25290 }
25291 }
25292 } else {
25293 Ok(e)
25294 }
25295 }
25296
25297 Action::DateToDateStrConvert => {
25298 // DATE_TO_DATE_STR(x) -> CAST(x AS text_type) per dialect
25299 if let Expression::Function(f) = e {
25300 let arg = f.args.into_iter().next().unwrap();
25301 let str_type = match target {
25302 DialectType::DuckDB => DataType::Text,
25303 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
25304 DataType::Custom {
25305 name: "STRING".to_string(),
25306 }
25307 }
25308 DialectType::Presto
25309 | DialectType::Trino
25310 | DialectType::Athena
25311 | DialectType::Drill => DataType::VarChar {
25312 length: None,
25313 parenthesized_length: false,
25314 },
25315 _ => DataType::VarChar {
25316 length: None,
25317 parenthesized_length: false,
25318 },
25319 };
25320 Ok(Expression::Cast(Box::new(Cast {
25321 this: arg,
25322 to: str_type,
25323 double_colon_syntax: false,
25324 trailing_comments: Vec::new(),
25325 format: None,
25326 default: None,
25327 inferred_type: None,
25328 })))
25329 } else {
25330 Ok(e)
25331 }
25332 }
25333
25334 Action::DateToDiConvert => {
25335 // DATE_TO_DI(x) -> CAST(format_func(x, fmt) AS INT)
25336 if let Expression::Function(f) = e {
25337 let arg = f.args.into_iter().next().unwrap();
25338 let inner = match target {
25339 DialectType::DuckDB => {
25340 // STRFTIME(x, '%Y%m%d')
25341 Expression::Function(Box::new(Function::new(
25342 "STRFTIME".to_string(),
25343 vec![arg, Expression::string("%Y%m%d")],
25344 )))
25345 }
25346 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
25347 // DATE_FORMAT(x, 'yyyyMMdd')
25348 Expression::Function(Box::new(Function::new(
25349 "DATE_FORMAT".to_string(),
25350 vec![arg, Expression::string("yyyyMMdd")],
25351 )))
25352 }
25353 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25354 // DATE_FORMAT(x, '%Y%m%d')
25355 Expression::Function(Box::new(Function::new(
25356 "DATE_FORMAT".to_string(),
25357 vec![arg, Expression::string("%Y%m%d")],
25358 )))
25359 }
25360 DialectType::Drill => {
25361 // TO_DATE(x, 'yyyyMMdd')
25362 Expression::Function(Box::new(Function::new(
25363 "TO_DATE".to_string(),
25364 vec![arg, Expression::string("yyyyMMdd")],
25365 )))
25366 }
25367 _ => {
25368 // Default: STRFTIME(x, '%Y%m%d')
25369 Expression::Function(Box::new(Function::new(
25370 "STRFTIME".to_string(),
25371 vec![arg, Expression::string("%Y%m%d")],
25372 )))
25373 }
25374 };
25375 // Use INT (not INTEGER) for Presto/Trino
25376 let int_type = match target {
25377 DialectType::Presto
25378 | DialectType::Trino
25379 | DialectType::Athena
25380 | DialectType::TSQL
25381 | DialectType::Fabric
25382 | DialectType::SQLite
25383 | DialectType::Redshift => DataType::Custom {
25384 name: "INT".to_string(),
25385 },
25386 _ => DataType::Int {
25387 length: None,
25388 integer_spelling: false,
25389 },
25390 };
25391 Ok(Expression::Cast(Box::new(Cast {
25392 this: inner,
25393 to: int_type,
25394 double_colon_syntax: false,
25395 trailing_comments: Vec::new(),
25396 format: None,
25397 default: None,
25398 inferred_type: None,
25399 })))
25400 } else {
25401 Ok(e)
25402 }
25403 }
25404
25405 Action::DiToDateConvert => {
25406 // DI_TO_DATE(x) -> dialect-specific integer-to-date conversion
25407 if let Expression::Function(f) = e {
25408 let arg = f.args.into_iter().next().unwrap();
25409 match target {
25410 DialectType::DuckDB => {
25411 // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
25412 let cast_text = Expression::Cast(Box::new(Cast {
25413 this: arg,
25414 to: DataType::Text,
25415 double_colon_syntax: false,
25416 trailing_comments: Vec::new(),
25417 format: None,
25418 default: None,
25419 inferred_type: None,
25420 }));
25421 let strptime = Expression::Function(Box::new(Function::new(
25422 "STRPTIME".to_string(),
25423 vec![cast_text, Expression::string("%Y%m%d")],
25424 )));
25425 Ok(Expression::Cast(Box::new(Cast {
25426 this: strptime,
25427 to: DataType::Date,
25428 double_colon_syntax: false,
25429 trailing_comments: Vec::new(),
25430 format: None,
25431 default: None,
25432 inferred_type: None,
25433 })))
25434 }
25435 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
25436 // TO_DATE(CAST(x AS STRING), 'yyyyMMdd')
25437 let cast_str = Expression::Cast(Box::new(Cast {
25438 this: arg,
25439 to: DataType::Custom {
25440 name: "STRING".to_string(),
25441 },
25442 double_colon_syntax: false,
25443 trailing_comments: Vec::new(),
25444 format: None,
25445 default: None,
25446 inferred_type: None,
25447 }));
25448 Ok(Expression::Function(Box::new(Function::new(
25449 "TO_DATE".to_string(),
25450 vec![cast_str, Expression::string("yyyyMMdd")],
25451 ))))
25452 }
25453 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25454 // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)
25455 let cast_varchar = Expression::Cast(Box::new(Cast {
25456 this: arg,
25457 to: DataType::VarChar {
25458 length: None,
25459 parenthesized_length: false,
25460 },
25461 double_colon_syntax: false,
25462 trailing_comments: Vec::new(),
25463 format: None,
25464 default: None,
25465 inferred_type: None,
25466 }));
25467 let date_parse = Expression::Function(Box::new(Function::new(
25468 "DATE_PARSE".to_string(),
25469 vec![cast_varchar, Expression::string("%Y%m%d")],
25470 )));
25471 Ok(Expression::Cast(Box::new(Cast {
25472 this: date_parse,
25473 to: DataType::Date,
25474 double_colon_syntax: false,
25475 trailing_comments: Vec::new(),
25476 format: None,
25477 default: None,
25478 inferred_type: None,
25479 })))
25480 }
25481 DialectType::Drill => {
25482 // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
25483 let cast_varchar = Expression::Cast(Box::new(Cast {
25484 this: arg,
25485 to: DataType::VarChar {
25486 length: None,
25487 parenthesized_length: false,
25488 },
25489 double_colon_syntax: false,
25490 trailing_comments: Vec::new(),
25491 format: None,
25492 default: None,
25493 inferred_type: None,
25494 }));
25495 Ok(Expression::Function(Box::new(Function::new(
25496 "TO_DATE".to_string(),
25497 vec![cast_varchar, Expression::string("yyyyMMdd")],
25498 ))))
25499 }
25500 _ => Ok(Expression::Function(Box::new(Function::new(
25501 "DI_TO_DATE".to_string(),
25502 vec![arg],
25503 )))),
25504 }
25505 } else {
25506 Ok(e)
25507 }
25508 }
25509
25510 Action::TsOrDiToDiConvert => {
25511 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
25512 if let Expression::Function(f) = e {
25513 let arg = f.args.into_iter().next().unwrap();
25514 let str_type = match target {
25515 DialectType::DuckDB => DataType::Text,
25516 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
25517 DataType::Custom {
25518 name: "STRING".to_string(),
25519 }
25520 }
25521 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25522 DataType::VarChar {
25523 length: None,
25524 parenthesized_length: false,
25525 }
25526 }
25527 _ => DataType::VarChar {
25528 length: None,
25529 parenthesized_length: false,
25530 },
25531 };
25532 let cast_str = Expression::Cast(Box::new(Cast {
25533 this: arg,
25534 to: str_type,
25535 double_colon_syntax: false,
25536 trailing_comments: Vec::new(),
25537 format: None,
25538 default: None,
25539 inferred_type: None,
25540 }));
25541 let replace_expr = Expression::Function(Box::new(Function::new(
25542 "REPLACE".to_string(),
25543 vec![cast_str, Expression::string("-"), Expression::string("")],
25544 )));
25545 let substr_name = match target {
25546 DialectType::DuckDB
25547 | DialectType::Hive
25548 | DialectType::Spark
25549 | DialectType::Databricks => "SUBSTR",
25550 _ => "SUBSTR",
25551 };
25552 let substr = Expression::Function(Box::new(Function::new(
25553 substr_name.to_string(),
25554 vec![replace_expr, Expression::number(1), Expression::number(8)],
25555 )));
25556 // Use INT (not INTEGER) for Presto/Trino etc.
25557 let int_type = match target {
25558 DialectType::Presto
25559 | DialectType::Trino
25560 | DialectType::Athena
25561 | DialectType::TSQL
25562 | DialectType::Fabric
25563 | DialectType::SQLite
25564 | DialectType::Redshift => DataType::Custom {
25565 name: "INT".to_string(),
25566 },
25567 _ => DataType::Int {
25568 length: None,
25569 integer_spelling: false,
25570 },
25571 };
25572 Ok(Expression::Cast(Box::new(Cast {
25573 this: substr,
25574 to: int_type,
25575 double_colon_syntax: false,
25576 trailing_comments: Vec::new(),
25577 format: None,
25578 default: None,
25579 inferred_type: None,
25580 })))
25581 } else {
25582 Ok(e)
25583 }
25584 }
25585
25586 Action::UnixToStrConvert => {
25587 // UNIX_TO_STR(x, fmt) -> convert to Expression::UnixToStr for generator
25588 if let Expression::Function(f) = e {
25589 let mut args = f.args;
25590 let this = args.remove(0);
25591 let fmt_expr = if !args.is_empty() {
25592 Some(args.remove(0))
25593 } else {
25594 None
25595 };
25596
25597 // Check if format is a string literal
25598 let fmt_str = fmt_expr.as_ref().and_then(|f| {
25599 if let Expression::Literal(Literal::String(s)) = f {
25600 Some(s.clone())
25601 } else {
25602 None
25603 }
25604 });
25605
25606 if let Some(fmt_string) = fmt_str {
25607 // String literal format -> use UnixToStr expression (generator handles it)
25608 Ok(Expression::UnixToStr(Box::new(
25609 crate::expressions::UnixToStr {
25610 this: Box::new(this),
25611 format: Some(fmt_string),
25612 },
25613 )))
25614 } else if let Some(fmt_e) = fmt_expr {
25615 // Non-literal format (e.g., identifier `y`) -> build target expression directly
25616 match target {
25617 DialectType::DuckDB => {
25618 // STRFTIME(TO_TIMESTAMP(x), y)
25619 let to_ts = Expression::Function(Box::new(Function::new(
25620 "TO_TIMESTAMP".to_string(),
25621 vec![this],
25622 )));
25623 Ok(Expression::Function(Box::new(Function::new(
25624 "STRFTIME".to_string(),
25625 vec![to_ts, fmt_e],
25626 ))))
25627 }
25628 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25629 // DATE_FORMAT(FROM_UNIXTIME(x), y)
25630 let from_unix = Expression::Function(Box::new(Function::new(
25631 "FROM_UNIXTIME".to_string(),
25632 vec![this],
25633 )));
25634 Ok(Expression::Function(Box::new(Function::new(
25635 "DATE_FORMAT".to_string(),
25636 vec![from_unix, fmt_e],
25637 ))))
25638 }
25639 DialectType::Hive
25640 | DialectType::Spark
25641 | DialectType::Databricks
25642 | DialectType::Doris
25643 | DialectType::StarRocks => {
25644 // FROM_UNIXTIME(x, y)
25645 Ok(Expression::Function(Box::new(Function::new(
25646 "FROM_UNIXTIME".to_string(),
25647 vec![this, fmt_e],
25648 ))))
25649 }
25650 _ => {
25651 // Default: keep as UNIX_TO_STR(x, y)
25652 Ok(Expression::Function(Box::new(Function::new(
25653 "UNIX_TO_STR".to_string(),
25654 vec![this, fmt_e],
25655 ))))
25656 }
25657 }
25658 } else {
25659 Ok(Expression::UnixToStr(Box::new(
25660 crate::expressions::UnixToStr {
25661 this: Box::new(this),
25662 format: None,
25663 },
25664 )))
25665 }
25666 } else {
25667 Ok(e)
25668 }
25669 }
25670
25671 Action::UnixToTimeConvert => {
25672 // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
25673 if let Expression::Function(f) = e {
25674 let arg = f.args.into_iter().next().unwrap();
25675 Ok(Expression::UnixToTime(Box::new(
25676 crate::expressions::UnixToTime {
25677 this: Box::new(arg),
25678 scale: None,
25679 zone: None,
25680 hours: None,
25681 minutes: None,
25682 format: None,
25683 target_type: None,
25684 },
25685 )))
25686 } else {
25687 Ok(e)
25688 }
25689 }
25690
25691 Action::UnixToTimeStrConvert => {
25692 // UNIX_TO_TIME_STR(x) -> dialect-specific
25693 if let Expression::Function(f) = e {
25694 let arg = f.args.into_iter().next().unwrap();
25695 match target {
25696 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
25697 // FROM_UNIXTIME(x)
25698 Ok(Expression::Function(Box::new(Function::new(
25699 "FROM_UNIXTIME".to_string(),
25700 vec![arg],
25701 ))))
25702 }
25703 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25704 // CAST(FROM_UNIXTIME(x) AS VARCHAR)
25705 let from_unix = Expression::Function(Box::new(Function::new(
25706 "FROM_UNIXTIME".to_string(),
25707 vec![arg],
25708 )));
25709 Ok(Expression::Cast(Box::new(Cast {
25710 this: from_unix,
25711 to: DataType::VarChar {
25712 length: None,
25713 parenthesized_length: false,
25714 },
25715 double_colon_syntax: false,
25716 trailing_comments: Vec::new(),
25717 format: None,
25718 default: None,
25719 inferred_type: None,
25720 })))
25721 }
25722 DialectType::DuckDB => {
25723 // CAST(TO_TIMESTAMP(x) AS TEXT)
25724 let to_ts = Expression::Function(Box::new(Function::new(
25725 "TO_TIMESTAMP".to_string(),
25726 vec![arg],
25727 )));
25728 Ok(Expression::Cast(Box::new(Cast {
25729 this: to_ts,
25730 to: DataType::Text,
25731 double_colon_syntax: false,
25732 trailing_comments: Vec::new(),
25733 format: None,
25734 default: None,
25735 inferred_type: None,
25736 })))
25737 }
25738 _ => Ok(Expression::Function(Box::new(Function::new(
25739 "UNIX_TO_TIME_STR".to_string(),
25740 vec![arg],
25741 )))),
25742 }
25743 } else {
25744 Ok(e)
25745 }
25746 }
25747
25748 Action::TimeToUnixConvert => {
25749 // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
25750 if let Expression::Function(f) = e {
25751 let arg = f.args.into_iter().next().unwrap();
25752 Ok(Expression::TimeToUnix(Box::new(
25753 crate::expressions::UnaryFunc {
25754 this: arg,
25755 original_name: None,
25756 inferred_type: None,
25757 },
25758 )))
25759 } else {
25760 Ok(e)
25761 }
25762 }
25763
25764 Action::TimeToStrConvert => {
25765 // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
25766 if let Expression::Function(f) = e {
25767 let mut args = f.args;
25768 let this = args.remove(0);
25769 let fmt = match args.remove(0) {
25770 Expression::Literal(Literal::String(s)) => s,
25771 other => {
25772 return Ok(Expression::Function(Box::new(Function::new(
25773 "TIME_TO_STR".to_string(),
25774 vec![this, other],
25775 ))));
25776 }
25777 };
25778 Ok(Expression::TimeToStr(Box::new(
25779 crate::expressions::TimeToStr {
25780 this: Box::new(this),
25781 format: fmt,
25782 culture: None,
25783 zone: None,
25784 },
25785 )))
25786 } else {
25787 Ok(e)
25788 }
25789 }
25790
25791 Action::StrToUnixConvert => {
25792 // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
25793 if let Expression::Function(f) = e {
25794 let mut args = f.args;
25795 let this = args.remove(0);
25796 let fmt = match args.remove(0) {
25797 Expression::Literal(Literal::String(s)) => s,
25798 other => {
25799 return Ok(Expression::Function(Box::new(Function::new(
25800 "STR_TO_UNIX".to_string(),
25801 vec![this, other],
25802 ))));
25803 }
25804 };
25805 Ok(Expression::StrToUnix(Box::new(
25806 crate::expressions::StrToUnix {
25807 this: Some(Box::new(this)),
25808 format: Some(fmt),
25809 },
25810 )))
25811 } else {
25812 Ok(e)
25813 }
25814 }
25815
25816 Action::TimeStrToUnixConvert => {
25817 // TIME_STR_TO_UNIX(x) -> dialect-specific
25818 if let Expression::Function(f) = e {
25819 let arg = f.args.into_iter().next().unwrap();
25820 match target {
25821 DialectType::DuckDB => {
25822 // EPOCH(CAST(x AS TIMESTAMP))
25823 let cast_ts = Expression::Cast(Box::new(Cast {
25824 this: arg,
25825 to: DataType::Timestamp {
25826 timezone: false,
25827 precision: None,
25828 },
25829 double_colon_syntax: false,
25830 trailing_comments: Vec::new(),
25831 format: None,
25832 default: None,
25833 inferred_type: None,
25834 }));
25835 Ok(Expression::Function(Box::new(Function::new(
25836 "EPOCH".to_string(),
25837 vec![cast_ts],
25838 ))))
25839 }
25840 DialectType::Hive
25841 | DialectType::Doris
25842 | DialectType::StarRocks
25843 | DialectType::MySQL => {
25844 // UNIX_TIMESTAMP(x)
25845 Ok(Expression::Function(Box::new(Function::new(
25846 "UNIX_TIMESTAMP".to_string(),
25847 vec![arg],
25848 ))))
25849 }
25850 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25851 // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
25852 let date_parse = Expression::Function(Box::new(Function::new(
25853 "DATE_PARSE".to_string(),
25854 vec![arg, Expression::string("%Y-%m-%d %T")],
25855 )));
25856 Ok(Expression::Function(Box::new(Function::new(
25857 "TO_UNIXTIME".to_string(),
25858 vec![date_parse],
25859 ))))
25860 }
25861 _ => Ok(Expression::Function(Box::new(Function::new(
25862 "TIME_STR_TO_UNIX".to_string(),
25863 vec![arg],
25864 )))),
25865 }
25866 } else {
25867 Ok(e)
25868 }
25869 }
25870
25871 Action::TimeToTimeStrConvert => {
25872 // TIME_TO_TIME_STR(x) -> CAST(x AS str_type) per dialect
25873 if let Expression::Function(f) = e {
25874 let arg = f.args.into_iter().next().unwrap();
25875 let str_type = match target {
25876 DialectType::DuckDB => DataType::Text,
25877 DialectType::Hive
25878 | DialectType::Spark
25879 | DialectType::Databricks
25880 | DialectType::Doris
25881 | DialectType::StarRocks => DataType::Custom {
25882 name: "STRING".to_string(),
25883 },
25884 DialectType::Redshift => DataType::Custom {
25885 name: "VARCHAR(MAX)".to_string(),
25886 },
25887 _ => DataType::VarChar {
25888 length: None,
25889 parenthesized_length: false,
25890 },
25891 };
25892 Ok(Expression::Cast(Box::new(Cast {
25893 this: arg,
25894 to: str_type,
25895 double_colon_syntax: false,
25896 trailing_comments: Vec::new(),
25897 format: None,
25898 default: None,
25899 inferred_type: None,
25900 })))
25901 } else {
25902 Ok(e)
25903 }
25904 }
25905
25906 Action::DateTruncSwapArgs => {
25907 // DATE_TRUNC('unit', x) from Generic -> target-specific
25908 if let Expression::Function(f) = e {
25909 if f.args.len() == 2 {
25910 let unit_arg = f.args[0].clone();
25911 let expr_arg = f.args[1].clone();
25912 // Extract unit string from the first arg
25913 let unit_str = match &unit_arg {
25914 Expression::Literal(Literal::String(s)) => s.to_ascii_uppercase(),
25915 _ => return Ok(Expression::Function(f)),
25916 };
25917 match target {
25918 DialectType::BigQuery => {
25919 // BigQuery: DATE_TRUNC(x, UNIT) - unquoted unit
25920 let unit_ident =
25921 Expression::Column(Box::new(crate::expressions::Column {
25922 name: crate::expressions::Identifier::new(unit_str),
25923 table: None,
25924 join_mark: false,
25925 trailing_comments: Vec::new(),
25926 span: None,
25927 inferred_type: None,
25928 }));
25929 Ok(Expression::Function(Box::new(Function::new(
25930 "DATE_TRUNC".to_string(),
25931 vec![expr_arg, unit_ident],
25932 ))))
25933 }
25934 DialectType::Doris => {
25935 // Doris: DATE_TRUNC(x, 'UNIT')
25936 Ok(Expression::Function(Box::new(Function::new(
25937 "DATE_TRUNC".to_string(),
25938 vec![expr_arg, Expression::string(&unit_str)],
25939 ))))
25940 }
25941 DialectType::StarRocks => {
25942 // StarRocks: DATE_TRUNC('UNIT', x) - keep standard order
25943 Ok(Expression::Function(Box::new(Function::new(
25944 "DATE_TRUNC".to_string(),
25945 vec![Expression::string(&unit_str), expr_arg],
25946 ))))
25947 }
25948 DialectType::Spark | DialectType::Databricks => {
25949 // Spark: TRUNC(x, 'UNIT')
25950 Ok(Expression::Function(Box::new(Function::new(
25951 "TRUNC".to_string(),
25952 vec![expr_arg, Expression::string(&unit_str)],
25953 ))))
25954 }
25955 DialectType::MySQL => {
25956 // MySQL: complex expansion based on unit
25957 Self::date_trunc_to_mysql(&unit_str, &expr_arg)
25958 }
25959 _ => Ok(Expression::Function(f)),
25960 }
25961 } else {
25962 Ok(Expression::Function(f))
25963 }
25964 } else {
25965 Ok(e)
25966 }
25967 }
25968
25969 Action::TimestampTruncConvert => {
25970 // TIMESTAMP_TRUNC(x, UNIT[, tz]) from Generic -> target-specific
25971 if let Expression::Function(f) = e {
25972 if f.args.len() >= 2 {
25973 let expr_arg = f.args[0].clone();
25974 let unit_arg = f.args[1].clone();
25975 let tz_arg = if f.args.len() >= 3 {
25976 Some(f.args[2].clone())
25977 } else {
25978 None
25979 };
25980 // Extract unit string
25981 let unit_str = match &unit_arg {
25982 Expression::Literal(Literal::String(s)) => s.to_ascii_uppercase(),
25983 Expression::Column(c) => c.name.name.to_ascii_uppercase(),
25984 _ => {
25985 return Ok(Expression::Function(f));
25986 }
25987 };
25988 match target {
25989 DialectType::Spark | DialectType::Databricks => {
25990 // Spark: DATE_TRUNC('UNIT', x)
25991 Ok(Expression::Function(Box::new(Function::new(
25992 "DATE_TRUNC".to_string(),
25993 vec![Expression::string(&unit_str), expr_arg],
25994 ))))
25995 }
25996 DialectType::Doris | DialectType::StarRocks => {
25997 // Doris: DATE_TRUNC(x, 'UNIT')
25998 Ok(Expression::Function(Box::new(Function::new(
25999 "DATE_TRUNC".to_string(),
26000 vec![expr_arg, Expression::string(&unit_str)],
26001 ))))
26002 }
26003 DialectType::BigQuery => {
26004 // BigQuery: TIMESTAMP_TRUNC(x, UNIT) - keep but with unquoted unit
26005 let unit_ident =
26006 Expression::Column(Box::new(crate::expressions::Column {
26007 name: crate::expressions::Identifier::new(unit_str),
26008 table: None,
26009 join_mark: false,
26010 trailing_comments: Vec::new(),
26011 span: None,
26012 inferred_type: None,
26013 }));
26014 let mut args = vec![expr_arg, unit_ident];
26015 if let Some(tz) = tz_arg {
26016 args.push(tz);
26017 }
26018 Ok(Expression::Function(Box::new(Function::new(
26019 "TIMESTAMP_TRUNC".to_string(),
26020 args,
26021 ))))
26022 }
26023 DialectType::DuckDB => {
26024 // DuckDB with timezone: DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
26025 if let Some(tz) = tz_arg {
26026 let tz_str = match &tz {
26027 Expression::Literal(Literal::String(s)) => s.clone(),
26028 _ => "UTC".to_string(),
26029 };
26030 // x AT TIME ZONE 'tz'
26031 let at_tz = Expression::AtTimeZone(Box::new(
26032 crate::expressions::AtTimeZone {
26033 this: expr_arg,
26034 zone: Expression::string(&tz_str),
26035 },
26036 ));
26037 // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
26038 let trunc = Expression::Function(Box::new(Function::new(
26039 "DATE_TRUNC".to_string(),
26040 vec![Expression::string(&unit_str), at_tz],
26041 )));
26042 // DATE_TRUNC(...) AT TIME ZONE 'tz'
26043 Ok(Expression::AtTimeZone(Box::new(
26044 crate::expressions::AtTimeZone {
26045 this: trunc,
26046 zone: Expression::string(&tz_str),
26047 },
26048 )))
26049 } else {
26050 Ok(Expression::Function(Box::new(Function::new(
26051 "DATE_TRUNC".to_string(),
26052 vec![Expression::string(&unit_str), expr_arg],
26053 ))))
26054 }
26055 }
26056 DialectType::Presto
26057 | DialectType::Trino
26058 | DialectType::Athena
26059 | DialectType::Snowflake => {
26060 // Presto/Snowflake: DATE_TRUNC('UNIT', x) - drop timezone
26061 Ok(Expression::Function(Box::new(Function::new(
26062 "DATE_TRUNC".to_string(),
26063 vec![Expression::string(&unit_str), expr_arg],
26064 ))))
26065 }
26066 _ => {
26067 // For most dialects: DATE_TRUNC('UNIT', x) + tz handling
26068 let mut args = vec![Expression::string(&unit_str), expr_arg];
26069 if let Some(tz) = tz_arg {
26070 args.push(tz);
26071 }
26072 Ok(Expression::Function(Box::new(Function::new(
26073 "DATE_TRUNC".to_string(),
26074 args,
26075 ))))
26076 }
26077 }
26078 } else {
26079 Ok(Expression::Function(f))
26080 }
26081 } else {
26082 Ok(e)
26083 }
26084 }
26085
26086 Action::StrToDateConvert => {
26087 // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date parsing
26088 if let Expression::Function(f) = e {
26089 if f.args.len() == 2 {
26090 let mut args = f.args;
26091 let this = args.remove(0);
26092 let fmt_expr = args.remove(0);
26093 let fmt_str = match &fmt_expr {
26094 Expression::Literal(Literal::String(s)) => Some(s.clone()),
26095 _ => None,
26096 };
26097 let default_date = "%Y-%m-%d";
26098 let default_time = "%Y-%m-%d %H:%M:%S";
26099 let is_default = fmt_str
26100 .as_ref()
26101 .map_or(false, |f| f == default_date || f == default_time);
26102
26103 if is_default {
26104 // Default format: handle per-dialect
26105 match target {
26106 DialectType::MySQL
26107 | DialectType::Doris
26108 | DialectType::StarRocks => {
26109 // Keep STR_TO_DATE(x, fmt) as-is
26110 Ok(Expression::Function(Box::new(Function::new(
26111 "STR_TO_DATE".to_string(),
26112 vec![this, fmt_expr],
26113 ))))
26114 }
26115 DialectType::Hive => {
26116 // Hive: CAST(x AS DATE)
26117 Ok(Expression::Cast(Box::new(Cast {
26118 this,
26119 to: DataType::Date,
26120 double_colon_syntax: false,
26121 trailing_comments: Vec::new(),
26122 format: None,
26123 default: None,
26124 inferred_type: None,
26125 })))
26126 }
26127 DialectType::Presto
26128 | DialectType::Trino
26129 | DialectType::Athena => {
26130 // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
26131 let date_parse =
26132 Expression::Function(Box::new(Function::new(
26133 "DATE_PARSE".to_string(),
26134 vec![this, fmt_expr],
26135 )));
26136 Ok(Expression::Cast(Box::new(Cast {
26137 this: date_parse,
26138 to: DataType::Date,
26139 double_colon_syntax: false,
26140 trailing_comments: Vec::new(),
26141 format: None,
26142 default: None,
26143 inferred_type: None,
26144 })))
26145 }
26146 _ => {
26147 // Others: TsOrDsToDate (delegates to generator)
26148 Ok(Expression::TsOrDsToDate(Box::new(
26149 crate::expressions::TsOrDsToDate {
26150 this: Box::new(this),
26151 format: None,
26152 safe: None,
26153 },
26154 )))
26155 }
26156 }
26157 } else if let Some(fmt) = fmt_str {
26158 match target {
26159 DialectType::Doris
26160 | DialectType::StarRocks
26161 | DialectType::MySQL => {
26162 // Keep STR_TO_DATE but with normalized format (%H:%M:%S -> %T, %-d -> %e)
26163 let mut normalized = fmt.clone();
26164 normalized = normalized.replace("%-d", "%e");
26165 normalized = normalized.replace("%-m", "%c");
26166 normalized = normalized.replace("%H:%M:%S", "%T");
26167 Ok(Expression::Function(Box::new(Function::new(
26168 "STR_TO_DATE".to_string(),
26169 vec![this, Expression::string(&normalized)],
26170 ))))
26171 }
26172 DialectType::Hive => {
26173 // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
26174 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
26175 let unix_ts =
26176 Expression::Function(Box::new(Function::new(
26177 "UNIX_TIMESTAMP".to_string(),
26178 vec![this, Expression::string(&java_fmt)],
26179 )));
26180 let from_unix =
26181 Expression::Function(Box::new(Function::new(
26182 "FROM_UNIXTIME".to_string(),
26183 vec![unix_ts],
26184 )));
26185 Ok(Expression::Cast(Box::new(Cast {
26186 this: from_unix,
26187 to: DataType::Date,
26188 double_colon_syntax: false,
26189 trailing_comments: Vec::new(),
26190 format: None,
26191 default: None,
26192 inferred_type: None,
26193 })))
26194 }
26195 DialectType::Spark | DialectType::Databricks => {
26196 // Spark: TO_DATE(x, java_fmt)
26197 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
26198 Ok(Expression::Function(Box::new(Function::new(
26199 "TO_DATE".to_string(),
26200 vec![this, Expression::string(&java_fmt)],
26201 ))))
26202 }
26203 DialectType::Drill => {
26204 // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
26205 // The generator's string literal escaping will double the quotes: 'T' -> ''T''
26206 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
26207 let java_fmt = java_fmt.replace('T', "'T'");
26208 Ok(Expression::Function(Box::new(Function::new(
26209 "TO_DATE".to_string(),
26210 vec![this, Expression::string(&java_fmt)],
26211 ))))
26212 }
26213 _ => {
26214 // For other dialects: use TsOrDsToDate which delegates to generator
26215 Ok(Expression::TsOrDsToDate(Box::new(
26216 crate::expressions::TsOrDsToDate {
26217 this: Box::new(this),
26218 format: Some(fmt),
26219 safe: None,
26220 },
26221 )))
26222 }
26223 }
26224 } else {
26225 // Non-string format - keep as-is
26226 let mut new_args = Vec::new();
26227 new_args.push(this);
26228 new_args.push(fmt_expr);
26229 Ok(Expression::Function(Box::new(Function::new(
26230 "STR_TO_DATE".to_string(),
26231 new_args,
26232 ))))
26233 }
26234 } else {
26235 Ok(Expression::Function(f))
26236 }
26237 } else {
26238 Ok(e)
26239 }
26240 }
26241
26242 Action::TsOrDsAddConvert => {
26243 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
26244 if let Expression::Function(f) = e {
26245 if f.args.len() == 3 {
26246 let mut args = f.args;
26247 let x = args.remove(0);
26248 let n = args.remove(0);
26249 let unit_expr = args.remove(0);
26250 let unit_str = match &unit_expr {
26251 Expression::Literal(Literal::String(s)) => s.to_ascii_uppercase(),
26252 _ => "DAY".to_string(),
26253 };
26254
26255 match target {
26256 DialectType::Hive
26257 | DialectType::Spark
26258 | DialectType::Databricks => {
26259 // DATE_ADD(x, n) - only supports DAY unit
26260 Ok(Expression::Function(Box::new(Function::new(
26261 "DATE_ADD".to_string(),
26262 vec![x, n],
26263 ))))
26264 }
26265 DialectType::MySQL => {
26266 // DATE_ADD(x, INTERVAL n UNIT)
26267 let iu = match unit_str.as_str() {
26268 "YEAR" => crate::expressions::IntervalUnit::Year,
26269 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
26270 "MONTH" => crate::expressions::IntervalUnit::Month,
26271 "WEEK" => crate::expressions::IntervalUnit::Week,
26272 "HOUR" => crate::expressions::IntervalUnit::Hour,
26273 "MINUTE" => crate::expressions::IntervalUnit::Minute,
26274 "SECOND" => crate::expressions::IntervalUnit::Second,
26275 _ => crate::expressions::IntervalUnit::Day,
26276 };
26277 let interval = Expression::Interval(Box::new(
26278 crate::expressions::Interval {
26279 this: Some(n),
26280 unit: Some(
26281 crate::expressions::IntervalUnitSpec::Simple {
26282 unit: iu,
26283 use_plural: false,
26284 },
26285 ),
26286 },
26287 ));
26288 Ok(Expression::Function(Box::new(Function::new(
26289 "DATE_ADD".to_string(),
26290 vec![x, interval],
26291 ))))
26292 }
26293 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26294 // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
26295 let cast_ts = Expression::Cast(Box::new(Cast {
26296 this: x,
26297 to: DataType::Timestamp {
26298 precision: None,
26299 timezone: false,
26300 },
26301 double_colon_syntax: false,
26302 trailing_comments: Vec::new(),
26303 format: None,
26304 default: None,
26305 inferred_type: None,
26306 }));
26307 let cast_date = Expression::Cast(Box::new(Cast {
26308 this: cast_ts,
26309 to: DataType::Date,
26310 double_colon_syntax: false,
26311 trailing_comments: Vec::new(),
26312 format: None,
26313 default: None,
26314 inferred_type: None,
26315 }));
26316 Ok(Expression::Function(Box::new(Function::new(
26317 "DATE_ADD".to_string(),
26318 vec![Expression::string(&unit_str), n, cast_date],
26319 ))))
26320 }
26321 DialectType::DuckDB => {
26322 // CAST(x AS DATE) + INTERVAL n UNIT
26323 let cast_date = Expression::Cast(Box::new(Cast {
26324 this: x,
26325 to: DataType::Date,
26326 double_colon_syntax: false,
26327 trailing_comments: Vec::new(),
26328 format: None,
26329 default: None,
26330 inferred_type: None,
26331 }));
26332 let iu = match unit_str.as_str() {
26333 "YEAR" => crate::expressions::IntervalUnit::Year,
26334 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
26335 "MONTH" => crate::expressions::IntervalUnit::Month,
26336 "WEEK" => crate::expressions::IntervalUnit::Week,
26337 "HOUR" => crate::expressions::IntervalUnit::Hour,
26338 "MINUTE" => crate::expressions::IntervalUnit::Minute,
26339 "SECOND" => crate::expressions::IntervalUnit::Second,
26340 _ => crate::expressions::IntervalUnit::Day,
26341 };
26342 let interval = Expression::Interval(Box::new(
26343 crate::expressions::Interval {
26344 this: Some(n),
26345 unit: Some(
26346 crate::expressions::IntervalUnitSpec::Simple {
26347 unit: iu,
26348 use_plural: false,
26349 },
26350 ),
26351 },
26352 ));
26353 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
26354 left: cast_date,
26355 right: interval,
26356 left_comments: Vec::new(),
26357 operator_comments: Vec::new(),
26358 trailing_comments: Vec::new(),
26359 inferred_type: None,
26360 })))
26361 }
26362 DialectType::Drill => {
26363 // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
26364 let cast_date = Expression::Cast(Box::new(Cast {
26365 this: x,
26366 to: DataType::Date,
26367 double_colon_syntax: false,
26368 trailing_comments: Vec::new(),
26369 format: None,
26370 default: None,
26371 inferred_type: None,
26372 }));
26373 let iu = match unit_str.as_str() {
26374 "YEAR" => crate::expressions::IntervalUnit::Year,
26375 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
26376 "MONTH" => crate::expressions::IntervalUnit::Month,
26377 "WEEK" => crate::expressions::IntervalUnit::Week,
26378 "HOUR" => crate::expressions::IntervalUnit::Hour,
26379 "MINUTE" => crate::expressions::IntervalUnit::Minute,
26380 "SECOND" => crate::expressions::IntervalUnit::Second,
26381 _ => crate::expressions::IntervalUnit::Day,
26382 };
26383 let interval = Expression::Interval(Box::new(
26384 crate::expressions::Interval {
26385 this: Some(n),
26386 unit: Some(
26387 crate::expressions::IntervalUnitSpec::Simple {
26388 unit: iu,
26389 use_plural: false,
26390 },
26391 ),
26392 },
26393 ));
26394 Ok(Expression::Function(Box::new(Function::new(
26395 "DATE_ADD".to_string(),
26396 vec![cast_date, interval],
26397 ))))
26398 }
26399 _ => {
26400 // Default: keep as TS_OR_DS_ADD
26401 Ok(Expression::Function(Box::new(Function::new(
26402 "TS_OR_DS_ADD".to_string(),
26403 vec![x, n, unit_expr],
26404 ))))
26405 }
26406 }
26407 } else {
26408 Ok(Expression::Function(f))
26409 }
26410 } else {
26411 Ok(e)
26412 }
26413 }
26414
26415 Action::DateFromUnixDateConvert => {
26416 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
26417 if let Expression::Function(f) = e {
26418 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
26419 if matches!(
26420 target,
26421 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
26422 ) {
26423 return Ok(Expression::Function(Box::new(Function::new(
26424 "DATE_FROM_UNIX_DATE".to_string(),
26425 f.args,
26426 ))));
26427 }
26428 let n = f.args.into_iter().next().unwrap();
26429 let epoch_date = Expression::Cast(Box::new(Cast {
26430 this: Expression::string("1970-01-01"),
26431 to: DataType::Date,
26432 double_colon_syntax: false,
26433 trailing_comments: Vec::new(),
26434 format: None,
26435 default: None,
26436 inferred_type: None,
26437 }));
26438 match target {
26439 DialectType::DuckDB => {
26440 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
26441 let interval =
26442 Expression::Interval(Box::new(crate::expressions::Interval {
26443 this: Some(n),
26444 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26445 unit: crate::expressions::IntervalUnit::Day,
26446 use_plural: false,
26447 }),
26448 }));
26449 Ok(Expression::Add(Box::new(
26450 crate::expressions::BinaryOp::new(epoch_date, interval),
26451 )))
26452 }
26453 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26454 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
26455 Ok(Expression::Function(Box::new(Function::new(
26456 "DATE_ADD".to_string(),
26457 vec![Expression::string("DAY"), n, epoch_date],
26458 ))))
26459 }
26460 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
26461 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
26462 Ok(Expression::Function(Box::new(Function::new(
26463 "DATEADD".to_string(),
26464 vec![
26465 Expression::Identifier(Identifier::new("DAY")),
26466 n,
26467 epoch_date,
26468 ],
26469 ))))
26470 }
26471 DialectType::BigQuery => {
26472 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
26473 let interval =
26474 Expression::Interval(Box::new(crate::expressions::Interval {
26475 this: Some(n),
26476 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26477 unit: crate::expressions::IntervalUnit::Day,
26478 use_plural: false,
26479 }),
26480 }));
26481 Ok(Expression::Function(Box::new(Function::new(
26482 "DATE_ADD".to_string(),
26483 vec![epoch_date, interval],
26484 ))))
26485 }
26486 DialectType::MySQL
26487 | DialectType::Doris
26488 | DialectType::StarRocks
26489 | DialectType::Drill => {
26490 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
26491 let interval =
26492 Expression::Interval(Box::new(crate::expressions::Interval {
26493 this: Some(n),
26494 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26495 unit: crate::expressions::IntervalUnit::Day,
26496 use_plural: false,
26497 }),
26498 }));
26499 Ok(Expression::Function(Box::new(Function::new(
26500 "DATE_ADD".to_string(),
26501 vec![epoch_date, interval],
26502 ))))
26503 }
26504 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
26505 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
26506 Ok(Expression::Function(Box::new(Function::new(
26507 "DATE_ADD".to_string(),
26508 vec![epoch_date, n],
26509 ))))
26510 }
26511 DialectType::PostgreSQL
26512 | DialectType::Materialize
26513 | DialectType::RisingWave => {
26514 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
26515 let n_str = match &n {
26516 Expression::Literal(Literal::Number(s)) => s.clone(),
26517 _ => Self::expr_to_string_static(&n),
26518 };
26519 let interval =
26520 Expression::Interval(Box::new(crate::expressions::Interval {
26521 this: Some(Expression::string(&format!("{} DAY", n_str))),
26522 unit: None,
26523 }));
26524 Ok(Expression::Add(Box::new(
26525 crate::expressions::BinaryOp::new(epoch_date, interval),
26526 )))
26527 }
26528 _ => {
26529 // Default: keep as-is
26530 Ok(Expression::Function(Box::new(Function::new(
26531 "DATE_FROM_UNIX_DATE".to_string(),
26532 vec![n],
26533 ))))
26534 }
26535 }
26536 } else {
26537 Ok(e)
26538 }
26539 }
26540
26541 Action::ArrayRemoveConvert => {
26542 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter
26543 if let Expression::ArrayRemove(bf) = e {
26544 let arr = bf.this;
26545 let target_val = bf.expression;
26546 match target {
26547 DialectType::DuckDB => {
26548 let u_id = crate::expressions::Identifier::new("_u");
26549 let lambda =
26550 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26551 parameters: vec![u_id.clone()],
26552 body: Expression::Neq(Box::new(BinaryOp {
26553 left: Expression::Identifier(u_id),
26554 right: target_val,
26555 left_comments: Vec::new(),
26556 operator_comments: Vec::new(),
26557 trailing_comments: Vec::new(),
26558 inferred_type: None,
26559 })),
26560 colon: false,
26561 parameter_types: Vec::new(),
26562 }));
26563 Ok(Expression::Function(Box::new(Function::new(
26564 "LIST_FILTER".to_string(),
26565 vec![arr, lambda],
26566 ))))
26567 }
26568 DialectType::ClickHouse => {
26569 let u_id = crate::expressions::Identifier::new("_u");
26570 let lambda =
26571 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26572 parameters: vec![u_id.clone()],
26573 body: Expression::Neq(Box::new(BinaryOp {
26574 left: Expression::Identifier(u_id),
26575 right: target_val,
26576 left_comments: Vec::new(),
26577 operator_comments: Vec::new(),
26578 trailing_comments: Vec::new(),
26579 inferred_type: None,
26580 })),
26581 colon: false,
26582 parameter_types: Vec::new(),
26583 }));
26584 Ok(Expression::Function(Box::new(Function::new(
26585 "arrayFilter".to_string(),
26586 vec![lambda, arr],
26587 ))))
26588 }
26589 DialectType::BigQuery => {
26590 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
26591 let u_id = crate::expressions::Identifier::new("_u");
26592 let u_col = Expression::Column(Box::new(crate::expressions::Column {
26593 name: u_id.clone(),
26594 table: None,
26595 join_mark: false,
26596 trailing_comments: Vec::new(),
26597 span: None,
26598 inferred_type: None,
26599 }));
26600 let unnest_expr =
26601 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
26602 this: arr,
26603 expressions: Vec::new(),
26604 with_ordinality: false,
26605 alias: None,
26606 offset_alias: None,
26607 }));
26608 let aliased_unnest =
26609 Expression::Alias(Box::new(crate::expressions::Alias {
26610 this: unnest_expr,
26611 alias: u_id.clone(),
26612 column_aliases: Vec::new(),
26613 pre_alias_comments: Vec::new(),
26614 trailing_comments: Vec::new(),
26615 inferred_type: None,
26616 }));
26617 let where_cond = Expression::Neq(Box::new(BinaryOp {
26618 left: u_col.clone(),
26619 right: target_val,
26620 left_comments: Vec::new(),
26621 operator_comments: Vec::new(),
26622 trailing_comments: Vec::new(),
26623 inferred_type: None,
26624 }));
26625 let subquery = Expression::Select(Box::new(
26626 crate::expressions::Select::new()
26627 .column(u_col)
26628 .from(aliased_unnest)
26629 .where_(where_cond),
26630 ));
26631 Ok(Expression::ArrayFunc(Box::new(
26632 crate::expressions::ArrayConstructor {
26633 expressions: vec![subquery],
26634 bracket_notation: false,
26635 use_list_keyword: false,
26636 },
26637 )))
26638 }
26639 _ => Ok(Expression::ArrayRemove(Box::new(
26640 crate::expressions::BinaryFunc {
26641 original_name: None,
26642 this: arr,
26643 expression: target_val,
26644 inferred_type: None,
26645 },
26646 ))),
26647 }
26648 } else {
26649 Ok(e)
26650 }
26651 }
26652
26653 Action::ArrayReverseConvert => {
26654 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
26655 if let Expression::ArrayReverse(af) = e {
26656 Ok(Expression::Function(Box::new(Function::new(
26657 "arrayReverse".to_string(),
26658 vec![af.this],
26659 ))))
26660 } else {
26661 Ok(e)
26662 }
26663 }
26664
26665 Action::JsonKeysConvert => {
26666 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
26667 if let Expression::JsonKeys(uf) = e {
26668 match target {
26669 DialectType::Spark | DialectType::Databricks => {
26670 Ok(Expression::Function(Box::new(Function::new(
26671 "JSON_OBJECT_KEYS".to_string(),
26672 vec![uf.this],
26673 ))))
26674 }
26675 DialectType::Snowflake => Ok(Expression::Function(Box::new(
26676 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
26677 ))),
26678 _ => Ok(Expression::JsonKeys(uf)),
26679 }
26680 } else {
26681 Ok(e)
26682 }
26683 }
26684
26685 Action::ParseJsonStrip => {
26686 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
26687 if let Expression::ParseJson(uf) = e {
26688 Ok(uf.this)
26689 } else {
26690 Ok(e)
26691 }
26692 }
26693
26694 Action::ArraySizeDrill => {
26695 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
26696 if let Expression::ArraySize(uf) = e {
26697 Ok(Expression::Function(Box::new(Function::new(
26698 "REPEATED_COUNT".to_string(),
26699 vec![uf.this],
26700 ))))
26701 } else {
26702 Ok(e)
26703 }
26704 }
26705
26706 Action::WeekOfYearToWeekIso => {
26707 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
26708 if let Expression::WeekOfYear(uf) = e {
26709 Ok(Expression::Function(Box::new(Function::new(
26710 "WEEKISO".to_string(),
26711 vec![uf.this],
26712 ))))
26713 } else {
26714 Ok(e)
26715 }
26716 }
26717 }
26718 })
26719 }
26720
26721 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
26722 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
26723 use crate::expressions::Function;
26724 match unit {
26725 "DAY" => {
26726 // DATE(x)
26727 Ok(Expression::Function(Box::new(Function::new(
26728 "DATE".to_string(),
26729 vec![expr.clone()],
26730 ))))
26731 }
26732 "WEEK" => {
26733 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
26734 let year_x = Expression::Function(Box::new(Function::new(
26735 "YEAR".to_string(),
26736 vec![expr.clone()],
26737 )));
26738 let week_x = Expression::Function(Box::new(Function::new(
26739 "WEEK".to_string(),
26740 vec![expr.clone(), Expression::number(1)],
26741 )));
26742 let concat_args = vec![
26743 year_x,
26744 Expression::string(" "),
26745 week_x,
26746 Expression::string(" 1"),
26747 ];
26748 let concat = Expression::Function(Box::new(Function::new(
26749 "CONCAT".to_string(),
26750 concat_args,
26751 )));
26752 Ok(Expression::Function(Box::new(Function::new(
26753 "STR_TO_DATE".to_string(),
26754 vec![concat, Expression::string("%Y %u %w")],
26755 ))))
26756 }
26757 "MONTH" => {
26758 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
26759 let year_x = Expression::Function(Box::new(Function::new(
26760 "YEAR".to_string(),
26761 vec![expr.clone()],
26762 )));
26763 let month_x = Expression::Function(Box::new(Function::new(
26764 "MONTH".to_string(),
26765 vec![expr.clone()],
26766 )));
26767 let concat_args = vec![
26768 year_x,
26769 Expression::string(" "),
26770 month_x,
26771 Expression::string(" 1"),
26772 ];
26773 let concat = Expression::Function(Box::new(Function::new(
26774 "CONCAT".to_string(),
26775 concat_args,
26776 )));
26777 Ok(Expression::Function(Box::new(Function::new(
26778 "STR_TO_DATE".to_string(),
26779 vec![concat, Expression::string("%Y %c %e")],
26780 ))))
26781 }
26782 "QUARTER" => {
26783 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
26784 let year_x = Expression::Function(Box::new(Function::new(
26785 "YEAR".to_string(),
26786 vec![expr.clone()],
26787 )));
26788 let quarter_x = Expression::Function(Box::new(Function::new(
26789 "QUARTER".to_string(),
26790 vec![expr.clone()],
26791 )));
26792 // QUARTER(x) * 3 - 2
26793 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
26794 left: quarter_x,
26795 right: Expression::number(3),
26796 left_comments: Vec::new(),
26797 operator_comments: Vec::new(),
26798 trailing_comments: Vec::new(),
26799 inferred_type: None,
26800 }));
26801 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
26802 left: mul,
26803 right: Expression::number(2),
26804 left_comments: Vec::new(),
26805 operator_comments: Vec::new(),
26806 trailing_comments: Vec::new(),
26807 inferred_type: None,
26808 }));
26809 let concat_args = vec![
26810 year_x,
26811 Expression::string(" "),
26812 sub,
26813 Expression::string(" 1"),
26814 ];
26815 let concat = Expression::Function(Box::new(Function::new(
26816 "CONCAT".to_string(),
26817 concat_args,
26818 )));
26819 Ok(Expression::Function(Box::new(Function::new(
26820 "STR_TO_DATE".to_string(),
26821 vec![concat, Expression::string("%Y %c %e")],
26822 ))))
26823 }
26824 "YEAR" => {
26825 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
26826 let year_x = Expression::Function(Box::new(Function::new(
26827 "YEAR".to_string(),
26828 vec![expr.clone()],
26829 )));
26830 let concat_args = vec![year_x, Expression::string(" 1 1")];
26831 let concat = Expression::Function(Box::new(Function::new(
26832 "CONCAT".to_string(),
26833 concat_args,
26834 )));
26835 Ok(Expression::Function(Box::new(Function::new(
26836 "STR_TO_DATE".to_string(),
26837 vec![concat, Expression::string("%Y %c %e")],
26838 ))))
26839 }
26840 _ => {
26841 // Unsupported unit -> keep as DATE_TRUNC
26842 Ok(Expression::Function(Box::new(Function::new(
26843 "DATE_TRUNC".to_string(),
26844 vec![Expression::string(unit), expr.clone()],
26845 ))))
26846 }
26847 }
26848 }
26849
26850 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
26851 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
26852 use crate::expressions::DataType;
26853 match dt {
26854 DataType::VarChar { .. } | DataType::Char { .. } => true,
26855 DataType::Struct { fields, .. } => fields
26856 .iter()
26857 .any(|f| Self::has_varchar_char_type(&f.data_type)),
26858 _ => false,
26859 }
26860 }
26861
26862 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
26863 fn normalize_varchar_to_string(
26864 dt: crate::expressions::DataType,
26865 ) -> crate::expressions::DataType {
26866 use crate::expressions::DataType;
26867 match dt {
26868 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
26869 name: "STRING".to_string(),
26870 },
26871 DataType::Struct { fields, nested } => {
26872 let fields = fields
26873 .into_iter()
26874 .map(|mut f| {
26875 f.data_type = Self::normalize_varchar_to_string(f.data_type);
26876 f
26877 })
26878 .collect();
26879 DataType::Struct { fields, nested }
26880 }
26881 other => other,
26882 }
26883 }
26884
26885 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
26886 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
26887 if let Expression::Literal(crate::expressions::Literal::String(ref s)) = expr {
26888 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
26889 let trimmed = s.trim();
26890
26891 // Find where digits end and unit text begins
26892 let digit_end = trimmed
26893 .find(|c: char| !c.is_ascii_digit())
26894 .unwrap_or(trimmed.len());
26895 if digit_end == 0 || digit_end == trimmed.len() {
26896 return expr;
26897 }
26898 let num = &trimmed[..digit_end];
26899 let unit_text = trimmed[digit_end..].trim().to_ascii_uppercase();
26900 if unit_text.is_empty() {
26901 return expr;
26902 }
26903
26904 let known_units = [
26905 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS", "WEEK",
26906 "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
26907 ];
26908 if !known_units.contains(&unit_text.as_str()) {
26909 return expr;
26910 }
26911
26912 let unit_str = unit_text.clone();
26913 // Singularize
26914 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
26915 &unit_str[..unit_str.len() - 1]
26916 } else {
26917 &unit_str
26918 };
26919 let unit = unit_singular;
26920
26921 match target {
26922 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26923 // INTERVAL '2' DAY
26924 let iu = match unit {
26925 "DAY" => crate::expressions::IntervalUnit::Day,
26926 "HOUR" => crate::expressions::IntervalUnit::Hour,
26927 "MINUTE" => crate::expressions::IntervalUnit::Minute,
26928 "SECOND" => crate::expressions::IntervalUnit::Second,
26929 "WEEK" => crate::expressions::IntervalUnit::Week,
26930 "MONTH" => crate::expressions::IntervalUnit::Month,
26931 "YEAR" => crate::expressions::IntervalUnit::Year,
26932 _ => return expr,
26933 };
26934 return Expression::Interval(Box::new(crate::expressions::Interval {
26935 this: Some(Expression::string(num)),
26936 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26937 unit: iu,
26938 use_plural: false,
26939 }),
26940 }));
26941 }
26942 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
26943 // INTERVAL '2 DAYS'
26944 let plural = if num != "1" && !unit_str.ends_with('S') {
26945 format!("{} {}S", num, unit)
26946 } else if unit_str.ends_with('S') {
26947 format!("{} {}", num, unit_str)
26948 } else {
26949 format!("{} {}", num, unit)
26950 };
26951 return Expression::Interval(Box::new(crate::expressions::Interval {
26952 this: Some(Expression::string(&plural)),
26953 unit: None,
26954 }));
26955 }
26956 _ => {
26957 // Spark/Databricks/Hive: INTERVAL '1' DAY
26958 let iu = match unit {
26959 "DAY" => crate::expressions::IntervalUnit::Day,
26960 "HOUR" => crate::expressions::IntervalUnit::Hour,
26961 "MINUTE" => crate::expressions::IntervalUnit::Minute,
26962 "SECOND" => crate::expressions::IntervalUnit::Second,
26963 "WEEK" => crate::expressions::IntervalUnit::Week,
26964 "MONTH" => crate::expressions::IntervalUnit::Month,
26965 "YEAR" => crate::expressions::IntervalUnit::Year,
26966 _ => return expr,
26967 };
26968 return Expression::Interval(Box::new(crate::expressions::Interval {
26969 this: Some(Expression::string(num)),
26970 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26971 unit: iu,
26972 use_plural: false,
26973 }),
26974 }));
26975 }
26976 }
26977 }
26978 // If it's already an INTERVAL expression, pass through
26979 expr
26980 }
26981
26982 /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
26983 /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
26984 /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
26985 /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
26986 fn rewrite_unnest_expansion(
26987 select: &crate::expressions::Select,
26988 target: DialectType,
26989 ) -> Option<crate::expressions::Select> {
26990 use crate::expressions::{
26991 Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
26992 UnnestFunc,
26993 };
26994
26995 let index_offset: i64 = match target {
26996 DialectType::Presto | DialectType::Trino => 1,
26997 _ => 0, // BigQuery, Snowflake
26998 };
26999
27000 let if_func_name = match target {
27001 DialectType::Snowflake => "IFF",
27002 _ => "IF",
27003 };
27004
27005 let array_length_func = match target {
27006 DialectType::BigQuery => "ARRAY_LENGTH",
27007 DialectType::Presto | DialectType::Trino => "CARDINALITY",
27008 DialectType::Snowflake => "ARRAY_SIZE",
27009 _ => "ARRAY_LENGTH",
27010 };
27011
27012 let use_table_aliases = matches!(
27013 target,
27014 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
27015 );
27016 let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);
27017
27018 fn make_col(name: &str, table: Option<&str>) -> Expression {
27019 if let Some(tbl) = table {
27020 Expression::boxed_column(Column {
27021 name: Identifier::new(name.to_string()),
27022 table: Some(Identifier::new(tbl.to_string())),
27023 join_mark: false,
27024 trailing_comments: Vec::new(),
27025 span: None,
27026 inferred_type: None,
27027 })
27028 } else {
27029 Expression::Identifier(Identifier::new(name.to_string()))
27030 }
27031 }
27032
27033 fn make_join(this: Expression) -> Join {
27034 Join {
27035 this,
27036 on: None,
27037 using: Vec::new(),
27038 kind: JoinKind::Cross,
27039 use_inner_keyword: false,
27040 use_outer_keyword: false,
27041 deferred_condition: false,
27042 join_hint: None,
27043 match_condition: None,
27044 pivots: Vec::new(),
27045 comments: Vec::new(),
27046 nesting_group: 0,
27047 directed: false,
27048 }
27049 }
27050
27051 // Collect UNNEST info from SELECT expressions
27052 struct UnnestInfo {
27053 arr_expr: Expression,
27054 col_alias: String,
27055 pos_alias: String,
27056 source_alias: String,
27057 original_expr: Expression,
27058 has_outer_alias: Option<String>,
27059 }
27060
27061 let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
27062 let mut col_counter = 0usize;
27063 let mut pos_counter = 1usize;
27064 let mut source_counter = 1usize;
27065
27066 fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
27067 match expr {
27068 Expression::Unnest(u) => Some(u.this.clone()),
27069 Expression::Function(f)
27070 if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
27071 {
27072 Some(f.args[0].clone())
27073 }
27074 Expression::Alias(a) => extract_unnest_arg(&a.this),
27075 Expression::Add(op)
27076 | Expression::Sub(op)
27077 | Expression::Mul(op)
27078 | Expression::Div(op) => {
27079 extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
27080 }
27081 _ => None,
27082 }
27083 }
27084
27085 fn get_alias_name(expr: &Expression) -> Option<String> {
27086 if let Expression::Alias(a) = expr {
27087 Some(a.alias.name.clone())
27088 } else {
27089 None
27090 }
27091 }
27092
27093 for sel_expr in &select.expressions {
27094 if let Some(arr) = extract_unnest_arg(sel_expr) {
27095 col_counter += 1;
27096 pos_counter += 1;
27097 source_counter += 1;
27098
27099 let col_alias = if col_counter == 1 {
27100 "col".to_string()
27101 } else {
27102 format!("col_{}", col_counter)
27103 };
27104 let pos_alias = format!("pos_{}", pos_counter);
27105 let source_alias = format!("_u_{}", source_counter);
27106 let has_outer_alias = get_alias_name(sel_expr);
27107
27108 unnest_infos.push(UnnestInfo {
27109 arr_expr: arr,
27110 col_alias,
27111 pos_alias,
27112 source_alias,
27113 original_expr: sel_expr.clone(),
27114 has_outer_alias,
27115 });
27116 }
27117 }
27118
27119 if unnest_infos.is_empty() {
27120 return None;
27121 }
27122
27123 let series_alias = "pos".to_string();
27124 let series_source_alias = "_u".to_string();
27125 let tbl_ref = if use_table_aliases {
27126 Some(series_source_alias.as_str())
27127 } else {
27128 None
27129 };
27130
27131 // Build new SELECT expressions
27132 let mut new_select_exprs = Vec::new();
27133 for info in &unnest_infos {
27134 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
27135 let src_ref = if use_table_aliases {
27136 Some(info.source_alias.as_str())
27137 } else {
27138 None
27139 };
27140
27141 let pos_col = make_col(&series_alias, tbl_ref);
27142 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
27143 let col_ref = make_col(actual_col_name, src_ref);
27144
27145 let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
27146 pos_col.clone(),
27147 unnest_pos_col.clone(),
27148 )));
27149 let mut if_args = vec![eq_cond, col_ref];
27150 if null_third_arg {
27151 if_args.push(Expression::Null(crate::expressions::Null));
27152 }
27153
27154 let if_expr =
27155 Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
27156 let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);
27157
27158 new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
27159 final_expr,
27160 Identifier::new(actual_col_name.clone()),
27161 ))));
27162 }
27163
27164 // Build array size expressions for GREATEST
27165 let size_exprs: Vec<Expression> = unnest_infos
27166 .iter()
27167 .map(|info| {
27168 Expression::Function(Box::new(Function::new(
27169 array_length_func.to_string(),
27170 vec![info.arr_expr.clone()],
27171 )))
27172 })
27173 .collect();
27174
27175 let greatest =
27176 Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));
27177
27178 let series_end = if index_offset == 0 {
27179 Expression::Sub(Box::new(BinaryOp::new(
27180 greatest,
27181 Expression::Literal(Literal::Number("1".to_string())),
27182 )))
27183 } else {
27184 greatest
27185 };
27186
27187 // Build the position array source
27188 let series_unnest_expr = match target {
27189 DialectType::BigQuery => {
27190 let gen_array = Expression::Function(Box::new(Function::new(
27191 "GENERATE_ARRAY".to_string(),
27192 vec![
27193 Expression::Literal(Literal::Number("0".to_string())),
27194 series_end,
27195 ],
27196 )));
27197 Expression::Unnest(Box::new(UnnestFunc {
27198 this: gen_array,
27199 expressions: Vec::new(),
27200 with_ordinality: false,
27201 alias: None,
27202 offset_alias: None,
27203 }))
27204 }
27205 DialectType::Presto | DialectType::Trino => {
27206 let sequence = Expression::Function(Box::new(Function::new(
27207 "SEQUENCE".to_string(),
27208 vec![
27209 Expression::Literal(Literal::Number("1".to_string())),
27210 series_end,
27211 ],
27212 )));
27213 Expression::Unnest(Box::new(UnnestFunc {
27214 this: sequence,
27215 expressions: Vec::new(),
27216 with_ordinality: false,
27217 alias: None,
27218 offset_alias: None,
27219 }))
27220 }
27221 DialectType::Snowflake => {
27222 let range_end = Expression::Add(Box::new(BinaryOp::new(
27223 Expression::Paren(Box::new(crate::expressions::Paren {
27224 this: series_end,
27225 trailing_comments: Vec::new(),
27226 })),
27227 Expression::Literal(Literal::Number("1".to_string())),
27228 )));
27229 let gen_range = Expression::Function(Box::new(Function::new(
27230 "ARRAY_GENERATE_RANGE".to_string(),
27231 vec![
27232 Expression::Literal(Literal::Number("0".to_string())),
27233 range_end,
27234 ],
27235 )));
27236 let flatten_arg =
27237 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
27238 name: Identifier::new("INPUT".to_string()),
27239 value: gen_range,
27240 separator: crate::expressions::NamedArgSeparator::DArrow,
27241 }));
27242 let flatten = Expression::Function(Box::new(Function::new(
27243 "FLATTEN".to_string(),
27244 vec![flatten_arg],
27245 )));
27246 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
27247 }
27248 _ => return None,
27249 };
27250
27251 // Build series alias expression
27252 let series_alias_expr = if use_table_aliases {
27253 let col_aliases = if matches!(target, DialectType::Snowflake) {
27254 vec![
27255 Identifier::new("seq".to_string()),
27256 Identifier::new("key".to_string()),
27257 Identifier::new("path".to_string()),
27258 Identifier::new("index".to_string()),
27259 Identifier::new(series_alias.clone()),
27260 Identifier::new("this".to_string()),
27261 ]
27262 } else {
27263 vec![Identifier::new(series_alias.clone())]
27264 };
27265 Expression::Alias(Box::new(Alias {
27266 this: series_unnest_expr,
27267 alias: Identifier::new(series_source_alias.clone()),
27268 column_aliases: col_aliases,
27269 pre_alias_comments: Vec::new(),
27270 trailing_comments: Vec::new(),
27271 inferred_type: None,
27272 }))
27273 } else {
27274 Expression::Alias(Box::new(Alias::new(
27275 series_unnest_expr,
27276 Identifier::new(series_alias.clone()),
27277 )))
27278 };
27279
27280 // Build CROSS JOINs for each UNNEST
27281 let mut joins = Vec::new();
27282 for info in &unnest_infos {
27283 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
27284
27285 let unnest_join_expr = match target {
27286 DialectType::BigQuery => {
27287 // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
27288 let unnest = UnnestFunc {
27289 this: info.arr_expr.clone(),
27290 expressions: Vec::new(),
27291 with_ordinality: true,
27292 alias: Some(Identifier::new(actual_col_name.clone())),
27293 offset_alias: Some(Identifier::new(info.pos_alias.clone())),
27294 };
27295 Expression::Unnest(Box::new(unnest))
27296 }
27297 DialectType::Presto | DialectType::Trino => {
27298 let unnest = UnnestFunc {
27299 this: info.arr_expr.clone(),
27300 expressions: Vec::new(),
27301 with_ordinality: true,
27302 alias: None,
27303 offset_alias: None,
27304 };
27305 Expression::Alias(Box::new(Alias {
27306 this: Expression::Unnest(Box::new(unnest)),
27307 alias: Identifier::new(info.source_alias.clone()),
27308 column_aliases: vec![
27309 Identifier::new(actual_col_name.clone()),
27310 Identifier::new(info.pos_alias.clone()),
27311 ],
27312 pre_alias_comments: Vec::new(),
27313 trailing_comments: Vec::new(),
27314 inferred_type: None,
27315 }))
27316 }
27317 DialectType::Snowflake => {
27318 let flatten_arg =
27319 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
27320 name: Identifier::new("INPUT".to_string()),
27321 value: info.arr_expr.clone(),
27322 separator: crate::expressions::NamedArgSeparator::DArrow,
27323 }));
27324 let flatten = Expression::Function(Box::new(Function::new(
27325 "FLATTEN".to_string(),
27326 vec![flatten_arg],
27327 )));
27328 let table_fn = Expression::Function(Box::new(Function::new(
27329 "TABLE".to_string(),
27330 vec![flatten],
27331 )));
27332 Expression::Alias(Box::new(Alias {
27333 this: table_fn,
27334 alias: Identifier::new(info.source_alias.clone()),
27335 column_aliases: vec![
27336 Identifier::new("seq".to_string()),
27337 Identifier::new("key".to_string()),
27338 Identifier::new("path".to_string()),
27339 Identifier::new(info.pos_alias.clone()),
27340 Identifier::new(actual_col_name.clone()),
27341 Identifier::new("this".to_string()),
27342 ],
27343 pre_alias_comments: Vec::new(),
27344 trailing_comments: Vec::new(),
27345 inferred_type: None,
27346 }))
27347 }
27348 _ => return None,
27349 };
27350
27351 joins.push(make_join(unnest_join_expr));
27352 }
27353
27354 // Build WHERE clause
27355 let mut where_conditions: Vec<Expression> = Vec::new();
27356 for info in &unnest_infos {
27357 let src_ref = if use_table_aliases {
27358 Some(info.source_alias.as_str())
27359 } else {
27360 None
27361 };
27362 let pos_col = make_col(&series_alias, tbl_ref);
27363 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
27364
27365 let arr_size = Expression::Function(Box::new(Function::new(
27366 array_length_func.to_string(),
27367 vec![info.arr_expr.clone()],
27368 )));
27369
27370 let size_ref = if index_offset == 0 {
27371 Expression::Paren(Box::new(crate::expressions::Paren {
27372 this: Expression::Sub(Box::new(BinaryOp::new(
27373 arr_size,
27374 Expression::Literal(Literal::Number("1".to_string())),
27375 ))),
27376 trailing_comments: Vec::new(),
27377 }))
27378 } else {
27379 arr_size
27380 };
27381
27382 let eq = Expression::Eq(Box::new(BinaryOp::new(
27383 pos_col.clone(),
27384 unnest_pos_col.clone(),
27385 )));
27386 let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
27387 let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
27388 let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
27389 let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
27390 this: and_cond,
27391 trailing_comments: Vec::new(),
27392 }));
27393 let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));
27394
27395 where_conditions.push(or_cond);
27396 }
27397
27398 let where_expr = if where_conditions.len() == 1 {
27399 // Single condition: no parens needed
27400 where_conditions.into_iter().next().unwrap()
27401 } else {
27402 // Multiple conditions: wrap each OR in parens, then combine with AND
27403 let wrap = |e: Expression| {
27404 Expression::Paren(Box::new(crate::expressions::Paren {
27405 this: e,
27406 trailing_comments: Vec::new(),
27407 }))
27408 };
27409 let mut iter = where_conditions.into_iter();
27410 let first = wrap(iter.next().unwrap());
27411 let second = wrap(iter.next().unwrap());
27412 let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
27413 this: Expression::And(Box::new(BinaryOp::new(first, second))),
27414 trailing_comments: Vec::new(),
27415 }));
27416 for cond in iter {
27417 combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
27418 }
27419 combined
27420 };
27421
27422 // Build the new SELECT
27423 let mut new_select = select.clone();
27424 new_select.expressions = new_select_exprs;
27425
27426 if new_select.from.is_some() {
27427 let mut all_joins = vec![make_join(series_alias_expr)];
27428 all_joins.extend(joins);
27429 new_select.joins.extend(all_joins);
27430 } else {
27431 new_select.from = Some(From {
27432 expressions: vec![series_alias_expr],
27433 });
27434 new_select.joins.extend(joins);
27435 }
27436
27437 if let Some(ref existing_where) = new_select.where_clause {
27438 let combined = Expression::And(Box::new(BinaryOp::new(
27439 existing_where.this.clone(),
27440 where_expr,
27441 )));
27442 new_select.where_clause = Some(crate::expressions::Where { this: combined });
27443 } else {
27444 new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
27445 }
27446
27447 Some(new_select)
27448 }
27449
27450 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
27451 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
27452 match original {
27453 Expression::Unnest(_) => replacement.clone(),
27454 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
27455 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
27456 Expression::Add(op) => {
27457 let left = Self::replace_unnest_with_if(&op.left, replacement);
27458 let right = Self::replace_unnest_with_if(&op.right, replacement);
27459 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
27460 }
27461 Expression::Sub(op) => {
27462 let left = Self::replace_unnest_with_if(&op.left, replacement);
27463 let right = Self::replace_unnest_with_if(&op.right, replacement);
27464 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
27465 }
27466 Expression::Mul(op) => {
27467 let left = Self::replace_unnest_with_if(&op.left, replacement);
27468 let right = Self::replace_unnest_with_if(&op.right, replacement);
27469 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
27470 }
27471 Expression::Div(op) => {
27472 let left = Self::replace_unnest_with_if(&op.left, replacement);
27473 let right = Self::replace_unnest_with_if(&op.right, replacement);
27474 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
27475 }
27476 _ => original.clone(),
27477 }
27478 }
27479
27480 /// Decompose a JSON path like `$.y[0].z` into individual parts: `["y", "0", "z"]`.
27481 /// Strips `$` prefix, handles bracket notation, quoted strings, and removes `[*]` wildcards.
27482 fn decompose_json_path(path: &str) -> Vec<String> {
27483 let mut parts = Vec::new();
27484 let path = if path.starts_with("$.") {
27485 &path[2..]
27486 } else if path.starts_with('$') {
27487 &path[1..]
27488 } else {
27489 path
27490 };
27491 if path.is_empty() {
27492 return parts;
27493 }
27494 let mut current = String::new();
27495 let chars: Vec<char> = path.chars().collect();
27496 let mut i = 0;
27497 while i < chars.len() {
27498 match chars[i] {
27499 '.' => {
27500 if !current.is_empty() {
27501 parts.push(current.clone());
27502 current.clear();
27503 }
27504 i += 1;
27505 }
27506 '[' => {
27507 if !current.is_empty() {
27508 parts.push(current.clone());
27509 current.clear();
27510 }
27511 i += 1;
27512 let mut bracket_content = String::new();
27513 while i < chars.len() && chars[i] != ']' {
27514 if chars[i] == '"' || chars[i] == '\'' {
27515 let quote = chars[i];
27516 i += 1;
27517 while i < chars.len() && chars[i] != quote {
27518 bracket_content.push(chars[i]);
27519 i += 1;
27520 }
27521 if i < chars.len() {
27522 i += 1;
27523 }
27524 } else {
27525 bracket_content.push(chars[i]);
27526 i += 1;
27527 }
27528 }
27529 if i < chars.len() {
27530 i += 1;
27531 }
27532 if bracket_content != "*" {
27533 parts.push(bracket_content);
27534 }
27535 }
27536 _ => {
27537 current.push(chars[i]);
27538 i += 1;
27539 }
27540 }
27541 }
27542 if !current.is_empty() {
27543 parts.push(current);
27544 }
27545 parts
27546 }
27547
27548 /// Strip `$` prefix from a JSON path, keeping the rest.
27549 /// `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`
27550 fn strip_json_dollar_prefix(path: &str) -> String {
27551 if path.starts_with("$.") {
27552 path[2..].to_string()
27553 } else if path.starts_with('$') {
27554 path[1..].to_string()
27555 } else {
27556 path.to_string()
27557 }
27558 }
27559
27560 /// Strip `[*]` wildcards from a JSON path.
27561 /// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`
27562 fn strip_json_wildcards(path: &str) -> String {
27563 path.replace("[*]", "")
27564 .replace("..", ".") // Clean double dots from `$.y[*].z` -> `$.y..z`
27565 .trim_end_matches('.')
27566 .to_string()
27567 }
27568
27569 /// Convert bracket notation to dot notation for JSON paths.
27570 /// `$["a b"]` -> `$."a b"`, `$["key"]` -> `$.key`
27571 fn bracket_to_dot_notation(path: &str) -> String {
27572 let mut result = String::new();
27573 let chars: Vec<char> = path.chars().collect();
27574 let mut i = 0;
27575 while i < chars.len() {
27576 if chars[i] == '[' {
27577 // Read bracket content
27578 i += 1;
27579 let mut bracket_content = String::new();
27580 let mut is_quoted = false;
27581 let mut _quote_char = '"';
27582 while i < chars.len() && chars[i] != ']' {
27583 if chars[i] == '"' || chars[i] == '\'' {
27584 is_quoted = true;
27585 _quote_char = chars[i];
27586 i += 1;
27587 while i < chars.len() && chars[i] != _quote_char {
27588 bracket_content.push(chars[i]);
27589 i += 1;
27590 }
27591 if i < chars.len() {
27592 i += 1;
27593 }
27594 } else {
27595 bracket_content.push(chars[i]);
27596 i += 1;
27597 }
27598 }
27599 if i < chars.len() {
27600 i += 1;
27601 } // skip ]
27602 if bracket_content == "*" {
27603 // Keep wildcard as-is
27604 result.push_str("[*]");
27605 } else if is_quoted {
27606 // Quoted bracket -> dot notation with quotes
27607 result.push('.');
27608 result.push('"');
27609 result.push_str(&bracket_content);
27610 result.push('"');
27611 } else {
27612 // Numeric index -> keep as bracket
27613 result.push('[');
27614 result.push_str(&bracket_content);
27615 result.push(']');
27616 }
27617 } else {
27618 result.push(chars[i]);
27619 i += 1;
27620 }
27621 }
27622 result
27623 }
27624
27625 /// Convert JSON path bracket quoted strings to use single quotes instead of double quotes.
27626 /// `$["a b"]` -> `$['a b']`
27627 fn bracket_to_single_quotes(path: &str) -> String {
27628 let mut result = String::new();
27629 let chars: Vec<char> = path.chars().collect();
27630 let mut i = 0;
27631 while i < chars.len() {
27632 if chars[i] == '[' && i + 1 < chars.len() && chars[i + 1] == '"' {
27633 result.push('[');
27634 result.push('\'');
27635 i += 2; // skip [ and "
27636 while i < chars.len() && chars[i] != '"' {
27637 result.push(chars[i]);
27638 i += 1;
27639 }
27640 if i < chars.len() {
27641 i += 1;
27642 } // skip closing "
27643 result.push('\'');
27644 } else {
27645 result.push(chars[i]);
27646 i += 1;
27647 }
27648 }
27649 result
27650 }
27651
    /// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
    /// or PostgreSQL #temp -> TEMPORARY.
    /// Also strips # from INSERT INTO #table for non-TSQL targets.
    ///
    /// A leading `#` on the target table name marks a TSQL temp table; the
    /// rewritten statement uses the cleaned name (without `#`) and carries the
    /// temp-ness through `temporary` on the generated node.
    fn transform_select_into(
        expr: Expression,
        _source: DialectType,
        target: DialectType,
    ) -> Expression {
        use crate::expressions::{CreateTable, Expression, TableRef};

        // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets
        // (TSQL and Fabric keep the `#` marker as-is).
        if let Expression::Insert(ref insert) = expr {
            if insert.table.name.name.starts_with('#')
                && !matches!(target, DialectType::TSQL | DialectType::Fabric)
            {
                let mut new_insert = insert.clone();
                new_insert.table.name.name =
                    insert.table.name.name.trim_start_matches('#').to_string();
                return Expression::Insert(new_insert);
            }
            // INSERT statements never carry a SELECT INTO clause; done.
            return expr;
        }

        if let Expression::Select(ref select) = expr {
            if let Some(ref into) = select.into {
                // Resolve the raw target-table name from the INTO clause.
                let table_name_raw = match &into.this {
                    Expression::Table(tr) => tr.name.name.clone(),
                    Expression::Identifier(id) => id.name.clone(),
                    _ => String::new(),
                };
                // `#name` or an explicit INTO TEMPORARY both mean temp table.
                let is_temp = table_name_raw.starts_with('#') || into.temporary;
                let clean_name = table_name_raw.trim_start_matches('#').to_string();

                match target {
                    DialectType::DuckDB | DialectType::Snowflake => {
                        // SELECT INTO -> CREATE TABLE AS SELECT
                        // (the SELECT itself, minus INTO, becomes the AS body).
                        let mut new_select = select.clone();
                        new_select.into = None;
                        let ct = CreateTable {
                            name: TableRef::new(clean_name),
                            on_cluster: None,
                            columns: Vec::new(),
                            constraints: Vec::new(),
                            if_not_exists: false,
                            temporary: is_temp,
                            or_replace: false,
                            table_modifier: None,
                            as_select: Some(Expression::Select(new_select)),
                            as_select_parenthesized: false,
                            on_commit: None,
                            clone_source: None,
                            clone_at_clause: None,
                            shallow_clone: false,
                            is_copy: false,
                            leading_comments: Vec::new(),
                            with_properties: Vec::new(),
                            teradata_post_name_options: Vec::new(),
                            with_data: None,
                            with_statistics: None,
                            teradata_indexes: Vec::new(),
                            with_cte: None,
                            properties: Vec::new(),
                            partition_of: None,
                            post_table_properties: Vec::new(),
                            mysql_table_options: Vec::new(),
                            inherits: Vec::new(),
                            on_property: None,
                            copy_grants: false,
                            using_template: None,
                            rollup: None,
                        };
                        return Expression::CreateTable(Box::new(ct));
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // PostgreSQL: #foo -> INTO TEMPORARY foo
                        // Only rewrite when the temp-ness came from `#`;
                        // an already-TEMPORARY INTO is left untouched.
                        if is_temp && !into.temporary {
                            let mut new_select = select.clone();
                            let mut new_into = into.clone();
                            new_into.temporary = true;
                            new_into.unlogged = false;
                            new_into.this = Expression::Table(Box::new(TableRef::new(clean_name)));
                            new_select.into = Some(new_into);
                            Expression::Select(new_select)
                        } else {
                            expr
                        }
                    }
                    // Other targets: leave the SELECT INTO untouched.
                    _ => expr,
                }
            } else {
                expr
            }
        } else {
            expr
        }
    }
27748
    /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
    /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
    ///
    /// Mutates `ct` in place. Two passes:
    /// 1. Presto-style `WITH (...)` string pairs are re-homed per target
    ///    (STORED AS for Hive, USING for Spark, TBLPROPERTIES for both,
    ///    dropped for DuckDB, passed through otherwise).
    /// 2. AST-level properties (STORED AS / TBLPROPERTIES / PARTITIONED BY)
    ///    are folded back into `WITH (...)` for Presto-family targets, or
    ///    format names are unquoted for Hive/Spark targets.
    fn transform_create_table_properties(
        ct: &mut crate::expressions::CreateTable,
        _source: DialectType,
        target: DialectType,
    ) {
        use crate::expressions::{
            BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
            Properties,
        };

        // Helper to convert a raw property value string to the correct Expression
        let value_to_expr = |v: &str| -> Expression {
            let trimmed = v.trim();
            // Check if it's a quoted string (starts and ends with ')
            if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
                Expression::Literal(Literal::String(trimmed[1..trimmed.len() - 1].to_string()))
            }
            // Check if it's a number
            else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
                Expression::Literal(Literal::Number(trimmed.to_string()))
            }
            // Check if it's ARRAY[...] or ARRAY(...)
            // NOTE(review): `trimmed[..5]` byte-slices — assumes the first five
            // bytes are ASCII; a multi-byte char there would panic. Confirm
            // property values are always ASCII SQL text.
            else if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
                // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
                let inner = trimmed
                    .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
                    .trim_start_matches('[')
                    .trim_start_matches('(')
                    .trim_end_matches(']')
                    .trim_end_matches(')');
                let elements: Vec<Expression> = inner
                    .split(',')
                    .map(|e| {
                        let elem = e.trim().trim_matches('\'');
                        Expression::Literal(Literal::String(elem.to_string()))
                    })
                    .collect();
                Expression::Function(Box::new(crate::expressions::Function::new(
                    "ARRAY".to_string(),
                    elements,
                )))
            }
            // Otherwise, just output as identifier (unquoted)
            else {
                Expression::Identifier(Identifier::new(trimmed.to_string()))
            }
        };

        // Fast exit: nothing to transform in either property style.
        if ct.with_properties.is_empty() && ct.properties.is_empty() {
            return;
        }

        // Handle Presto-style WITH properties
        if !ct.with_properties.is_empty() {
            // Extract FORMAT property and remaining properties.
            // drain(..) empties with_properties so each branch below can
            // rebuild it for the target dialect.
            let mut format_value: Option<String> = None;
            let mut partitioned_by: Option<String> = None;
            let mut other_props: Vec<(String, String)> = Vec::new();

            for (key, value) in ct.with_properties.drain(..) {
                if key.eq_ignore_ascii_case("FORMAT") {
                    // Strip surrounding quotes from value if present
                    format_value = Some(value.trim_matches('\'').to_string());
                } else if key.eq_ignore_ascii_case("PARTITIONED_BY") {
                    partitioned_by = Some(value);
                } else {
                    other_props.push((key, value));
                }
            }

            match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Presto: keep WITH properties but lowercase 'format' key
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("format".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        // Convert (col1, col2) to ARRAY['col1', 'col2'] format
                        let trimmed = part.trim();
                        let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
                        // Also handle ARRAY['...'] format - keep as-is
                        if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), part));
                        } else {
                            // Parse column names from the parenthesized list
                            let cols: Vec<&str> = inner
                                .split(',')
                                .map(|c| c.trim().trim_matches('"').trim_matches('\''))
                                .collect();
                            let array_val = format!(
                                "ARRAY[{}]",
                                cols.iter()
                                    .map(|c| format!("'{}'", c))
                                    .collect::<Vec<_>>()
                                    .join(", ")
                            );
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), array_val));
                        }
                    }
                    ct.with_properties.extend(other_props);
                }
                DialectType::Hive => {
                    // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                // A true hive_format flag selects STORED AS rendering
                                // (contrast with None -> USING in the Spark arm below).
                                hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
                                    value: true,
                                }))),
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        // PARTITIONED_BY handling is complex - move columns to partitioned by
                        // For now, the partition columns are extracted from the column list
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        // Remaining pairs become TBLPROPERTIES ('k' = v, ...).
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Literal::String(k)),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::Spark | DialectType::Databricks => {
                    // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                hive_format: None, // None means USING syntax
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Literal::String(k)),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::DuckDB => {
                    // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
                    // Keep nothing
                }
                _ => {
                    // For other dialects, keep WITH properties as-is
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("FORMAT".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        ct.with_properties
                            .push(("PARTITIONED_BY".to_string(), part));
                    }
                    ct.with_properties.extend(other_props);
                }
            }
        }

        // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
        // and Hive STORED AS -> Presto WITH (format=...) conversion
        if !ct.properties.is_empty() {
            let is_presto_target = matches!(
                target,
                DialectType::Presto | DialectType::Trino | DialectType::Athena
            );
            let is_duckdb_target = matches!(target, DialectType::DuckDB);

            if is_presto_target || is_duckdb_target {
                let mut new_properties = Vec::new();
                for prop in ct.properties.drain(..) {
                    match &prop {
                        Expression::FileFormatProperty(ffp) => {
                            if is_presto_target {
                                // Convert STORED AS/USING to WITH (format=...)
                                if let Some(ref fmt_expr) = ffp.this {
                                    let fmt_str = match fmt_expr.as_ref() {
                                        Expression::Identifier(id) => id.name.clone(),
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        _ => {
                                            // Unrecognized format shape: keep the
                                            // property untouched and move on.
                                            new_properties.push(prop);
                                            continue;
                                        }
                                    };
                                    ct.with_properties
                                        .push(("format".to_string(), format!("'{}'", fmt_str)));
                                }
                            }
                            // DuckDB: just strip file format properties
                        }
                        // Convert TBLPROPERTIES to WITH properties for Presto target
                        Expression::Properties(props) if is_presto_target => {
                            for expr in &props.expressions {
                                if let Expression::Eq(eq) = expr {
                                    // Extract key and value from the Eq expression;
                                    // unsupported shapes are silently skipped.
                                    let key = match &eq.left {
                                        Expression::Literal(Literal::String(s)) => s.clone(),
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    let value = match &eq.right {
                                        Expression::Literal(Literal::String(s)) => {
                                            format!("'{}'", s)
                                        }
                                        Expression::Literal(Literal::Number(n)) => n.clone(),
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    ct.with_properties.push((key, value));
                                }
                            }
                        }
                        // Convert PartitionedByProperty for Presto target
                        Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
                            // Check if it contains ColumnDef expressions (Hive-style with types)
                            if let Expression::Tuple(ref tuple) = *pbp.this {
                                let mut col_names: Vec<String> = Vec::new();
                                let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                                let mut has_col_defs = false;
                                for expr in &tuple.expressions {
                                    if let Expression::ColumnDef(ref cd) = expr {
                                        has_col_defs = true;
                                        col_names.push(cd.name.name.clone());
                                        col_defs.push(*cd.clone());
                                    } else if let Expression::Column(ref col) = expr {
                                        col_names.push(col.name.name.clone());
                                    } else if let Expression::Identifier(ref id) = expr {
                                        col_names.push(id.name.clone());
                                    } else {
                                        // For function expressions like MONTHS(y), serialize to SQL
                                        let generic = Dialect::get(DialectType::Generic);
                                        if let Ok(sql) = generic.generate(expr) {
                                            col_names.push(sql);
                                        }
                                    }
                                }
                                if has_col_defs {
                                    // Merge partition column defs into the main column list
                                    // (Presto keeps partition columns as ordinary columns).
                                    for cd in col_defs {
                                        ct.columns.push(cd);
                                    }
                                }
                                if !col_names.is_empty() {
                                    // Add PARTITIONED_BY property
                                    let array_val = format!(
                                        "ARRAY[{}]",
                                        col_names
                                            .iter()
                                            .map(|n| format!("'{}'", n))
                                            .collect::<Vec<_>>()
                                            .join(", ")
                                    );
                                    ct.with_properties
                                        .push(("PARTITIONED_BY".to_string(), array_val));
                                }
                            }
                            // Skip - don't keep in properties
                        }
                        _ => {
                            // Everything else survives, except under DuckDB
                            // which drops all table properties.
                            if !is_duckdb_target {
                                new_properties.push(prop);
                            }
                        }
                    }
                }
                ct.properties = new_properties;
            } else {
                // For Hive/Spark targets, unquote format names in STORED AS
                for prop in &mut ct.properties {
                    if let Expression::FileFormatProperty(ref mut ffp) = prop {
                        if let Some(ref mut fmt_expr) = ffp.this {
                            if let Expression::Literal(Literal::String(s)) = fmt_expr.as_ref() {
                                // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
                                let unquoted = s.clone();
                                *fmt_expr =
                                    Box::new(Expression::Identifier(Identifier::new(unquoted)));
                            }
                        }
                    }
                }
            }
        }
    }
28062
28063 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
28064 fn apply_partitioned_by(
28065 ct: &mut crate::expressions::CreateTable,
28066 partitioned_by_value: &str,
28067 target: DialectType,
28068 ) {
28069 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
28070
28071 // Parse the ARRAY['col1', 'col2'] value to extract column names
28072 let mut col_names: Vec<String> = Vec::new();
28073 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
28074 let inner = partitioned_by_value
28075 .trim()
28076 .trim_start_matches("ARRAY")
28077 .trim_start_matches('[')
28078 .trim_start_matches('(')
28079 .trim_end_matches(']')
28080 .trim_end_matches(')');
28081 for part in inner.split(',') {
28082 let col = part.trim().trim_matches('\'').trim_matches('"');
28083 if !col.is_empty() {
28084 col_names.push(col.to_string());
28085 }
28086 }
28087
28088 if col_names.is_empty() {
28089 return;
28090 }
28091
28092 if matches!(target, DialectType::Hive) {
28093 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
28094 let mut partition_col_defs = Vec::new();
28095 for col_name in &col_names {
28096 // Find and remove from columns
28097 if let Some(pos) = ct
28098 .columns
28099 .iter()
28100 .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
28101 {
28102 let col_def = ct.columns.remove(pos);
28103 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
28104 }
28105 }
28106 if !partition_col_defs.is_empty() {
28107 ct.properties
28108 .push(Expression::PartitionedByProperty(Box::new(
28109 PartitionedByProperty {
28110 this: Box::new(Expression::Tuple(Box::new(Tuple {
28111 expressions: partition_col_defs,
28112 }))),
28113 },
28114 )));
28115 }
28116 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
28117 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
28118 // Use quoted identifiers to match the quoting style of the original column definitions
28119 let partition_exprs: Vec<Expression> = col_names
28120 .iter()
28121 .map(|name| {
28122 // Check if the column exists in the column list and use its quoting
28123 let is_quoted = ct
28124 .columns
28125 .iter()
28126 .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
28127 let ident = if is_quoted {
28128 Identifier::quoted(name.clone())
28129 } else {
28130 Identifier::new(name.clone())
28131 };
28132 Expression::boxed_column(Column {
28133 name: ident,
28134 table: None,
28135 join_mark: false,
28136 trailing_comments: Vec::new(),
28137 span: None,
28138 inferred_type: None,
28139 })
28140 })
28141 .collect();
28142 ct.properties
28143 .push(Expression::PartitionedByProperty(Box::new(
28144 PartitionedByProperty {
28145 this: Box::new(Expression::Tuple(Box::new(Tuple {
28146 expressions: partition_exprs,
28147 }))),
28148 },
28149 )));
28150 }
28151 // DuckDB: strip partitioned_by entirely (already handled)
28152 }
28153
28154 /// Convert a DataType to Spark's type string format (using angle brackets)
28155 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
28156 use crate::expressions::DataType;
28157 match dt {
28158 DataType::Int { .. } => "INT".to_string(),
28159 DataType::BigInt { .. } => "BIGINT".to_string(),
28160 DataType::SmallInt { .. } => "SMALLINT".to_string(),
28161 DataType::TinyInt { .. } => "TINYINT".to_string(),
28162 DataType::Float { .. } => "FLOAT".to_string(),
28163 DataType::Double { .. } => "DOUBLE".to_string(),
28164 DataType::Decimal {
28165 precision: Some(p),
28166 scale: Some(s),
28167 } => format!("DECIMAL({}, {})", p, s),
28168 DataType::Decimal {
28169 precision: Some(p), ..
28170 } => format!("DECIMAL({})", p),
28171 DataType::Decimal { .. } => "DECIMAL".to_string(),
28172 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
28173 "STRING".to_string()
28174 }
28175 DataType::Char { .. } => "STRING".to_string(),
28176 DataType::Boolean => "BOOLEAN".to_string(),
28177 DataType::Date => "DATE".to_string(),
28178 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
28179 DataType::Json | DataType::JsonB => "STRING".to_string(),
28180 DataType::Binary { .. } => "BINARY".to_string(),
28181 DataType::Array { element_type, .. } => {
28182 format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
28183 }
28184 DataType::Map {
28185 key_type,
28186 value_type,
28187 } => format!(
28188 "MAP<{}, {}>",
28189 Self::data_type_to_spark_string(key_type),
28190 Self::data_type_to_spark_string(value_type)
28191 ),
28192 DataType::Struct { fields, .. } => {
28193 let field_strs: Vec<String> = fields
28194 .iter()
28195 .map(|f| {
28196 if f.name.is_empty() {
28197 Self::data_type_to_spark_string(&f.data_type)
28198 } else {
28199 format!(
28200 "{}: {}",
28201 f.name,
28202 Self::data_type_to_spark_string(&f.data_type)
28203 )
28204 }
28205 })
28206 .collect();
28207 format!("STRUCT<{}>", field_strs.join(", "))
28208 }
28209 DataType::Custom { name } => name.clone(),
28210 _ => format!("{:?}", dt),
28211 }
28212 }
28213
    /// Extract value and unit from an Interval expression
    /// Returns (value_expression, IntervalUnit)
    ///
    /// Snowflake-style intervals embed the unit inside the string value
    /// (e.g. '5 DAY'); in that case the value returned is just the numeric
    /// part. Non-interval inputs pass through with a default unit of DAY.
    fn extract_interval_parts(
        interval_expr: &Expression,
    ) -> (Expression, crate::expressions::IntervalUnit) {
        use crate::expressions::{IntervalUnit, IntervalUnitSpec};

        if let Expression::Interval(iv) = interval_expr {
            // A missing interval value defaults to the number 0.
            let val = iv.this.clone().unwrap_or(Expression::number(0));
            let unit = match &iv.unit {
                Some(IntervalUnitSpec::Simple { unit, .. }) => *unit,
                None => {
                    // Unit might be embedded in the string value (Snowflake format: '5 DAY')
                    if let Expression::Literal(crate::expressions::Literal::String(s)) = &val {
                        // Split once: "<number> <unit...>".
                        let parts: Vec<&str> = s.trim().splitn(2, ' ').collect();
                        if parts.len() == 2 {
                            let unit_str = parts[1].trim().to_ascii_uppercase();
                            // Accept both singular and plural spellings.
                            let parsed_unit = match unit_str.as_str() {
                                "YEAR" | "YEARS" => IntervalUnit::Year,
                                "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
                                "MONTH" | "MONTHS" => IntervalUnit::Month,
                                "WEEK" | "WEEKS" | "ISOWEEK" => IntervalUnit::Week,
                                "DAY" | "DAYS" => IntervalUnit::Day,
                                "HOUR" | "HOURS" => IntervalUnit::Hour,
                                "MINUTE" | "MINUTES" => IntervalUnit::Minute,
                                "SECOND" | "SECONDS" => IntervalUnit::Second,
                                "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
                                "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
                                // Unrecognized unit text falls back to DAY.
                                _ => IntervalUnit::Day,
                            };
                            // Return just the numeric part as value and parsed unit
                            return (
                                Expression::Literal(crate::expressions::Literal::String(
                                    parts[0].to_string(),
                                )),
                                parsed_unit,
                            );
                        }
                        IntervalUnit::Day
                    } else {
                        IntervalUnit::Day
                    }
                }
                // Any other unit-spec shape defaults to DAY.
                _ => IntervalUnit::Day,
            };
            (val, unit)
        } else {
            // Not an interval - pass through
            (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
        }
    }
28265
28266 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
28267 fn normalize_bigquery_function(
28268 e: Expression,
28269 source: DialectType,
28270 target: DialectType,
28271 ) -> Result<Expression> {
28272 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
28273
28274 let f = if let Expression::Function(f) = e {
28275 *f
28276 } else {
28277 return Ok(e);
28278 };
28279 let name = f.name.to_ascii_uppercase();
28280 let mut args = f.args;
28281
28282 /// Helper to extract unit string from an identifier, column, or literal expression
28283 fn get_unit_str(expr: &Expression) -> String {
28284 match expr {
28285 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
28286 Expression::Literal(Literal::String(s)) => s.to_ascii_uppercase(),
28287 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
28288 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
28289 Expression::Function(f) => {
28290 let base = f.name.to_ascii_uppercase();
28291 if !f.args.is_empty() {
28292 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
28293 let inner = get_unit_str(&f.args[0]);
28294 format!("{}({})", base, inner)
28295 } else {
28296 base
28297 }
28298 }
28299 _ => "DAY".to_string(),
28300 }
28301 }
28302
28303 /// Parse unit string to IntervalUnit
28304 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
28305 match s {
28306 "YEAR" => crate::expressions::IntervalUnit::Year,
28307 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
28308 "MONTH" => crate::expressions::IntervalUnit::Month,
28309 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
28310 "DAY" => crate::expressions::IntervalUnit::Day,
28311 "HOUR" => crate::expressions::IntervalUnit::Hour,
28312 "MINUTE" => crate::expressions::IntervalUnit::Minute,
28313 "SECOND" => crate::expressions::IntervalUnit::Second,
28314 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
28315 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
28316 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
28317 _ => crate::expressions::IntervalUnit::Day,
28318 }
28319 }
28320
28321 match name.as_str() {
28322 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
28323 // (BigQuery: result = date1 - date2, Standard: result = end - start)
28324 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
28325 let date1 = args.remove(0);
28326 let date2 = args.remove(0);
28327 let unit_expr = args.remove(0);
28328 let unit_str = get_unit_str(&unit_expr);
28329
28330 if matches!(target, DialectType::BigQuery) {
28331 // BigQuery -> BigQuery: just uppercase the unit
28332 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
28333 return Ok(Expression::Function(Box::new(Function::new(
28334 f.name,
28335 vec![date1, date2, unit],
28336 ))));
28337 }
28338
28339 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
28340 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
28341 if matches!(target, DialectType::Snowflake) {
28342 return Ok(Expression::TimestampDiff(Box::new(
28343 crate::expressions::TimestampDiff {
28344 this: Box::new(date2),
28345 expression: Box::new(date1),
28346 unit: Some(unit_str),
28347 },
28348 )));
28349 }
28350
28351 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
28352 if matches!(target, DialectType::DuckDB) {
28353 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
28354 // CAST to TIME
28355 let cast_fn = |e: Expression| -> Expression {
28356 match e {
28357 Expression::Literal(Literal::String(s)) => {
28358 Expression::Cast(Box::new(Cast {
28359 this: Expression::Literal(Literal::String(s)),
28360 to: DataType::Custom {
28361 name: "TIME".to_string(),
28362 },
28363 trailing_comments: vec![],
28364 double_colon_syntax: false,
28365 format: None,
28366 default: None,
28367 inferred_type: None,
28368 }))
28369 }
28370 other => other,
28371 }
28372 };
28373 (cast_fn(date1), cast_fn(date2))
28374 } else if name == "DATETIME_DIFF" {
28375 // CAST to TIMESTAMP
28376 (
28377 Self::ensure_cast_timestamp(date1),
28378 Self::ensure_cast_timestamp(date2),
28379 )
28380 } else {
28381 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
28382 (
28383 Self::ensure_cast_timestamptz(date1),
28384 Self::ensure_cast_timestamptz(date2),
28385 )
28386 };
28387 return Ok(Expression::Function(Box::new(Function::new(
28388 "DATE_DIFF".to_string(),
28389 vec![
28390 Expression::Literal(Literal::String(unit_str)),
28391 cast_d2,
28392 cast_d1,
28393 ],
28394 ))));
28395 }
28396
28397 // Convert to standard TIMESTAMPDIFF(unit, start, end)
28398 let unit = Expression::Identifier(Identifier::new(unit_str));
28399 Ok(Expression::Function(Box::new(Function::new(
28400 "TIMESTAMPDIFF".to_string(),
28401 vec![unit, date2, date1],
28402 ))))
28403 }
28404
28405 // DATEDIFF(unit, start, end) -> target-specific form
28406 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
28407 "DATEDIFF" if args.len() == 3 => {
28408 let arg0 = args.remove(0);
28409 let arg1 = args.remove(0);
28410 let arg2 = args.remove(0);
28411 let unit_str = get_unit_str(&arg0);
28412
28413 // Redshift DATEDIFF(unit, start, end) order: result = end - start
28414 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
28415 // TSQL DATEDIFF(unit, start, end) order: result = end - start
28416
28417 if matches!(target, DialectType::Snowflake) {
28418 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
28419 let unit = Expression::Identifier(Identifier::new(unit_str));
28420 return Ok(Expression::Function(Box::new(Function::new(
28421 "DATEDIFF".to_string(),
28422 vec![unit, arg1, arg2],
28423 ))));
28424 }
28425
28426 if matches!(target, DialectType::DuckDB) {
28427 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
28428 let cast_d1 = Self::ensure_cast_timestamp(arg1);
28429 let cast_d2 = Self::ensure_cast_timestamp(arg2);
28430 return Ok(Expression::Function(Box::new(Function::new(
28431 "DATE_DIFF".to_string(),
28432 vec![
28433 Expression::Literal(Literal::String(unit_str)),
28434 cast_d1,
28435 cast_d2,
28436 ],
28437 ))));
28438 }
28439
28440 if matches!(target, DialectType::BigQuery) {
28441 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
28442 let cast_d1 = Self::ensure_cast_datetime(arg1);
28443 let cast_d2 = Self::ensure_cast_datetime(arg2);
28444 let unit = Expression::Identifier(Identifier::new(unit_str));
28445 return Ok(Expression::Function(Box::new(Function::new(
28446 "DATE_DIFF".to_string(),
28447 vec![cast_d2, cast_d1, unit],
28448 ))));
28449 }
28450
28451 if matches!(target, DialectType::Spark | DialectType::Databricks) {
28452 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
28453 let unit = Expression::Identifier(Identifier::new(unit_str));
28454 return Ok(Expression::Function(Box::new(Function::new(
28455 "DATEDIFF".to_string(),
28456 vec![unit, arg1, arg2],
28457 ))));
28458 }
28459
28460 if matches!(target, DialectType::Hive) {
28461 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
28462 match unit_str.as_str() {
28463 "MONTH" => {
28464 return Ok(Expression::Function(Box::new(Function::new(
28465 "CAST".to_string(),
28466 vec![Expression::Function(Box::new(Function::new(
28467 "MONTHS_BETWEEN".to_string(),
28468 vec![arg2, arg1],
28469 )))],
28470 ))));
28471 }
28472 "WEEK" => {
28473 return Ok(Expression::Cast(Box::new(Cast {
28474 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
28475 Expression::Function(Box::new(Function::new(
28476 "DATEDIFF".to_string(),
28477 vec![arg2, arg1],
28478 ))),
28479 Expression::Literal(Literal::Number("7".to_string())),
28480 ))),
28481 to: DataType::Int {
28482 length: None,
28483 integer_spelling: false,
28484 },
28485 trailing_comments: vec![],
28486 double_colon_syntax: false,
28487 format: None,
28488 default: None,
28489 inferred_type: None,
28490 })));
28491 }
28492 _ => {
28493 // Default: DATEDIFF(end, start) for DAY
28494 return Ok(Expression::Function(Box::new(Function::new(
28495 "DATEDIFF".to_string(),
28496 vec![arg2, arg1],
28497 ))));
28498 }
28499 }
28500 }
28501
28502 if matches!(
28503 target,
28504 DialectType::Presto | DialectType::Trino | DialectType::Athena
28505 ) {
28506 // Presto/Trino: DATE_DIFF('UNIT', start, end)
28507 return Ok(Expression::Function(Box::new(Function::new(
28508 "DATE_DIFF".to_string(),
28509 vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
28510 ))));
28511 }
28512
28513 if matches!(target, DialectType::TSQL) {
28514 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
28515 let cast_d2 = Self::ensure_cast_datetime2(arg2);
28516 let unit = Expression::Identifier(Identifier::new(unit_str));
28517 return Ok(Expression::Function(Box::new(Function::new(
28518 "DATEDIFF".to_string(),
28519 vec![unit, arg1, cast_d2],
28520 ))));
28521 }
28522
28523 if matches!(target, DialectType::PostgreSQL) {
28524 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
28525 // For now, use DATEDIFF (passthrough) with uppercased unit
28526 let unit = Expression::Identifier(Identifier::new(unit_str));
28527 return Ok(Expression::Function(Box::new(Function::new(
28528 "DATEDIFF".to_string(),
28529 vec![unit, arg1, arg2],
28530 ))));
28531 }
28532
28533 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
28534 let unit = Expression::Identifier(Identifier::new(unit_str));
28535 Ok(Expression::Function(Box::new(Function::new(
28536 "DATEDIFF".to_string(),
28537 vec![unit, arg1, arg2],
28538 ))))
28539 }
28540
28541 // DATE_DIFF(date1, date2, unit) -> standard form
28542 "DATE_DIFF" if args.len() == 3 => {
28543 let date1 = args.remove(0);
28544 let date2 = args.remove(0);
28545 let unit_expr = args.remove(0);
28546 let unit_str = get_unit_str(&unit_expr);
28547
28548 if matches!(target, DialectType::BigQuery) {
28549 // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
28550 let norm_unit = if unit_str == "WEEK(SUNDAY)" {
28551 "WEEK".to_string()
28552 } else {
28553 unit_str
28554 };
28555 let norm_d1 = Self::date_literal_to_cast(date1);
28556 let norm_d2 = Self::date_literal_to_cast(date2);
28557 let unit = Expression::Identifier(Identifier::new(norm_unit));
28558 return Ok(Expression::Function(Box::new(Function::new(
28559 f.name,
28560 vec![norm_d1, norm_d2, unit],
28561 ))));
28562 }
28563
28564 if matches!(target, DialectType::MySQL) {
28565 // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
28566 let norm_d1 = Self::date_literal_to_cast(date1);
28567 let norm_d2 = Self::date_literal_to_cast(date2);
28568 return Ok(Expression::Function(Box::new(Function::new(
28569 "DATEDIFF".to_string(),
28570 vec![norm_d1, norm_d2],
28571 ))));
28572 }
28573
28574 if matches!(target, DialectType::StarRocks) {
28575 // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
28576 let norm_d1 = Self::date_literal_to_cast(date1);
28577 let norm_d2 = Self::date_literal_to_cast(date2);
28578 return Ok(Expression::Function(Box::new(Function::new(
28579 "DATE_DIFF".to_string(),
28580 vec![
28581 Expression::Literal(Literal::String(unit_str)),
28582 norm_d1,
28583 norm_d2,
28584 ],
28585 ))));
28586 }
28587
28588 if matches!(target, DialectType::DuckDB) {
28589 // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
28590 let norm_d1 = Self::ensure_cast_date(date1);
28591 let norm_d2 = Self::ensure_cast_date(date2);
28592
28593 // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
28594 let is_week_variant = unit_str == "WEEK"
28595 || unit_str.starts_with("WEEK(")
28596 || unit_str == "ISOWEEK";
28597 if is_week_variant {
28598 // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
28599 // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
28600 // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
28601 // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
28602 let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
28603 None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
28604 } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
28605 Some("1") // Shift Sunday to Monday alignment
28606 } else if unit_str == "WEEK(SATURDAY)" {
28607 Some("-5")
28608 } else if unit_str == "WEEK(TUESDAY)" {
28609 Some("-1")
28610 } else if unit_str == "WEEK(WEDNESDAY)" {
28611 Some("-2")
28612 } else if unit_str == "WEEK(THURSDAY)" {
28613 Some("-3")
28614 } else if unit_str == "WEEK(FRIDAY)" {
28615 Some("-4")
28616 } else {
28617 Some("1") // default to Sunday
28618 };
28619
28620 let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
28621 let shifted = if let Some(off) = offset {
28622 let interval =
28623 Expression::Interval(Box::new(crate::expressions::Interval {
28624 this: Some(Expression::Literal(Literal::String(
28625 off.to_string(),
28626 ))),
28627 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28628 unit: crate::expressions::IntervalUnit::Day,
28629 use_plural: false,
28630 }),
28631 }));
28632 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
28633 date, interval,
28634 )))
28635 } else {
28636 date
28637 };
28638 Expression::Function(Box::new(Function::new(
28639 "DATE_TRUNC".to_string(),
28640 vec![
28641 Expression::Literal(Literal::String("WEEK".to_string())),
28642 shifted,
28643 ],
28644 )))
28645 };
28646
28647 let trunc_d2 = make_trunc(norm_d2, day_offset);
28648 let trunc_d1 = make_trunc(norm_d1, day_offset);
28649 return Ok(Expression::Function(Box::new(Function::new(
28650 "DATE_DIFF".to_string(),
28651 vec![
28652 Expression::Literal(Literal::String("WEEK".to_string())),
28653 trunc_d2,
28654 trunc_d1,
28655 ],
28656 ))));
28657 }
28658
28659 return Ok(Expression::Function(Box::new(Function::new(
28660 "DATE_DIFF".to_string(),
28661 vec![
28662 Expression::Literal(Literal::String(unit_str)),
28663 norm_d2,
28664 norm_d1,
28665 ],
28666 ))));
28667 }
28668
28669 // Default: DATEDIFF(unit, date2, date1)
28670 let unit = Expression::Identifier(Identifier::new(unit_str));
28671 Ok(Expression::Function(Box::new(Function::new(
28672 "DATEDIFF".to_string(),
28673 vec![unit, date2, date1],
28674 ))))
28675 }
28676
28677 // TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
28678 "TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
28679 let ts = args.remove(0);
28680 let interval_expr = args.remove(0);
28681 let (val, unit) = Self::extract_interval_parts(&interval_expr);
28682
28683 match target {
28684 DialectType::Snowflake => {
28685 // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
28686 // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
28687 // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
28688 let unit_str = Self::interval_unit_to_string(&unit);
28689 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
28690 Ok(Expression::TimestampAdd(Box::new(
28691 crate::expressions::TimestampAdd {
28692 this: Box::new(val),
28693 expression: Box::new(cast_ts),
28694 unit: Some(unit_str.to_string()),
28695 },
28696 )))
28697 }
28698 DialectType::Spark | DialectType::Databricks => {
28699 if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
28700 // Spark DATETIME_ADD: ts + INTERVAL val UNIT
28701 let interval =
28702 Expression::Interval(Box::new(crate::expressions::Interval {
28703 this: Some(val),
28704 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28705 unit,
28706 use_plural: false,
28707 }),
28708 }));
28709 Ok(Expression::Add(Box::new(
28710 crate::expressions::BinaryOp::new(ts, interval),
28711 )))
28712 } else if name == "DATETIME_ADD"
28713 && matches!(target, DialectType::Databricks)
28714 {
28715 // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
28716 let unit_str = Self::interval_unit_to_string(&unit);
28717 Ok(Expression::Function(Box::new(Function::new(
28718 "TIMESTAMPADD".to_string(),
28719 vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
28720 ))))
28721 } else {
28722 // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
28723 let unit_str = Self::interval_unit_to_string(&unit);
28724 let cast_ts =
28725 if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
28726 Self::maybe_cast_ts(ts)
28727 } else {
28728 ts
28729 };
28730 Ok(Expression::Function(Box::new(Function::new(
28731 "DATE_ADD".to_string(),
28732 vec![
28733 Expression::Identifier(Identifier::new(unit_str)),
28734 val,
28735 cast_ts,
28736 ],
28737 ))))
28738 }
28739 }
28740 DialectType::MySQL => {
28741 // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
28742 let mysql_ts = if name.starts_with("TIMESTAMP") {
28743 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
28744 match &ts {
28745 Expression::Function(ref inner_f)
28746 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
28747 {
28748 // Already wrapped, keep as-is
28749 ts
28750 }
28751 _ => {
28752 // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
28753 let unwrapped = match ts {
28754 Expression::Literal(Literal::Timestamp(s)) => {
28755 Expression::Literal(Literal::String(s))
28756 }
28757 other => other,
28758 };
28759 Expression::Function(Box::new(Function::new(
28760 "TIMESTAMP".to_string(),
28761 vec![unwrapped],
28762 )))
28763 }
28764 }
28765 } else {
28766 ts
28767 };
28768 Ok(Expression::DateAdd(Box::new(
28769 crate::expressions::DateAddFunc {
28770 this: mysql_ts,
28771 interval: val,
28772 unit,
28773 },
28774 )))
28775 }
28776 _ => {
28777 // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
28778 let cast_ts = if matches!(target, DialectType::DuckDB) {
28779 if name == "DATETIME_ADD" {
28780 Self::ensure_cast_timestamp(ts)
28781 } else if name.starts_with("TIMESTAMP") {
28782 Self::maybe_cast_ts_to_tz(ts, &name)
28783 } else {
28784 ts
28785 }
28786 } else {
28787 ts
28788 };
28789 Ok(Expression::DateAdd(Box::new(
28790 crate::expressions::DateAddFunc {
28791 this: cast_ts,
28792 interval: val,
28793 unit,
28794 },
28795 )))
28796 }
28797 }
28798 }
28799
28800 // TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
28801 "TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
28802 let ts = args.remove(0);
28803 let interval_expr = args.remove(0);
28804 let (val, unit) = Self::extract_interval_parts(&interval_expr);
28805
28806 match target {
28807 DialectType::Snowflake => {
28808 // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
28809 let unit_str = Self::interval_unit_to_string(&unit);
28810 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
28811 let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
28812 val,
28813 Expression::Neg(Box::new(crate::expressions::UnaryOp {
28814 this: Expression::number(1),
28815 inferred_type: None,
28816 })),
28817 )));
28818 Ok(Expression::TimestampAdd(Box::new(
28819 crate::expressions::TimestampAdd {
28820 this: Box::new(neg_val),
28821 expression: Box::new(cast_ts),
28822 unit: Some(unit_str.to_string()),
28823 },
28824 )))
28825 }
28826 DialectType::Spark | DialectType::Databricks => {
28827 if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
28828 || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
28829 {
28830 // Spark: ts - INTERVAL val UNIT
28831 let cast_ts = if name.starts_with("TIMESTAMP") {
28832 Self::maybe_cast_ts(ts)
28833 } else {
28834 ts
28835 };
28836 let interval =
28837 Expression::Interval(Box::new(crate::expressions::Interval {
28838 this: Some(val),
28839 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28840 unit,
28841 use_plural: false,
28842 }),
28843 }));
28844 Ok(Expression::Sub(Box::new(
28845 crate::expressions::BinaryOp::new(cast_ts, interval),
28846 )))
28847 } else {
28848 // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
28849 let unit_str = Self::interval_unit_to_string(&unit);
28850 let neg_val =
28851 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
28852 val,
28853 Expression::Neg(Box::new(crate::expressions::UnaryOp {
28854 this: Expression::number(1),
28855 inferred_type: None,
28856 })),
28857 )));
28858 Ok(Expression::Function(Box::new(Function::new(
28859 "TIMESTAMPADD".to_string(),
28860 vec![
28861 Expression::Identifier(Identifier::new(unit_str)),
28862 neg_val,
28863 ts,
28864 ],
28865 ))))
28866 }
28867 }
28868 DialectType::MySQL => {
28869 let mysql_ts = if name.starts_with("TIMESTAMP") {
28870 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
28871 match &ts {
28872 Expression::Function(ref inner_f)
28873 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
28874 {
28875 // Already wrapped, keep as-is
28876 ts
28877 }
28878 _ => {
28879 let unwrapped = match ts {
28880 Expression::Literal(Literal::Timestamp(s)) => {
28881 Expression::Literal(Literal::String(s))
28882 }
28883 other => other,
28884 };
28885 Expression::Function(Box::new(Function::new(
28886 "TIMESTAMP".to_string(),
28887 vec![unwrapped],
28888 )))
28889 }
28890 }
28891 } else {
28892 ts
28893 };
28894 Ok(Expression::DateSub(Box::new(
28895 crate::expressions::DateAddFunc {
28896 this: mysql_ts,
28897 interval: val,
28898 unit,
28899 },
28900 )))
28901 }
28902 _ => {
28903 let cast_ts = if matches!(target, DialectType::DuckDB) {
28904 if name == "DATETIME_SUB" {
28905 Self::ensure_cast_timestamp(ts)
28906 } else if name.starts_with("TIMESTAMP") {
28907 Self::maybe_cast_ts_to_tz(ts, &name)
28908 } else {
28909 ts
28910 }
28911 } else {
28912 ts
28913 };
28914 Ok(Expression::DateSub(Box::new(
28915 crate::expressions::DateAddFunc {
28916 this: cast_ts,
28917 interval: val,
28918 unit,
28919 },
28920 )))
28921 }
28922 }
28923 }
28924
28925 // DATE_SUB(date, INTERVAL n UNIT) -> target-specific
28926 "DATE_SUB" if args.len() == 2 => {
28927 let date = args.remove(0);
28928 let interval_expr = args.remove(0);
28929 let (val, unit) = Self::extract_interval_parts(&interval_expr);
28930
28931 match target {
28932 DialectType::Databricks | DialectType::Spark => {
28933 // Databricks/Spark: DATE_ADD(date, -val)
28934 // Use DateAdd expression with negative val so it generates correctly
28935 // The generator will output DATE_ADD(date, INTERVAL -val DAY)
28936 // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
28937 // Instead, we directly output as a simple negated DateSub
28938 Ok(Expression::DateSub(Box::new(
28939 crate::expressions::DateAddFunc {
28940 this: date,
28941 interval: val,
28942 unit,
28943 },
28944 )))
28945 }
28946 DialectType::DuckDB => {
28947 // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
28948 let cast_date = Self::ensure_cast_date(date);
28949 let interval =
28950 Expression::Interval(Box::new(crate::expressions::Interval {
28951 this: Some(val),
28952 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28953 unit,
28954 use_plural: false,
28955 }),
28956 }));
28957 Ok(Expression::Sub(Box::new(
28958 crate::expressions::BinaryOp::new(cast_date, interval),
28959 )))
28960 }
28961 DialectType::Snowflake => {
28962 // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
28963 // Just ensure the date is cast properly
28964 let cast_date = Self::ensure_cast_date(date);
28965 Ok(Expression::DateSub(Box::new(
28966 crate::expressions::DateAddFunc {
28967 this: cast_date,
28968 interval: val,
28969 unit,
28970 },
28971 )))
28972 }
28973 DialectType::PostgreSQL => {
28974 // PostgreSQL: date - INTERVAL 'val UNIT'
28975 let unit_str = Self::interval_unit_to_string(&unit);
28976 let interval =
28977 Expression::Interval(Box::new(crate::expressions::Interval {
28978 this: Some(Expression::Literal(Literal::String(format!(
28979 "{} {}",
28980 Self::expr_to_string(&val),
28981 unit_str
28982 )))),
28983 unit: None,
28984 }));
28985 Ok(Expression::Sub(Box::new(
28986 crate::expressions::BinaryOp::new(date, interval),
28987 )))
28988 }
28989 _ => Ok(Expression::DateSub(Box::new(
28990 crate::expressions::DateAddFunc {
28991 this: date,
28992 interval: val,
28993 unit,
28994 },
28995 ))),
28996 }
28997 }
28998
28999 // DATEADD(unit, val, date) -> target-specific form
29000 // Used by: Redshift, Snowflake, TSQL, ClickHouse
29001 "DATEADD" if args.len() == 3 => {
29002 let arg0 = args.remove(0);
29003 let arg1 = args.remove(0);
29004 let arg2 = args.remove(0);
29005 let unit_str = get_unit_str(&arg0);
29006
29007 if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
29008 // Keep DATEADD(UNIT, val, date) with uppercased unit
29009 let unit = Expression::Identifier(Identifier::new(unit_str));
29010 // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
29011 let date = if matches!(target, DialectType::TSQL)
29012 && !matches!(
29013 source,
29014 DialectType::Spark | DialectType::Databricks | DialectType::Hive
29015 ) {
29016 Self::ensure_cast_datetime2(arg2)
29017 } else {
29018 arg2
29019 };
29020 return Ok(Expression::Function(Box::new(Function::new(
29021 "DATEADD".to_string(),
29022 vec![unit, arg1, date],
29023 ))));
29024 }
29025
29026 if matches!(target, DialectType::DuckDB) {
29027 // DuckDB: date + INTERVAL 'val' UNIT
29028 let iu = parse_interval_unit(&unit_str);
29029 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
29030 this: Some(arg1),
29031 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29032 unit: iu,
29033 use_plural: false,
29034 }),
29035 }));
29036 let cast_date = Self::ensure_cast_timestamp(arg2);
29037 return Ok(Expression::Add(Box::new(
29038 crate::expressions::BinaryOp::new(cast_date, interval),
29039 )));
29040 }
29041
29042 if matches!(target, DialectType::BigQuery) {
29043 // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
29044 let iu = parse_interval_unit(&unit_str);
29045 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
29046 this: Some(arg1),
29047 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29048 unit: iu,
29049 use_plural: false,
29050 }),
29051 }));
29052 return Ok(Expression::Function(Box::new(Function::new(
29053 "DATE_ADD".to_string(),
29054 vec![arg2, interval],
29055 ))));
29056 }
29057
29058 if matches!(target, DialectType::Databricks) {
29059 // Databricks: keep DATEADD(UNIT, val, date) format
29060 let unit = Expression::Identifier(Identifier::new(unit_str));
29061 return Ok(Expression::Function(Box::new(Function::new(
29062 "DATEADD".to_string(),
29063 vec![unit, arg1, arg2],
29064 ))));
29065 }
29066
29067 if matches!(target, DialectType::Spark) {
29068 // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
29069 fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
29070 if let Expression::Literal(crate::expressions::Literal::Number(n)) = &expr {
29071 if let Ok(val) = n.parse::<i64>() {
29072 return Expression::Literal(crate::expressions::Literal::Number(
29073 (val * factor).to_string(),
29074 ));
29075 }
29076 }
29077 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
29078 expr,
29079 Expression::Literal(crate::expressions::Literal::Number(
29080 factor.to_string(),
29081 )),
29082 )))
29083 }
29084 match unit_str.as_str() {
29085 "YEAR" => {
29086 let months = multiply_expr_dateadd(arg1, 12);
29087 return Ok(Expression::Function(Box::new(Function::new(
29088 "ADD_MONTHS".to_string(),
29089 vec![arg2, months],
29090 ))));
29091 }
29092 "QUARTER" => {
29093 let months = multiply_expr_dateadd(arg1, 3);
29094 return Ok(Expression::Function(Box::new(Function::new(
29095 "ADD_MONTHS".to_string(),
29096 vec![arg2, months],
29097 ))));
29098 }
29099 "MONTH" => {
29100 return Ok(Expression::Function(Box::new(Function::new(
29101 "ADD_MONTHS".to_string(),
29102 vec![arg2, arg1],
29103 ))));
29104 }
29105 "WEEK" => {
29106 let days = multiply_expr_dateadd(arg1, 7);
29107 return Ok(Expression::Function(Box::new(Function::new(
29108 "DATE_ADD".to_string(),
29109 vec![arg2, days],
29110 ))));
29111 }
29112 "DAY" => {
29113 return Ok(Expression::Function(Box::new(Function::new(
29114 "DATE_ADD".to_string(),
29115 vec![arg2, arg1],
29116 ))));
29117 }
29118 _ => {
29119 let unit = Expression::Identifier(Identifier::new(unit_str));
29120 return Ok(Expression::Function(Box::new(Function::new(
29121 "DATE_ADD".to_string(),
29122 vec![unit, arg1, arg2],
29123 ))));
29124 }
29125 }
29126 }
29127
29128 if matches!(target, DialectType::Hive) {
29129 // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
29130 match unit_str.as_str() {
29131 "DAY" => {
29132 return Ok(Expression::Function(Box::new(Function::new(
29133 "DATE_ADD".to_string(),
29134 vec![arg2, arg1],
29135 ))));
29136 }
29137 "MONTH" => {
29138 return Ok(Expression::Function(Box::new(Function::new(
29139 "ADD_MONTHS".to_string(),
29140 vec![arg2, arg1],
29141 ))));
29142 }
29143 _ => {
29144 let iu = parse_interval_unit(&unit_str);
29145 let interval =
29146 Expression::Interval(Box::new(crate::expressions::Interval {
29147 this: Some(arg1),
29148 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29149 unit: iu,
29150 use_plural: false,
29151 }),
29152 }));
29153 return Ok(Expression::Add(Box::new(
29154 crate::expressions::BinaryOp::new(arg2, interval),
29155 )));
29156 }
29157 }
29158 }
29159
29160 if matches!(target, DialectType::PostgreSQL) {
29161 // PostgreSQL: date + INTERVAL 'val UNIT'
29162 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
29163 this: Some(Expression::Literal(Literal::String(format!(
29164 "{} {}",
29165 Self::expr_to_string(&arg1),
29166 unit_str
29167 )))),
29168 unit: None,
29169 }));
29170 return Ok(Expression::Add(Box::new(
29171 crate::expressions::BinaryOp::new(arg2, interval),
29172 )));
29173 }
29174
29175 if matches!(
29176 target,
29177 DialectType::Presto | DialectType::Trino | DialectType::Athena
29178 ) {
29179 // Presto/Trino: DATE_ADD('UNIT', val, date)
29180 return Ok(Expression::Function(Box::new(Function::new(
29181 "DATE_ADD".to_string(),
29182 vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
29183 ))));
29184 }
29185
29186 if matches!(target, DialectType::ClickHouse) {
29187 // ClickHouse: DATE_ADD(UNIT, val, date)
29188 let unit = Expression::Identifier(Identifier::new(unit_str));
29189 return Ok(Expression::Function(Box::new(Function::new(
29190 "DATE_ADD".to_string(),
29191 vec![unit, arg1, arg2],
29192 ))));
29193 }
29194
29195 // Default: keep DATEADD with uppercased unit
29196 let unit = Expression::Identifier(Identifier::new(unit_str));
29197 Ok(Expression::Function(Box::new(Function::new(
29198 "DATEADD".to_string(),
29199 vec![unit, arg1, arg2],
29200 ))))
29201 }
29202
            // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
            //
            // Re-emits the 3-argument add in the target's spelling:
            //   Presto/Trino/Athena  -> DATE_ADD('UNIT', val, date)   (unit quoted)
            //   Snowflake/TSQL/Redshift -> DATEADD(UNIT, val, date)   (unit bare;
            //                               TSQL also casts date to DATETIME2)
            //   DuckDB               -> date + INTERVAL val UNIT
            //   Spark/Databricks     -> DATE_ADD(UNIT, val, date)
            //   anything else        -> DATE_ADD(UNIT, val, date) unchanged apart
            //                           from the unit string normalization below.
            "DATE_ADD" if args.len() == 3 => {
                // Consume args front-to-back: unit, value, date.
                let arg0 = args.remove(0);
                let arg1 = args.remove(0);
                let arg2 = args.remove(0);
                let unit_str = get_unit_str(&arg0);

                if matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    // Presto/Trino: DATE_ADD('UNIT', val, date)
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![Expression::Literal(Literal::String(unit_str)), arg1, arg2],
                    ))));
                }

                if matches!(
                    target,
                    DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
                ) {
                    // DATEADD(UNIT, val, date)
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    // TSQL requires an explicit DATETIME2 cast on the date operand.
                    let date = if matches!(target, DialectType::TSQL) {
                        Self::ensure_cast_datetime2(arg2)
                    } else {
                        arg2
                    };
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![unit, arg1, date],
                    ))));
                }

                if matches!(target, DialectType::DuckDB) {
                    // DuckDB: date + INTERVAL val UNIT
                    let iu = parse_interval_unit(&unit_str);
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(arg1),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        }),
                    }));
                    return Ok(Expression::Add(Box::new(
                        crate::expressions::BinaryOp::new(arg2, interval),
                    )));
                }

                if matches!(target, DialectType::Spark | DialectType::Databricks) {
                    // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![unit, arg1, arg2],
                    ))));
                }

                // Default: DATE_ADD(UNIT, val, date)
                let unit = Expression::Identifier(Identifier::new(unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
29269
            // DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
            //
            // The INTERVAL argument is decomposed into (val, unit) and the addition is
            // re-emitted in the shape each target understands. The default arm keeps a
            // structured DateAdd node so the generator can decide the final rendering.
            "DATE_ADD" if args.len() == 2 => {
                let date = args.remove(0);
                let interval_expr = args.remove(0);
                let (val, unit) = Self::extract_interval_parts(&interval_expr);
                let unit_str = Self::interval_unit_to_string(&unit);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
                        let cast_date = Self::ensure_cast_date(date);
                        let quoted_val = Self::quote_interval_val(&val);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(cast_date, interval),
                        )))
                    }
                    DialectType::PostgreSQL => {
                        // PostgreSQL: date + INTERVAL 'val UNIT' — value and unit are
                        // folded into a single quoted interval literal.
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(Expression::Literal(Literal::String(format!(
                                    "{} {}",
                                    Self::expr_to_string(&val),
                                    unit_str
                                )))),
                                unit: None,
                            }));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(date, interval),
                        )))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
                        let val_str = Self::expr_to_string(&val);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![
                                Expression::Literal(Literal::String(unit_str.to_string())),
                                Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(Literal::String(val_str)),
                                    to: DataType::BigInt { length: None },
                                    trailing_comments: vec![],
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })),
                                date,
                            ],
                        ))))
                    }
                    DialectType::Spark | DialectType::Hive => {
                        // Spark/Hive: DATE_ADD(date, val) for DAY, ADD_MONTHS(date, val)
                        // for MONTH; any other unit keeps the interval form.
                        match unit_str {
                            "DAY" => Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![date, val],
                            )))),
                            "MONTH" => Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![date, val],
                            )))),
                            _ => {
                                let iu = parse_interval_unit(&unit_str);
                                let interval =
                                    Expression::Interval(Box::new(crate::expressions::Interval {
                                        this: Some(val),
                                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                            unit: iu,
                                            use_plural: false,
                                        }),
                                    }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(),
                                    vec![date, interval],
                                ))))
                            }
                        }
                    }
                    DialectType::Snowflake => {
                        // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
                        let cast_date = Self::ensure_cast_date(date);
                        let val_str = Self::expr_to_string(&val);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new(unit_str)),
                                Expression::Literal(Literal::String(val_str)),
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::TSQL | DialectType::Fabric => {
                        // TSQL/Fabric: DATEADD(UNIT, val, CAST(date AS DATETIME2))
                        let cast_date = Self::ensure_cast_datetime2(date);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new(unit_str)),
                                val,
                                cast_date,
                            ],
                        ))))
                    }
                    // Redshift: DATEADD(UNIT, val, date) — no cast needed.
                    DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
                    )))),
                    DialectType::MySQL => {
                        // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
                        let quoted_val = Self::quote_interval_val(&val);
                        let iu = parse_interval_unit(&unit_str);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: iu,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT)
                        let quoted_val = Self::quote_interval_val(&val);
                        let iu = parse_interval_unit(&unit_str);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: iu,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval],
                        ))))
                    }
                    // Databricks: DATEADD(UNIT, val, date)
                    DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
                    )))),
                    _ => {
                        // Default: keep as DATE_ADD with decomposed interval
                        Ok(Expression::DateAdd(Box::new(
                            crate::expressions::DateAddFunc {
                                this: date,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                }
            }
29435
29436 // ADD_MONTHS(date, val) -> target-specific form
29437 "ADD_MONTHS" if args.len() == 2 => {
29438 let date = args.remove(0);
29439 let val = args.remove(0);
29440
29441 if matches!(target, DialectType::TSQL) {
29442 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
29443 let cast_date = Self::ensure_cast_datetime2(date);
29444 return Ok(Expression::Function(Box::new(Function::new(
29445 "DATEADD".to_string(),
29446 vec![
29447 Expression::Identifier(Identifier::new("MONTH")),
29448 val,
29449 cast_date,
29450 ],
29451 ))));
29452 }
29453
29454 if matches!(target, DialectType::DuckDB) {
29455 // DuckDB: date + INTERVAL val MONTH
29456 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
29457 this: Some(val),
29458 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29459 unit: crate::expressions::IntervalUnit::Month,
29460 use_plural: false,
29461 }),
29462 }));
29463 return Ok(Expression::Add(Box::new(
29464 crate::expressions::BinaryOp::new(date, interval),
29465 )));
29466 }
29467
29468 if matches!(target, DialectType::Snowflake) {
29469 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
29470 if matches!(source, DialectType::Snowflake) {
29471 return Ok(Expression::Function(Box::new(Function::new(
29472 "ADD_MONTHS".to_string(),
29473 vec![date, val],
29474 ))));
29475 }
29476 return Ok(Expression::Function(Box::new(Function::new(
29477 "DATEADD".to_string(),
29478 vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
29479 ))));
29480 }
29481
29482 if matches!(target, DialectType::Spark | DialectType::Databricks) {
29483 // Spark: ADD_MONTHS(date, val) - keep as is
29484 return Ok(Expression::Function(Box::new(Function::new(
29485 "ADD_MONTHS".to_string(),
29486 vec![date, val],
29487 ))));
29488 }
29489
29490 if matches!(target, DialectType::Hive) {
29491 return Ok(Expression::Function(Box::new(Function::new(
29492 "ADD_MONTHS".to_string(),
29493 vec![date, val],
29494 ))));
29495 }
29496
29497 if matches!(
29498 target,
29499 DialectType::Presto | DialectType::Trino | DialectType::Athena
29500 ) {
29501 // Presto: DATE_ADD('MONTH', val, date)
29502 return Ok(Expression::Function(Box::new(Function::new(
29503 "DATE_ADD".to_string(),
29504 vec![
29505 Expression::Literal(Literal::String("MONTH".to_string())),
29506 val,
29507 date,
29508 ],
29509 ))));
29510 }
29511
29512 // Default: keep ADD_MONTHS
29513 Ok(Expression::Function(Box::new(Function::new(
29514 "ADD_MONTHS".to_string(),
29515 vec![date, val],
29516 ))))
29517 }
29518
            // SAFE_DIVIDE(x, y) -> target-specific NULL-on-zero division.
            //
            // Shared pieces built first:
            //   condition = y <> 0,  div_expr = x / y
            // where x and y are parenthesized unless already atomic (column /
            // literal / identifier), so the emitted division binds correctly.
            "SAFE_DIVIDE" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                // Wrap x and y in parens if they're complex expressions
                let y_ref = match &y {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        y.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: y.clone(),
                        trailing_comments: vec![],
                    })),
                };
                let x_ref = match &x {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        x.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: x.clone(),
                        trailing_comments: vec![],
                    })),
                };
                let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
                    y_ref.clone(),
                    Expression::number(0),
                )));
                let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                    x_ref.clone(),
                    y_ref.clone(),
                )));

                match target {
                    DialectType::DuckDB | DialectType::PostgreSQL => {
                        // CASE WHEN y <> 0 THEN x / y ELSE NULL END
                        // PostgreSQL additionally casts x to DOUBLE PRECISION so
                        // integer / integer does not truncate.
                        let result_div = if matches!(target, DialectType::PostgreSQL) {
                            let cast_x = Expression::Cast(Box::new(Cast {
                                this: x_ref,
                                to: DataType::Custom {
                                    name: "DOUBLE PRECISION".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                                cast_x, y_ref,
                            )))
                        } else {
                            div_expr
                        };
                        Ok(Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens: vec![(condition, result_div)],
                            else_: Some(Expression::Null(crate::expressions::Null)),
                            comments: Vec::new(),
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // IFF(y <> 0, x / y, NULL)
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: Some("IFF".to_string()),
                            inferred_type: None,
                        })))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
                        let cast_x = Expression::Cast(Box::new(Cast {
                            this: x_ref,
                            to: DataType::Double {
                                precision: None,
                                scale: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let cast_div = Expression::Div(Box::new(
                            crate::expressions::BinaryOp::new(cast_x, y_ref),
                        ));
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: cast_div,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                            inferred_type: None,
                        })))
                    }
                    _ => {
                        // IF(y <> 0, x / y, NULL)
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                            inferred_type: None,
                        })))
                    }
                }
            }
29627
29628 // GENERATE_UUID() -> UUID() with CAST to string
29629 "GENERATE_UUID" => {
29630 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
29631 this: None,
29632 name: None,
29633 is_string: None,
29634 }));
29635 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
29636 let cast_type = match target {
29637 DialectType::DuckDB => Some(DataType::Text),
29638 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
29639 length: None,
29640 parenthesized_length: false,
29641 }),
29642 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
29643 Some(DataType::String { length: None })
29644 }
29645 _ => None,
29646 };
29647 if let Some(dt) = cast_type {
29648 Ok(Expression::Cast(Box::new(Cast {
29649 this: uuid_expr,
29650 to: dt,
29651 trailing_comments: vec![],
29652 double_colon_syntax: false,
29653 format: None,
29654 default: None,
29655 inferred_type: None,
29656 })))
29657 } else {
29658 Ok(uuid_expr)
29659 }
29660 }
29661
29662 // COUNTIF(x) -> CountIf expression
29663 "COUNTIF" if args.len() == 1 => {
29664 let arg = args.remove(0);
29665 Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
29666 this: arg,
29667 distinct: false,
29668 filter: None,
29669 order_by: vec![],
29670 name: None,
29671 ignore_nulls: None,
29672 having_max: None,
29673 limit: None,
29674 inferred_type: None,
29675 })))
29676 }
29677
            // EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression
            //
            // Named arguments (e.g. max_distance => N) are flattened to positional
            // first. With exactly two args a structured Levenshtein node is emitted;
            // a third (max distance) arg triggers dialect-specific handling:
            //   DuckDB      -> CASE ... LEAST(LEVENSHTEIN(a, b), max) (NULL-propagating)
            //   PostgreSQL  -> LEVENSHTEIN_LESS_EQUAL(a, b, max, ...)
            //   others      -> LEVENSHTEIN(a, b, max, ...)
            // Fewer than two args falls back to a plain EDIT_DISTANCE call.
            "EDIT_DISTANCE" => {
                // Strip named arguments (max_distance => N) and pass as positional
                let mut positional_args: Vec<Expression> = vec![];
                for arg in args {
                    match arg {
                        Expression::NamedArgument(na) => {
                            positional_args.push(na.value);
                        }
                        other => positional_args.push(other),
                    }
                }
                if positional_args.len() >= 2 {
                    let col1 = positional_args.remove(0);
                    let col2 = positional_args.remove(0);
                    let levenshtein = crate::expressions::BinaryFunc {
                        this: col1,
                        expression: col2,
                        original_name: None,
                        inferred_type: None,
                    };
                    // Pass extra args through a function wrapper with all args
                    if !positional_args.is_empty() {
                        let max_dist = positional_args.remove(0);
                        // DuckDB: CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(a, b), max) END
                        if matches!(target, DialectType::DuckDB) {
                            let lev = Expression::Function(Box::new(Function::new(
                                "LEVENSHTEIN".to_string(),
                                vec![levenshtein.this, levenshtein.expression],
                            )));
                            let lev_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: lev.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let max_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: max_dist.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let null_check =
                                Expression::Or(Box::new(crate::expressions::BinaryOp {
                                    left: lev_is_null,
                                    right: max_is_null,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                            let least =
                                Expression::Least(Box::new(crate::expressions::VarArgFunc {
                                    expressions: vec![lev, max_dist],
                                    original_name: None,
                                    inferred_type: None,
                                }));
                            return Ok(Expression::Case(Box::new(crate::expressions::Case {
                                operand: None,
                                whens: vec![(
                                    null_check,
                                    Expression::Null(crate::expressions::Null),
                                )],
                                else_: Some(least),
                                comments: Vec::new(),
                                inferred_type: None,
                            })));
                        }
                        // Non-DuckDB: forward everything positionally.
                        let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
                        all_args.extend(positional_args);
                        // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
                        let func_name = if matches!(target, DialectType::PostgreSQL) {
                            "LEVENSHTEIN_LESS_EQUAL"
                        } else {
                            "LEVENSHTEIN"
                        };
                        return Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            all_args,
                        ))));
                    }
                    Ok(Expression::Levenshtein(Box::new(levenshtein)))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "EDIT_DISTANCE".to_string(),
                        positional_args,
                    ))))
                }
            }
29767
29768 // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
29769 "TIMESTAMP_SECONDS" if args.len() == 1 => {
29770 let arg = args.remove(0);
29771 Ok(Expression::UnixToTime(Box::new(
29772 crate::expressions::UnixToTime {
29773 this: Box::new(arg),
29774 scale: Some(0),
29775 zone: None,
29776 hours: None,
29777 minutes: None,
29778 format: None,
29779 target_type: None,
29780 },
29781 )))
29782 }
29783
29784 // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
29785 "TIMESTAMP_MILLIS" if args.len() == 1 => {
29786 let arg = args.remove(0);
29787 Ok(Expression::UnixToTime(Box::new(
29788 crate::expressions::UnixToTime {
29789 this: Box::new(arg),
29790 scale: Some(3),
29791 zone: None,
29792 hours: None,
29793 minutes: None,
29794 format: None,
29795 target_type: None,
29796 },
29797 )))
29798 }
29799
29800 // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
29801 "TIMESTAMP_MICROS" if args.len() == 1 => {
29802 let arg = args.remove(0);
29803 Ok(Expression::UnixToTime(Box::new(
29804 crate::expressions::UnixToTime {
29805 this: Box::new(arg),
29806 scale: Some(6),
29807 zone: None,
29808 hours: None,
29809 minutes: None,
29810 format: None,
29811 target_type: None,
29812 },
29813 )))
29814 }
29815
29816 // DIV(x, y) -> IntDiv expression
29817 "DIV" if args.len() == 2 => {
29818 let x = args.remove(0);
29819 let y = args.remove(0);
29820 Ok(Expression::IntDiv(Box::new(
29821 crate::expressions::BinaryFunc {
29822 this: x,
29823 expression: y,
29824 original_name: None,
29825 inferred_type: None,
29826 },
29827 )))
29828 }
29829
29830 // TO_HEX(x) -> target-specific form
29831 "TO_HEX" if args.len() == 1 => {
29832 let arg = args.remove(0);
29833 // Check if inner function already returns hex string in certain targets
29834 let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
29835 if matches!(target, DialectType::BigQuery) {
29836 // BQ->BQ: keep as TO_HEX
29837 Ok(Expression::Function(Box::new(Function::new(
29838 "TO_HEX".to_string(),
29839 vec![arg],
29840 ))))
29841 } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
29842 // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
29843 Ok(arg)
29844 } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
29845 // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
29846 // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
29847 // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
29848 // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
29849 if let Expression::Function(ref inner_f) = arg {
29850 let inner_args = inner_f.args.clone();
29851 let binary_func = match inner_f.name.to_ascii_uppercase().as_str() {
29852 "SHA1" => Expression::Function(Box::new(Function::new(
29853 "SHA1_BINARY".to_string(),
29854 inner_args,
29855 ))),
29856 "MD5" => Expression::Function(Box::new(Function::new(
29857 "MD5_BINARY".to_string(),
29858 inner_args,
29859 ))),
29860 "SHA256" => {
29861 let mut a = inner_args;
29862 a.push(Expression::number(256));
29863 Expression::Function(Box::new(Function::new(
29864 "SHA2_BINARY".to_string(),
29865 a,
29866 )))
29867 }
29868 "SHA512" => {
29869 let mut a = inner_args;
29870 a.push(Expression::number(512));
29871 Expression::Function(Box::new(Function::new(
29872 "SHA2_BINARY".to_string(),
29873 a,
29874 )))
29875 }
29876 _ => arg.clone(),
29877 };
29878 Ok(Expression::Function(Box::new(Function::new(
29879 "TO_CHAR".to_string(),
29880 vec![binary_func],
29881 ))))
29882 } else {
29883 let inner = Expression::Function(Box::new(Function::new(
29884 "HEX".to_string(),
29885 vec![arg],
29886 )));
29887 Ok(Expression::Lower(Box::new(
29888 crate::expressions::UnaryFunc::new(inner),
29889 )))
29890 }
29891 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
29892 let inner = Expression::Function(Box::new(Function::new(
29893 "TO_HEX".to_string(),
29894 vec![arg],
29895 )));
29896 Ok(Expression::Lower(Box::new(
29897 crate::expressions::UnaryFunc::new(inner),
29898 )))
29899 } else {
29900 let inner =
29901 Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
29902 Ok(Expression::Lower(Box::new(
29903 crate::expressions::UnaryFunc::new(inner),
29904 )))
29905 }
29906 }
29907
            // LAST_DAY(date, unit): drop the explicit unit argument and emit the
            // one-argument LAST_DAY(date) form for all targets.
            // NOTE(review): the unit is discarded unconditionally — a non-MONTH
            // unit would be silently lost here; confirm upstream normalization
            // guarantees MONTH (the default) is the only value that reaches this arm.
            "LAST_DAY" if args.len() == 2 => {
                let date = args.remove(0);
                let _unit = args.remove(0); // Strip the unit (MONTH is default)
                Ok(Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![date],
                ))))
            }
29917
29918 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
29919 "GENERATE_ARRAY" => {
29920 let start = args.get(0).cloned();
29921 let end = args.get(1).cloned();
29922 let step = args.get(2).cloned();
29923 Ok(Expression::GenerateSeries(Box::new(
29924 crate::expressions::GenerateSeries {
29925 start: start.map(Box::new),
29926 end: end.map(Box::new),
29927 step: step.map(Box::new),
29928 is_end_exclusive: None,
29929 },
29930 )))
29931 }
29932
            // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
            //
            // For DuckDB, string-literal bounds get an explicit CAST ... AS TIMESTAMP;
            // every other target receives the arguments untouched.
            "GENERATE_TIMESTAMP_ARRAY" => {
                let start = args.get(0).cloned();
                let end = args.get(1).cloned();
                let step = args.get(2).cloned();

                if matches!(target, DialectType::DuckDB) {
                    // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
                    // Only cast string literals - leave columns/expressions as-is
                    let maybe_cast_ts = |expr: Expression| -> Expression {
                        if matches!(&expr, Expression::Literal(Literal::String(_))) {
                            Expression::Cast(Box::new(Cast {
                                this: expr,
                                to: DataType::Timestamp {
                                    precision: None,
                                    timezone: false,
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        } else {
                            expr
                        }
                    };
                    let cast_start = start.map(maybe_cast_ts);
                    let cast_end = end.map(maybe_cast_ts);
                    Ok(Expression::GenerateSeries(Box::new(
                        crate::expressions::GenerateSeries {
                            start: cast_start.map(Box::new),
                            end: cast_end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        },
                    )))
                } else {
                    // All other targets: pass the bounds through unchanged.
                    Ok(Expression::GenerateSeries(Box::new(
                        crate::expressions::GenerateSeries {
                            start: start.map(Box::new),
                            end: end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        },
                    )))
                }
            }
29981
            // TO_JSON(x) -> target-specific (from Spark/Hive)
            //
            //   Presto/Trino -> JSON_FORMAT(CAST(x AS JSON))
            //   BigQuery     -> TO_JSON_STRING(x)
            //   DuckDB       -> CAST(TO_JSON(x) AS TEXT)
            //   others       -> TO_JSON(x) unchanged
            // A missing argument degrades to NULL rather than panicking.
            "TO_JSON" => {
                match target {
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON))
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "JSON".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_FORMAT".to_string(),
                            vec![cast_json],
                        ))))
                    }
                    DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON_STRING".to_string(),
                        args,
                    )))),
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT)
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON".to_string(),
                        args,
                    )))),
                }
            }
30037
            // TO_JSON_STRING(x) -> target-specific
            //
            // Mirror of the TO_JSON arm above, from the BigQuery side:
            //   Spark/Databricks/Hive -> TO_JSON(x)
            //   Presto/Trino          -> JSON_FORMAT(CAST(x AS JSON))
            //   DuckDB                -> CAST(TO_JSON(x) AS TEXT)
            //   Snowflake             -> TO_JSON(x)
            //   others                -> TO_JSON_STRING(x) unchanged
            "TO_JSON_STRING" => {
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
                        Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
                    ),
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON))
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "JSON".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_FORMAT".to_string(),
                            vec![cast_json],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT)
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TO_JSON(x)
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            args,
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON_STRING".to_string(),
                        args,
                    )))),
                }
            }
30099
30100 // SAFE_ADD(x, y) -> SafeAdd expression
30101 "SAFE_ADD" if args.len() == 2 => {
30102 let x = args.remove(0);
30103 let y = args.remove(0);
30104 Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
30105 this: Box::new(x),
30106 expression: Box::new(y),
30107 })))
30108 }
30109
30110 // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
30111 "SAFE_SUBTRACT" if args.len() == 2 => {
30112 let x = args.remove(0);
30113 let y = args.remove(0);
30114 Ok(Expression::SafeSubtract(Box::new(
30115 crate::expressions::SafeSubtract {
30116 this: Box::new(x),
30117 expression: Box::new(y),
30118 },
30119 )))
30120 }
30121
30122 // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
30123 "SAFE_MULTIPLY" if args.len() == 2 => {
30124 let x = args.remove(0);
30125 let y = args.remove(0);
30126 Ok(Expression::SafeMultiply(Box::new(
30127 crate::expressions::SafeMultiply {
30128 this: Box::new(x),
30129 expression: Box::new(y),
30130 },
30131 )))
30132 }
30133
30134 // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
30135 "REGEXP_CONTAINS" if args.len() == 2 => {
30136 let str_expr = args.remove(0);
30137 let pattern = args.remove(0);
30138 Ok(Expression::RegexpLike(Box::new(
30139 crate::expressions::RegexpFunc {
30140 this: str_expr,
30141 pattern,
30142 flags: None,
30143 },
30144 )))
30145 }
30146
30147 // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
30148 "CONTAINS_SUBSTR" if args.len() == 2 => {
30149 let a = args.remove(0);
30150 let b = args.remove(0);
30151 let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
30152 let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
30153 Ok(Expression::Function(Box::new(Function::new(
30154 "CONTAINS".to_string(),
30155 vec![lower_a, lower_b],
30156 ))))
30157 }
30158
30159 // INT64(x) -> CAST(x AS BIGINT)
30160 "INT64" if args.len() == 1 => {
30161 let arg = args.remove(0);
30162 Ok(Expression::Cast(Box::new(Cast {
30163 this: arg,
30164 to: DataType::BigInt { length: None },
30165 trailing_comments: vec![],
30166 double_colon_syntax: false,
30167 format: None,
30168 default: None,
30169 inferred_type: None,
30170 })))
30171 }
30172
30173 // INSTR(str, substr) -> target-specific
30174 "INSTR" if args.len() >= 2 => {
30175 let str_expr = args.remove(0);
30176 let substr = args.remove(0);
30177 if matches!(target, DialectType::Snowflake) {
30178 // CHARINDEX(substr, str)
30179 Ok(Expression::Function(Box::new(Function::new(
30180 "CHARINDEX".to_string(),
30181 vec![substr, str_expr],
30182 ))))
30183 } else if matches!(target, DialectType::BigQuery) {
30184 // Keep as INSTR
30185 Ok(Expression::Function(Box::new(Function::new(
30186 "INSTR".to_string(),
30187 vec![str_expr, substr],
30188 ))))
30189 } else {
30190 // Default: keep as INSTR
30191 Ok(Expression::Function(Box::new(Function::new(
30192 "INSTR".to_string(),
30193 vec![str_expr, substr],
30194 ))))
30195 }
30196 }
30197
            // BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL
            //
            // Targets that use the standard argument order get the unit first as a
            // quoted string literal; everything else keeps BigQuery's (expr, unit)
            // order with the original unit expression untouched.
            "DATE_TRUNC" if args.len() == 2 => {
                let expr = args.remove(0);
                let unit_expr = args.remove(0);
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB
                    | DialectType::Snowflake
                    | DialectType::PostgreSQL
                    | DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Databricks
                    | DialectType::Spark
                    | DialectType::Redshift
                    | DialectType::ClickHouse
                    | DialectType::TSQL => {
                        // Standard: DATE_TRUNC('UNIT', expr)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::Literal(Literal::String(unit_str)), expr],
                        ))))
                    }
                    _ => {
                        // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![expr, unit_expr],
                        ))))
                    }
                }
            }
30230
            // TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific
            //
            // Accepts TIMESTAMP_TRUNC(ts, unit) and TIMESTAMP_TRUNC(ts, unit, timezone).
            // DuckDB gets DATE_TRUNC with careful timezone handling (see below);
            // Databricks/Spark get DATE_TRUNC('UNIT', ts); everything else keeps the
            // original function name with a quoted unit.
            "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
                // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
                let ts = args.remove(0);
                let unit_expr = args.remove(0);
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                        // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
                        // Without timezone for MINUTE+ granularity: just DATE_TRUNC
                        let is_coarse = matches!(
                            unit_str.as_str(),
                            "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
                        );
                        // For DATETIME_TRUNC, cast string args to TIMESTAMP
                        let cast_ts = if name == "DATETIME_TRUNC" {
                            match ts {
                                Expression::Literal(Literal::String(ref _s)) => {
                                    Expression::Cast(Box::new(Cast {
                                        this: ts,
                                        to: DataType::Timestamp {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                }
                                // Non-literal operands defer to the shared helper.
                                _ => Self::maybe_cast_ts_to_tz(ts, &name),
                            }
                        } else {
                            Self::maybe_cast_ts_to_tz(ts, &name)
                        };

                        if let Some(tz_arg) = tz {
                            if is_coarse {
                                // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                let at_tz = Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: cast_ts,
                                        zone: tz_arg.clone(),
                                    },
                                ));
                                let date_trunc = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::Literal(Literal::String(unit_str)), at_tz],
                                )));
                                Ok(Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: date_trunc,
                                        zone: tz_arg,
                                    },
                                )))
                            } else {
                                // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                                ))))
                            }
                        } else {
                            // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::Literal(Literal::String(unit_str)), cast_ts],
                            ))))
                        }
                    }
                    DialectType::Databricks | DialectType::Spark => {
                        // Databricks/Spark: DATE_TRUNC('UNIT', ts)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::Literal(Literal::String(unit_str)), ts],
                        ))))
                    }
                    _ => {
                        // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
                        // NOTE(review): DATETIME_TRUNC also lands here and is renamed
                        // to TIMESTAMP_TRUNC — presumably intentional normalization;
                        // confirm targets accept that spelling.
                        let unit = Expression::Literal(Literal::String(unit_str));
                        let mut date_trunc_args = vec![unit, ts];
                        if let Some(tz_arg) = tz {
                            date_trunc_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_TRUNC".to_string(),
                            date_trunc_args,
                        ))))
                    }
                }
            }
30330
// TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME)
// Dispatches on arity: 3 = constructor from parts, 1 = conversion,
// 2 = conversion with a timezone shift, anything else passes through.
"TIME" => {
    if args.len() == 3 {
        // TIME(h, m, s) constructor
        match target {
            DialectType::TSQL => {
                // TIMEFROMPARTS(h, m, s, 0, 0)
                // TSQL requires fractional-second and precision arguments;
                // pad with zeros.
                args.push(Expression::number(0));
                args.push(Expression::number(0));
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMEFROMPARTS".to_string(),
                    args,
                ))))
            }
            DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
                "MAKETIME".to_string(),
                args,
            )))),
            DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                Function::new("MAKE_TIME".to_string(), args),
            ))),
            _ => Ok(Expression::Function(Box::new(Function::new(
                "TIME".to_string(),
                args,
            )))),
        }
    } else if args.len() == 1 {
        let arg = args.remove(0);
        if matches!(target, DialectType::Spark) {
            // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
            Ok(Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        } else {
            // Most targets: CAST(x AS TIME)
            Ok(Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Time {
                    precision: None,
                    timezone: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
    } else if args.len() == 2 {
        // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
        let expr = args.remove(0);
        let tz = args.remove(0);
        let cast_tstz = Expression::Cast(Box::new(Cast {
            this: expr,
            to: DataType::Timestamp {
                timezone: true,
                precision: None,
            },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }));
        let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
            this: cast_tstz,
            zone: tz,
        }));
        Ok(Expression::Cast(Box::new(Cast {
            this: at_tz,
            to: DataType::Time {
                precision: None,
                timezone: false,
            },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        })))
    } else {
        // Zero or 4+ args: not a recognized shape, pass through unchanged.
        Ok(Expression::Function(Box::new(Function::new(
            "TIME".to_string(),
            args,
        ))))
    }
}
30427
// DATETIME('string') -> CAST('string' AS TIMESTAMP)
// DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
// DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
// DATETIME(y, m, d, h, min, s) -> target-specific
"DATETIME" => {
    // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
    if matches!(target, DialectType::BigQuery) {
        if args.len() == 2 {
            let has_time_literal =
                matches!(&args[1], Expression::Literal(Literal::Time(_)));
            if has_time_literal {
                let first = args.remove(0);
                let second = args.remove(0);
                // Rewrite TIME 'x' as CAST('x' AS TIME); other shapes pass through.
                let time_as_cast = match second {
                    Expression::Literal(Literal::Time(s)) => {
                        Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Literal::String(s)),
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }))
                    }
                    other => other,
                };
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATETIME".to_string(),
                    vec![first, time_as_cast],
                ))));
            }
        }
        // BQ->BQ with any other arity/shape: unchanged.
        return Ok(Expression::Function(Box::new(Function::new(
            "DATETIME".to_string(),
            args,
        ))));
    }

    if args.len() == 1 {
        // Single argument: plain conversion to a naive TIMESTAMP.
        let arg = args.remove(0);
        Ok(Expression::Cast(Box::new(Cast {
            this: arg,
            to: DataType::Timestamp {
                timezone: false,
                precision: None,
            },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        })))
    } else if args.len() == 2 {
        let first = args.remove(0);
        let second = args.remove(0);
        // Check if second arg is a TIME literal
        // A TIME literal means date+time composition; otherwise the second
        // argument is treated as a timezone name.
        let is_time_literal = matches!(&second, Expression::Literal(Literal::Time(_)));
        if is_time_literal {
            // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
            let cast_date = Expression::Cast(Box::new(Cast {
                this: first,
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
            let time_as_string = match second {
                Expression::Literal(Literal::Time(s)) => {
                    Expression::Literal(Literal::String(s))
                }
                other => other,
            };
            let cast_time = Expression::Cast(Box::new(Cast {
                this: time_as_string,
                to: DataType::Time {
                    precision: None,
                    timezone: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            // DATE + TIME addition composes the two parts into a timestamp.
            let add_expr =
                Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
            Ok(Expression::Cast(Box::new(Cast {
                this: add_expr,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        } else {
            // DATETIME('string', 'timezone')
            let cast_tstz = Expression::Cast(Box::new(Cast {
                this: first,
                to: DataType::Timestamp {
                    timezone: true,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let at_tz =
                Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                    this: cast_tstz,
                    zone: second,
                }));
            Ok(Expression::Cast(Box::new(Cast {
                this: at_tz,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
    } else if args.len() >= 3 {
        // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
        // For other targets, use MAKE_TIMESTAMP or similar
        if matches!(target, DialectType::Snowflake) {
            Ok(Expression::Function(Box::new(Function::new(
                "TIMESTAMP_FROM_PARTS".to_string(),
                args,
            ))))
        } else {
            Ok(Expression::Function(Box::new(Function::new(
                "DATETIME".to_string(),
                args,
            ))))
        }
    } else {
        // Zero args: nothing to rewrite.
        Ok(Expression::Function(Box::new(Function::new(
            "DATETIME".to_string(),
            args,
        ))))
    }
}
30586
30587 // TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
30588 // TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz for DuckDB
30589 "TIMESTAMP" => {
30590 if args.len() == 1 {
30591 let arg = args.remove(0);
30592 Ok(Expression::Cast(Box::new(Cast {
30593 this: arg,
30594 to: DataType::Timestamp {
30595 timezone: true,
30596 precision: None,
30597 },
30598 trailing_comments: vec![],
30599 double_colon_syntax: false,
30600 format: None,
30601 default: None,
30602 inferred_type: None,
30603 })))
30604 } else if args.len() == 2 {
30605 let arg = args.remove(0);
30606 let tz = args.remove(0);
30607 let cast_ts = Expression::Cast(Box::new(Cast {
30608 this: arg,
30609 to: DataType::Timestamp {
30610 timezone: false,
30611 precision: None,
30612 },
30613 trailing_comments: vec![],
30614 double_colon_syntax: false,
30615 format: None,
30616 default: None,
30617 inferred_type: None,
30618 }));
30619 if matches!(target, DialectType::Snowflake) {
30620 // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
30621 Ok(Expression::Function(Box::new(Function::new(
30622 "CONVERT_TIMEZONE".to_string(),
30623 vec![tz, cast_ts],
30624 ))))
30625 } else {
30626 Ok(Expression::AtTimeZone(Box::new(
30627 crate::expressions::AtTimeZone {
30628 this: cast_ts,
30629 zone: tz,
30630 },
30631 )))
30632 }
30633 } else {
30634 Ok(Expression::Function(Box::new(Function::new(
30635 "TIMESTAMP".to_string(),
30636 args,
30637 ))))
30638 }
30639 }
30640
// STRING(x) -> CAST(x AS VARCHAR/TEXT)
// STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
"STRING" => {
    if args.len() == 1 {
        let arg = args.remove(0);
        // DuckDB's canonical string type is TEXT; everything else uses VARCHAR.
        let cast_type = match target {
            DialectType::DuckDB => DataType::Text,
            _ => DataType::VarChar {
                length: None,
                parenthesized_length: false,
            },
        };
        Ok(Expression::Cast(Box::new(Cast {
            this: arg,
            to: cast_type,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        })))
    } else if args.len() == 2 {
        let arg = args.remove(0);
        let tz = args.remove(0);
        let cast_type = match target {
            DialectType::DuckDB => DataType::Text,
            _ => DataType::VarChar {
                length: None,
                parenthesized_length: false,
            },
        };
        if matches!(target, DialectType::Snowflake) {
            // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
            let convert_tz = Expression::Function(Box::new(Function::new(
                "CONVERT_TIMEZONE".to_string(),
                vec![
                    Expression::Literal(Literal::String("UTC".to_string())),
                    tz,
                    arg,
                ],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: convert_tz,
                to: cast_type,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        } else {
            // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
            // The double AT TIME ZONE anchors the naive timestamp at UTC
            // before shifting it into the requested zone.
            let cast_ts = Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let at_utc =
                Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                    this: cast_ts,
                    zone: Expression::Literal(Literal::String("UTC".to_string())),
                }));
            let at_tz =
                Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                    this: at_utc,
                    zone: tz,
                }));
            Ok(Expression::Cast(Box::new(Cast {
                this: at_tz,
                to: cast_type,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
    } else {
        // Other arities: pass through unchanged.
        Ok(Expression::Function(Box::new(Function::new(
            "STRING".to_string(),
            args,
        ))))
    }
}
30732
// UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
// UNIX_SECONDS(ts): seconds since the Unix epoch as an integer.
"UNIX_SECONDS" if args.len() == 1 => {
    let ts = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
            // The outer cast is needed because DuckDB's EPOCH returns a
            // non-integer type — TODO confirm against DuckDB docs.
            let cast_ts = Self::ensure_cast_timestamptz(ts);
            let epoch = Expression::Function(Box::new(Function::new(
                "EPOCH".to_string(),
                vec![cast_ts],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: epoch,
                to: DataType::BigInt { length: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::Snowflake => {
            // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
            // Snowflake has no direct epoch-seconds extractor in this path, so
            // compute the difference from an explicit epoch literal.
            let epoch = Expression::Cast(Box::new(Cast {
                this: Expression::Literal(Literal::String(
                    "1970-01-01 00:00:00+00".to_string(),
                )),
                to: DataType::Timestamp {
                    timezone: true,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            Ok(Expression::TimestampDiff(Box::new(
                crate::expressions::TimestampDiff {
                    this: Box::new(epoch),
                    expression: Box::new(ts),
                    unit: Some("SECONDS".to_string()),
                },
            )))
        }
        // Other targets: keep the original call untouched.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "UNIX_SECONDS".to_string(),
            vec![ts],
        )))),
    }
}
30784
30785 "UNIX_MILLIS" if args.len() == 1 => {
30786 let ts = args.remove(0);
30787 match target {
30788 DialectType::DuckDB => {
30789 // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
30790 let cast_ts = Self::ensure_cast_timestamptz(ts);
30791 Ok(Expression::Function(Box::new(Function::new(
30792 "EPOCH_MS".to_string(),
30793 vec![cast_ts],
30794 ))))
30795 }
30796 _ => Ok(Expression::Function(Box::new(Function::new(
30797 "UNIX_MILLIS".to_string(),
30798 vec![ts],
30799 )))),
30800 }
30801 }
30802
30803 "UNIX_MICROS" if args.len() == 1 => {
30804 let ts = args.remove(0);
30805 match target {
30806 DialectType::DuckDB => {
30807 // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
30808 let cast_ts = Self::ensure_cast_timestamptz(ts);
30809 Ok(Expression::Function(Box::new(Function::new(
30810 "EPOCH_US".to_string(),
30811 vec![cast_ts],
30812 ))))
30813 }
30814 _ => Ok(Expression::Function(Box::new(Function::new(
30815 "UNIX_MICROS".to_string(),
30816 vec![ts],
30817 )))),
30818 }
30819 }
30820
30821 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
30822 "ARRAY_CONCAT" | "LIST_CONCAT" => {
30823 match target {
30824 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
30825 // CONCAT(arr1, arr2, ...)
30826 Ok(Expression::Function(Box::new(Function::new(
30827 "CONCAT".to_string(),
30828 args,
30829 ))))
30830 }
30831 DialectType::Presto | DialectType::Trino => {
30832 // CONCAT(arr1, arr2, ...)
30833 Ok(Expression::Function(Box::new(Function::new(
30834 "CONCAT".to_string(),
30835 args,
30836 ))))
30837 }
30838 DialectType::Snowflake => {
30839 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
30840 if args.len() == 1 {
30841 // ARRAY_CAT requires 2 args, add empty array as []
30842 let empty_arr = Expression::ArrayFunc(Box::new(
30843 crate::expressions::ArrayConstructor {
30844 expressions: vec![],
30845 bracket_notation: true,
30846 use_list_keyword: false,
30847 },
30848 ));
30849 let mut new_args = args;
30850 new_args.push(empty_arr);
30851 Ok(Expression::Function(Box::new(Function::new(
30852 "ARRAY_CAT".to_string(),
30853 new_args,
30854 ))))
30855 } else if args.is_empty() {
30856 Ok(Expression::Function(Box::new(Function::new(
30857 "ARRAY_CAT".to_string(),
30858 args,
30859 ))))
30860 } else {
30861 let mut it = args.into_iter().rev();
30862 let mut result = it.next().unwrap();
30863 for arr in it {
30864 result = Expression::Function(Box::new(Function::new(
30865 "ARRAY_CAT".to_string(),
30866 vec![arr, result],
30867 )));
30868 }
30869 Ok(result)
30870 }
30871 }
30872 DialectType::PostgreSQL => {
30873 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
30874 if args.len() <= 1 {
30875 Ok(Expression::Function(Box::new(Function::new(
30876 "ARRAY_CAT".to_string(),
30877 args,
30878 ))))
30879 } else {
30880 let mut it = args.into_iter().rev();
30881 let mut result = it.next().unwrap();
30882 for arr in it {
30883 result = Expression::Function(Box::new(Function::new(
30884 "ARRAY_CAT".to_string(),
30885 vec![arr, result],
30886 )));
30887 }
30888 Ok(result)
30889 }
30890 }
30891 DialectType::Redshift => {
30892 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
30893 if args.len() <= 2 {
30894 Ok(Expression::Function(Box::new(Function::new(
30895 "ARRAY_CONCAT".to_string(),
30896 args,
30897 ))))
30898 } else {
30899 let mut it = args.into_iter().rev();
30900 let mut result = it.next().unwrap();
30901 for arr in it {
30902 result = Expression::Function(Box::new(Function::new(
30903 "ARRAY_CONCAT".to_string(),
30904 vec![arr, result],
30905 )));
30906 }
30907 Ok(result)
30908 }
30909 }
30910 DialectType::DuckDB => {
30911 // LIST_CONCAT supports multiple args natively in DuckDB
30912 Ok(Expression::Function(Box::new(Function::new(
30913 "LIST_CONCAT".to_string(),
30914 args,
30915 ))))
30916 }
30917 _ => Ok(Expression::Function(Box::new(Function::new(
30918 "ARRAY_CONCAT".to_string(),
30919 args,
30920 )))),
30921 }
30922 }
30923
30924 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
30925 "ARRAY_CONCAT_AGG" if args.len() == 1 => {
30926 let arg = args.remove(0);
30927 match target {
30928 DialectType::Snowflake => {
30929 let array_agg =
30930 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
30931 this: arg,
30932 distinct: false,
30933 filter: None,
30934 order_by: vec![],
30935 name: None,
30936 ignore_nulls: None,
30937 having_max: None,
30938 limit: None,
30939 inferred_type: None,
30940 }));
30941 Ok(Expression::Function(Box::new(Function::new(
30942 "ARRAY_FLATTEN".to_string(),
30943 vec![array_agg],
30944 ))))
30945 }
30946 _ => Ok(Expression::Function(Box::new(Function::new(
30947 "ARRAY_CONCAT_AGG".to_string(),
30948 vec![arg],
30949 )))),
30950 }
30951 }
30952
30953 // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
30954 "MD5" if args.len() == 1 => {
30955 let arg = args.remove(0);
30956 match target {
30957 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
30958 // UNHEX(MD5(x))
30959 let md5 = Expression::Function(Box::new(Function::new(
30960 "MD5".to_string(),
30961 vec![arg],
30962 )));
30963 Ok(Expression::Function(Box::new(Function::new(
30964 "UNHEX".to_string(),
30965 vec![md5],
30966 ))))
30967 }
30968 DialectType::Snowflake => {
30969 // MD5_BINARY(x)
30970 Ok(Expression::Function(Box::new(Function::new(
30971 "MD5_BINARY".to_string(),
30972 vec![arg],
30973 ))))
30974 }
30975 _ => Ok(Expression::Function(Box::new(Function::new(
30976 "MD5".to_string(),
30977 vec![arg],
30978 )))),
30979 }
30980 }
30981
30982 "SHA1" if args.len() == 1 => {
30983 let arg = args.remove(0);
30984 match target {
30985 DialectType::DuckDB => {
30986 // UNHEX(SHA1(x))
30987 let sha1 = Expression::Function(Box::new(Function::new(
30988 "SHA1".to_string(),
30989 vec![arg],
30990 )));
30991 Ok(Expression::Function(Box::new(Function::new(
30992 "UNHEX".to_string(),
30993 vec![sha1],
30994 ))))
30995 }
30996 _ => Ok(Expression::Function(Box::new(Function::new(
30997 "SHA1".to_string(),
30998 vec![arg],
30999 )))),
31000 }
31001 }
31002
31003 "SHA256" if args.len() == 1 => {
31004 let arg = args.remove(0);
31005 match target {
31006 DialectType::DuckDB => {
31007 // UNHEX(SHA256(x))
31008 let sha = Expression::Function(Box::new(Function::new(
31009 "SHA256".to_string(),
31010 vec![arg],
31011 )));
31012 Ok(Expression::Function(Box::new(Function::new(
31013 "UNHEX".to_string(),
31014 vec![sha],
31015 ))))
31016 }
31017 DialectType::Snowflake => {
31018 // SHA2_BINARY(x, 256)
31019 Ok(Expression::Function(Box::new(Function::new(
31020 "SHA2_BINARY".to_string(),
31021 vec![arg, Expression::number(256)],
31022 ))))
31023 }
31024 DialectType::Redshift | DialectType::Spark => {
31025 // SHA2(x, 256)
31026 Ok(Expression::Function(Box::new(Function::new(
31027 "SHA2".to_string(),
31028 vec![arg, Expression::number(256)],
31029 ))))
31030 }
31031 _ => Ok(Expression::Function(Box::new(Function::new(
31032 "SHA256".to_string(),
31033 vec![arg],
31034 )))),
31035 }
31036 }
31037
31038 "SHA512" if args.len() == 1 => {
31039 let arg = args.remove(0);
31040 match target {
31041 DialectType::Snowflake => {
31042 // SHA2_BINARY(x, 512)
31043 Ok(Expression::Function(Box::new(Function::new(
31044 "SHA2_BINARY".to_string(),
31045 vec![arg, Expression::number(512)],
31046 ))))
31047 }
31048 DialectType::Redshift | DialectType::Spark => {
31049 // SHA2(x, 512)
31050 Ok(Expression::Function(Box::new(Function::new(
31051 "SHA2".to_string(),
31052 vec![arg, Expression::number(512)],
31053 ))))
31054 }
31055 _ => Ok(Expression::Function(Box::new(Function::new(
31056 "SHA512".to_string(),
31057 vec![arg],
31058 )))),
31059 }
31060 }
31061
// REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg
// BigQuery's REGEXP_EXTRACT_ALL returns capture group 1 when the pattern has
// a group; targets differ in their default group index, so one is added or
// omitted per dialect.
"REGEXP_EXTRACT_ALL" if args.len() == 2 => {
    let str_expr = args.remove(0);
    let pattern = args.remove(0);

    // Check if pattern contains capturing groups (parentheses)
    // NOTE(review): this heuristic also matches non-capturing groups `(?:...)`
    // and escaped parens `\(`...`\)` — confirm whether that over-detection is
    // acceptable for the targets below.
    let has_groups = match &pattern {
        Expression::Literal(Literal::String(s)) => s.contains('(') && s.contains(')'),
        _ => false,
    };

    match target {
        DialectType::DuckDB => {
            // DuckDB needs an explicit group index: 1 with groups, 0 without.
            let group = if has_groups {
                Expression::number(1)
            } else {
                Expression::number(0)
            };
            Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_EXTRACT_ALL".to_string(),
                vec![str_expr, pattern, group],
            ))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
            if has_groups {
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern],
                ))))
            } else {
                // No groups: force whole-match extraction with index 0.
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern, Expression::number(0)],
                ))))
            }
        }
        DialectType::Presto | DialectType::Trino => {
            // Presto/Trino: explicit group 1 when the pattern captures.
            if has_groups {
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern, Expression::number(1)],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern],
                ))))
            }
        }
        DialectType::Snowflake => {
            if has_groups {
                // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
                // (position, occurrence, parameters, group) — Snowflake's
                // full argument list with case-sensitive matching.
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![
                        str_expr,
                        pattern,
                        Expression::number(1),
                        Expression::number(1),
                        Expression::Literal(Literal::String("c".to_string())),
                        Expression::number(1),
                    ],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern],
                ))))
            }
        }
        // Other targets: two-argument form unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT_ALL".to_string(),
            vec![str_expr, pattern],
        )))),
    }
}
31139
31140 // MOD(x, y) -> x % y for PostgreSQL/DuckDB
31141 "MOD" if args.len() == 2 => {
31142 match target {
31143 DialectType::PostgreSQL
31144 | DialectType::DuckDB
31145 | DialectType::Presto
31146 | DialectType::Trino
31147 | DialectType::Athena
31148 | DialectType::Snowflake => {
31149 let x = args.remove(0);
31150 let y = args.remove(0);
31151 // Wrap complex expressions in parens to preserve precedence
31152 let needs_paren = |e: &Expression| {
31153 matches!(
31154 e,
31155 Expression::Add(_)
31156 | Expression::Sub(_)
31157 | Expression::Mul(_)
31158 | Expression::Div(_)
31159 )
31160 };
31161 let x = if needs_paren(&x) {
31162 Expression::Paren(Box::new(crate::expressions::Paren {
31163 this: x,
31164 trailing_comments: vec![],
31165 }))
31166 } else {
31167 x
31168 };
31169 let y = if needs_paren(&y) {
31170 Expression::Paren(Box::new(crate::expressions::Paren {
31171 this: y,
31172 trailing_comments: vec![],
31173 }))
31174 } else {
31175 y
31176 };
31177 Ok(Expression::Mod(Box::new(
31178 crate::expressions::BinaryOp::new(x, y),
31179 )))
31180 }
31181 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
31182 // Hive/Spark: a % b
31183 let x = args.remove(0);
31184 let y = args.remove(0);
31185 let needs_paren = |e: &Expression| {
31186 matches!(
31187 e,
31188 Expression::Add(_)
31189 | Expression::Sub(_)
31190 | Expression::Mul(_)
31191 | Expression::Div(_)
31192 )
31193 };
31194 let x = if needs_paren(&x) {
31195 Expression::Paren(Box::new(crate::expressions::Paren {
31196 this: x,
31197 trailing_comments: vec![],
31198 }))
31199 } else {
31200 x
31201 };
31202 let y = if needs_paren(&y) {
31203 Expression::Paren(Box::new(crate::expressions::Paren {
31204 this: y,
31205 trailing_comments: vec![],
31206 }))
31207 } else {
31208 y
31209 };
31210 Ok(Expression::Mod(Box::new(
31211 crate::expressions::BinaryOp::new(x, y),
31212 )))
31213 }
31214 _ => Ok(Expression::Function(Box::new(Function::new(
31215 "MOD".to_string(),
31216 args,
31217 )))),
31218 }
31219 }
31220
31221 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, ARRAY_FILTER for StarRocks
31222 "ARRAY_FILTER" if args.len() == 2 => {
31223 let name = match target {
31224 DialectType::DuckDB => "LIST_FILTER",
31225 DialectType::StarRocks => "ARRAY_FILTER",
31226 _ => "FILTER",
31227 };
31228 Ok(Expression::Function(Box::new(Function::new(
31229 name.to_string(),
31230 args,
31231 ))))
31232 }
31233 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
31234 "FILTER" if args.len() == 2 => {
31235 let name = match target {
31236 DialectType::DuckDB => "LIST_FILTER",
31237 DialectType::StarRocks => "ARRAY_FILTER",
31238 _ => "FILTER",
31239 };
31240 Ok(Expression::Function(Box::new(Function::new(
31241 name.to_string(),
31242 args,
31243 ))))
31244 }
31245 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
31246 "REDUCE" if args.len() >= 3 => {
31247 let name = match target {
31248 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
31249 _ => "REDUCE",
31250 };
31251 Ok(Expression::Function(Box::new(Function::new(
31252 name.to_string(),
31253 args,
31254 ))))
31255 }
31256 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
31257 "ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
31258 Function::new("ARRAY_REVERSE".to_string(), args),
31259 ))),
31260
31261 // CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
31262 "CONCAT" if args.len() > 2 => match target {
31263 DialectType::DuckDB => {
31264 let mut it = args.into_iter();
31265 let mut result = it.next().unwrap();
31266 for arg in it {
31267 result = Expression::DPipe(Box::new(crate::expressions::DPipe {
31268 this: Box::new(result),
31269 expression: Box::new(arg),
31270 safe: None,
31271 }));
31272 }
31273 Ok(result)
31274 }
31275 _ => Ok(Expression::Function(Box::new(Function::new(
31276 "CONCAT".to_string(),
31277 args,
31278 )))),
31279 },
31280
31281 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
31282 "GENERATE_DATE_ARRAY" => {
31283 if matches!(target, DialectType::BigQuery) {
31284 // BQ->BQ: add default interval if not present
31285 if args.len() == 2 {
31286 let start = args.remove(0);
31287 let end = args.remove(0);
31288 let default_interval =
31289 Expression::Interval(Box::new(crate::expressions::Interval {
31290 this: Some(Expression::Literal(Literal::String("1".to_string()))),
31291 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31292 unit: crate::expressions::IntervalUnit::Day,
31293 use_plural: false,
31294 }),
31295 }));
31296 Ok(Expression::Function(Box::new(Function::new(
31297 "GENERATE_DATE_ARRAY".to_string(),
31298 vec![start, end, default_interval],
31299 ))))
31300 } else {
31301 Ok(Expression::Function(Box::new(Function::new(
31302 "GENERATE_DATE_ARRAY".to_string(),
31303 args,
31304 ))))
31305 }
31306 } else if matches!(target, DialectType::DuckDB) {
31307 // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
31308 let start = args.get(0).cloned();
31309 let end = args.get(1).cloned();
31310 let step = args.get(2).cloned().or_else(|| {
31311 Some(Expression::Interval(Box::new(
31312 crate::expressions::Interval {
31313 this: Some(Expression::Literal(Literal::String("1".to_string()))),
31314 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31315 unit: crate::expressions::IntervalUnit::Day,
31316 use_plural: false,
31317 }),
31318 },
31319 )))
31320 });
31321
31322 // Wrap start/end in CAST(... AS DATE) only for string literals
31323 let maybe_cast_date = |expr: Expression| -> Expression {
31324 if matches!(&expr, Expression::Literal(Literal::String(_))) {
31325 Expression::Cast(Box::new(Cast {
31326 this: expr,
31327 to: DataType::Date,
31328 trailing_comments: vec![],
31329 double_colon_syntax: false,
31330 format: None,
31331 default: None,
31332 inferred_type: None,
31333 }))
31334 } else {
31335 expr
31336 }
31337 };
31338 let cast_start = start.map(maybe_cast_date);
31339 let cast_end = end.map(maybe_cast_date);
31340
31341 let gen_series =
31342 Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
31343 start: cast_start.map(Box::new),
31344 end: cast_end.map(Box::new),
31345 step: step.map(Box::new),
31346 is_end_exclusive: None,
31347 }));
31348
31349 // Wrap in CAST(... AS DATE[])
31350 Ok(Expression::Cast(Box::new(Cast {
31351 this: gen_series,
31352 to: DataType::Array {
31353 element_type: Box::new(DataType::Date),
31354 dimension: None,
31355 },
31356 trailing_comments: vec![],
31357 double_colon_syntax: false,
31358 format: None,
31359 default: None,
31360 inferred_type: None,
31361 })))
31362 } else if matches!(target, DialectType::Snowflake) {
31363 // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
31364 // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
31365 if args.len() == 2 {
31366 let start = args.remove(0);
31367 let end = args.remove(0);
31368 let default_interval =
31369 Expression::Interval(Box::new(crate::expressions::Interval {
31370 this: Some(Expression::Literal(Literal::String("1".to_string()))),
31371 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31372 unit: crate::expressions::IntervalUnit::Day,
31373 use_plural: false,
31374 }),
31375 }));
31376 Ok(Expression::Function(Box::new(Function::new(
31377 "GENERATE_DATE_ARRAY".to_string(),
31378 vec![start, end, default_interval],
31379 ))))
31380 } else {
31381 Ok(Expression::Function(Box::new(Function::new(
31382 "GENERATE_DATE_ARRAY".to_string(),
31383 args,
31384 ))))
31385 }
31386 } else {
31387 // Convert to GenerateSeries for other targets
31388 let start = args.get(0).cloned();
31389 let end = args.get(1).cloned();
31390 let step = args.get(2).cloned().or_else(|| {
31391 Some(Expression::Interval(Box::new(
31392 crate::expressions::Interval {
31393 this: Some(Expression::Literal(Literal::String("1".to_string()))),
31394 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31395 unit: crate::expressions::IntervalUnit::Day,
31396 use_plural: false,
31397 }),
31398 },
31399 )))
31400 });
31401 Ok(Expression::GenerateSeries(Box::new(
31402 crate::expressions::GenerateSeries {
31403 start: start.map(Box::new),
31404 end: end.map(Box::new),
31405 step: step.map(Box::new),
31406 is_end_exclusive: None,
31407 },
31408 )))
31409 }
31410 }
31411
31412 // PARSE_DATE(format, str) -> target-specific
31413 "PARSE_DATE" if args.len() == 2 => {
31414 let format = args.remove(0);
31415 let str_expr = args.remove(0);
31416 match target {
31417 DialectType::DuckDB => {
31418 // CAST(STRPTIME(str, duck_format) AS DATE)
31419 let duck_format = Self::bq_format_to_duckdb(&format);
31420 let strptime = Expression::Function(Box::new(Function::new(
31421 "STRPTIME".to_string(),
31422 vec![str_expr, duck_format],
31423 )));
31424 Ok(Expression::Cast(Box::new(Cast {
31425 this: strptime,
31426 to: DataType::Date,
31427 trailing_comments: vec![],
31428 double_colon_syntax: false,
31429 format: None,
31430 default: None,
31431 inferred_type: None,
31432 })))
31433 }
31434 DialectType::Snowflake => {
31435 // _POLYGLOT_DATE(str, snowflake_format)
31436 // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
31437 let sf_format = Self::bq_format_to_snowflake(&format);
31438 Ok(Expression::Function(Box::new(Function::new(
31439 "_POLYGLOT_DATE".to_string(),
31440 vec![str_expr, sf_format],
31441 ))))
31442 }
31443 _ => Ok(Expression::Function(Box::new(Function::new(
31444 "PARSE_DATE".to_string(),
31445 vec![format, str_expr],
31446 )))),
31447 }
31448 }
31449
31450 // PARSE_TIMESTAMP(format, str) -> target-specific
31451 "PARSE_TIMESTAMP" if args.len() >= 2 => {
31452 let format = args.remove(0);
31453 let str_expr = args.remove(0);
31454 let tz = if !args.is_empty() {
31455 Some(args.remove(0))
31456 } else {
31457 None
31458 };
31459 match target {
31460 DialectType::DuckDB => {
31461 let duck_format = Self::bq_format_to_duckdb(&format);
31462 let strptime = Expression::Function(Box::new(Function::new(
31463 "STRPTIME".to_string(),
31464 vec![str_expr, duck_format],
31465 )));
31466 Ok(strptime)
31467 }
31468 _ => {
31469 let mut result_args = vec![format, str_expr];
31470 if let Some(tz_arg) = tz {
31471 result_args.push(tz_arg);
31472 }
31473 Ok(Expression::Function(Box::new(Function::new(
31474 "PARSE_TIMESTAMP".to_string(),
31475 result_args,
31476 ))))
31477 }
31478 }
31479 }
31480
31481 // FORMAT_DATE(format, date) -> target-specific
31482 "FORMAT_DATE" if args.len() == 2 => {
31483 let format = args.remove(0);
31484 let date_expr = args.remove(0);
31485 match target {
31486 DialectType::DuckDB => {
31487 // STRFTIME(CAST(date AS DATE), format)
31488 let cast_date = Expression::Cast(Box::new(Cast {
31489 this: date_expr,
31490 to: DataType::Date,
31491 trailing_comments: vec![],
31492 double_colon_syntax: false,
31493 format: None,
31494 default: None,
31495 inferred_type: None,
31496 }));
31497 Ok(Expression::Function(Box::new(Function::new(
31498 "STRFTIME".to_string(),
31499 vec![cast_date, format],
31500 ))))
31501 }
31502 _ => Ok(Expression::Function(Box::new(Function::new(
31503 "FORMAT_DATE".to_string(),
31504 vec![format, date_expr],
31505 )))),
31506 }
31507 }
31508
31509 // FORMAT_DATETIME(format, datetime) -> target-specific
31510 "FORMAT_DATETIME" if args.len() == 2 => {
31511 let format = args.remove(0);
31512 let dt_expr = args.remove(0);
31513
31514 if matches!(target, DialectType::BigQuery) {
31515 // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
31516 let norm_format = Self::bq_format_normalize_bq(&format);
31517 // Also strip DATETIME keyword from typed literals
31518 let norm_dt = match dt_expr {
31519 Expression::Literal(Literal::Timestamp(s)) => {
31520 Expression::Cast(Box::new(Cast {
31521 this: Expression::Literal(Literal::String(s)),
31522 to: DataType::Custom {
31523 name: "DATETIME".to_string(),
31524 },
31525 trailing_comments: vec![],
31526 double_colon_syntax: false,
31527 format: None,
31528 default: None,
31529 inferred_type: None,
31530 }))
31531 }
31532 other => other,
31533 };
31534 return Ok(Expression::Function(Box::new(Function::new(
31535 "FORMAT_DATETIME".to_string(),
31536 vec![norm_format, norm_dt],
31537 ))));
31538 }
31539
31540 match target {
31541 DialectType::DuckDB => {
31542 // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
31543 let cast_dt = Self::ensure_cast_timestamp(dt_expr);
31544 let duck_format = Self::bq_format_to_duckdb(&format);
31545 Ok(Expression::Function(Box::new(Function::new(
31546 "STRFTIME".to_string(),
31547 vec![cast_dt, duck_format],
31548 ))))
31549 }
31550 _ => Ok(Expression::Function(Box::new(Function::new(
31551 "FORMAT_DATETIME".to_string(),
31552 vec![format, dt_expr],
31553 )))),
31554 }
31555 }
31556
31557 // FORMAT_TIMESTAMP(format, ts) -> target-specific
31558 "FORMAT_TIMESTAMP" if args.len() == 2 => {
31559 let format = args.remove(0);
31560 let ts_expr = args.remove(0);
31561 match target {
31562 DialectType::DuckDB => {
31563 // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
31564 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
31565 let cast_ts = Expression::Cast(Box::new(Cast {
31566 this: cast_tstz,
31567 to: DataType::Timestamp {
31568 timezone: false,
31569 precision: None,
31570 },
31571 trailing_comments: vec![],
31572 double_colon_syntax: false,
31573 format: None,
31574 default: None,
31575 inferred_type: None,
31576 }));
31577 Ok(Expression::Function(Box::new(Function::new(
31578 "STRFTIME".to_string(),
31579 vec![cast_ts, format],
31580 ))))
31581 }
31582 DialectType::Snowflake => {
31583 // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
31584 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
31585 let cast_ts = Expression::Cast(Box::new(Cast {
31586 this: cast_tstz,
31587 to: DataType::Timestamp {
31588 timezone: false,
31589 precision: None,
31590 },
31591 trailing_comments: vec![],
31592 double_colon_syntax: false,
31593 format: None,
31594 default: None,
31595 inferred_type: None,
31596 }));
31597 let sf_format = Self::bq_format_to_snowflake(&format);
31598 Ok(Expression::Function(Box::new(Function::new(
31599 "TO_CHAR".to_string(),
31600 vec![cast_ts, sf_format],
31601 ))))
31602 }
31603 _ => Ok(Expression::Function(Box::new(Function::new(
31604 "FORMAT_TIMESTAMP".to_string(),
31605 vec![format, ts_expr],
31606 )))),
31607 }
31608 }
31609
31610 // UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB
31611 "UNIX_DATE" if args.len() == 1 => {
31612 let date = args.remove(0);
31613 match target {
31614 DialectType::DuckDB => {
31615 let epoch = Expression::Cast(Box::new(Cast {
31616 this: Expression::Literal(Literal::String("1970-01-01".to_string())),
31617 to: DataType::Date,
31618 trailing_comments: vec![],
31619 double_colon_syntax: false,
31620 format: None,
31621 default: None,
31622 inferred_type: None,
31623 }));
31624 // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
31625 // Need to convert DATE literal to CAST
31626 let norm_date = Self::date_literal_to_cast(date);
31627 Ok(Expression::Function(Box::new(Function::new(
31628 "DATE_DIFF".to_string(),
31629 vec![
31630 Expression::Literal(Literal::String("DAY".to_string())),
31631 epoch,
31632 norm_date,
31633 ],
31634 ))))
31635 }
31636 _ => Ok(Expression::Function(Box::new(Function::new(
31637 "UNIX_DATE".to_string(),
31638 vec![date],
31639 )))),
31640 }
31641 }
31642
31643 // UNIX_SECONDS(ts) -> target-specific
31644 "UNIX_SECONDS" if args.len() == 1 => {
31645 let ts = args.remove(0);
31646 match target {
31647 DialectType::DuckDB => {
31648 // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
31649 let norm_ts = Self::ts_literal_to_cast_tz(ts);
31650 let epoch = Expression::Function(Box::new(Function::new(
31651 "EPOCH".to_string(),
31652 vec![norm_ts],
31653 )));
31654 Ok(Expression::Cast(Box::new(Cast {
31655 this: epoch,
31656 to: DataType::BigInt { length: None },
31657 trailing_comments: vec![],
31658 double_colon_syntax: false,
31659 format: None,
31660 default: None,
31661 inferred_type: None,
31662 })))
31663 }
31664 DialectType::Snowflake => {
31665 // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
31666 let epoch = Expression::Cast(Box::new(Cast {
31667 this: Expression::Literal(Literal::String(
31668 "1970-01-01 00:00:00+00".to_string(),
31669 )),
31670 to: DataType::Timestamp {
31671 timezone: true,
31672 precision: None,
31673 },
31674 trailing_comments: vec![],
31675 double_colon_syntax: false,
31676 format: None,
31677 default: None,
31678 inferred_type: None,
31679 }));
31680 Ok(Expression::Function(Box::new(Function::new(
31681 "TIMESTAMPDIFF".to_string(),
31682 vec![
31683 Expression::Identifier(Identifier::new("SECONDS".to_string())),
31684 epoch,
31685 ts,
31686 ],
31687 ))))
31688 }
31689 _ => Ok(Expression::Function(Box::new(Function::new(
31690 "UNIX_SECONDS".to_string(),
31691 vec![ts],
31692 )))),
31693 }
31694 }
31695
31696 // UNIX_MILLIS(ts) -> target-specific
31697 "UNIX_MILLIS" if args.len() == 1 => {
31698 let ts = args.remove(0);
31699 match target {
31700 DialectType::DuckDB => {
31701 let norm_ts = Self::ts_literal_to_cast_tz(ts);
31702 Ok(Expression::Function(Box::new(Function::new(
31703 "EPOCH_MS".to_string(),
31704 vec![norm_ts],
31705 ))))
31706 }
31707 _ => Ok(Expression::Function(Box::new(Function::new(
31708 "UNIX_MILLIS".to_string(),
31709 vec![ts],
31710 )))),
31711 }
31712 }
31713
31714 // UNIX_MICROS(ts) -> target-specific
31715 "UNIX_MICROS" if args.len() == 1 => {
31716 let ts = args.remove(0);
31717 match target {
31718 DialectType::DuckDB => {
31719 let norm_ts = Self::ts_literal_to_cast_tz(ts);
31720 Ok(Expression::Function(Box::new(Function::new(
31721 "EPOCH_US".to_string(),
31722 vec![norm_ts],
31723 ))))
31724 }
31725 _ => Ok(Expression::Function(Box::new(Function::new(
31726 "UNIX_MICROS".to_string(),
31727 vec![ts],
31728 )))),
31729 }
31730 }
31731
31732 // INSTR(str, substr) -> target-specific
31733 "INSTR" => {
31734 if matches!(target, DialectType::BigQuery) {
31735 // BQ->BQ: keep as INSTR
31736 Ok(Expression::Function(Box::new(Function::new(
31737 "INSTR".to_string(),
31738 args,
31739 ))))
31740 } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
31741 // Snowflake: CHARINDEX(substr, str) - swap args
31742 let str_expr = args.remove(0);
31743 let substr = args.remove(0);
31744 Ok(Expression::Function(Box::new(Function::new(
31745 "CHARINDEX".to_string(),
31746 vec![substr, str_expr],
31747 ))))
31748 } else {
31749 // Keep as INSTR for other targets
31750 Ok(Expression::Function(Box::new(Function::new(
31751 "INSTR".to_string(),
31752 args,
31753 ))))
31754 }
31755 }
31756
31757 // CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone
31758 "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
31759 if matches!(target, DialectType::BigQuery) {
31760 // BQ->BQ: always output with parens (function form), keep any timezone arg
31761 Ok(Expression::Function(Box::new(Function::new(name, args))))
31762 } else if name == "CURRENT_DATE" && args.len() == 1 {
31763 // CURRENT_DATE('UTC') - has timezone arg
31764 let tz_arg = args.remove(0);
31765 match target {
31766 DialectType::DuckDB => {
31767 // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
31768 let ct = Expression::CurrentTimestamp(
31769 crate::expressions::CurrentTimestamp {
31770 precision: None,
31771 sysdate: false,
31772 },
31773 );
31774 let at_tz =
31775 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
31776 this: ct,
31777 zone: tz_arg,
31778 }));
31779 Ok(Expression::Cast(Box::new(Cast {
31780 this: at_tz,
31781 to: DataType::Date,
31782 trailing_comments: vec![],
31783 double_colon_syntax: false,
31784 format: None,
31785 default: None,
31786 inferred_type: None,
31787 })))
31788 }
31789 DialectType::Snowflake => {
31790 // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
31791 let ct = Expression::Function(Box::new(Function::new(
31792 "CURRENT_TIMESTAMP".to_string(),
31793 vec![],
31794 )));
31795 let convert = Expression::Function(Box::new(Function::new(
31796 "CONVERT_TIMEZONE".to_string(),
31797 vec![tz_arg, ct],
31798 )));
31799 Ok(Expression::Cast(Box::new(Cast {
31800 this: convert,
31801 to: DataType::Date,
31802 trailing_comments: vec![],
31803 double_colon_syntax: false,
31804 format: None,
31805 default: None,
31806 inferred_type: None,
31807 })))
31808 }
31809 _ => {
31810 // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
31811 let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
31812 Ok(Expression::AtTimeZone(Box::new(
31813 crate::expressions::AtTimeZone {
31814 this: cd,
31815 zone: tz_arg,
31816 },
31817 )))
31818 }
31819 }
31820 } else if (name == "CURRENT_TIMESTAMP"
31821 || name == "CURRENT_TIME"
31822 || name == "CURRENT_DATE")
31823 && args.is_empty()
31824 && matches!(
31825 target,
31826 DialectType::PostgreSQL
31827 | DialectType::DuckDB
31828 | DialectType::Presto
31829 | DialectType::Trino
31830 )
31831 {
31832 // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME
31833 if name == "CURRENT_TIMESTAMP" {
31834 Ok(Expression::CurrentTimestamp(
31835 crate::expressions::CurrentTimestamp {
31836 precision: None,
31837 sysdate: false,
31838 },
31839 ))
31840 } else if name == "CURRENT_DATE" {
31841 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
31842 } else {
31843 // CURRENT_TIME
31844 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
31845 precision: None,
31846 }))
31847 }
31848 } else {
31849 // All other targets: keep as function (with parens)
31850 Ok(Expression::Function(Box::new(Function::new(name, args))))
31851 }
31852 }
31853
31854 // JSON_QUERY(json, path) -> target-specific
31855 "JSON_QUERY" if args.len() == 2 => {
31856 match target {
31857 DialectType::DuckDB | DialectType::SQLite => {
31858 // json -> path syntax
31859 let json_expr = args.remove(0);
31860 let path = args.remove(0);
31861 Ok(Expression::JsonExtract(Box::new(
31862 crate::expressions::JsonExtractFunc {
31863 this: json_expr,
31864 path,
31865 returning: None,
31866 arrow_syntax: true,
31867 hash_arrow_syntax: false,
31868 wrapper_option: None,
31869 quotes_option: None,
31870 on_scalar_string: false,
31871 on_error: None,
31872 },
31873 )))
31874 }
31875 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
31876 Ok(Expression::Function(Box::new(Function::new(
31877 "GET_JSON_OBJECT".to_string(),
31878 args,
31879 ))))
31880 }
31881 DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
31882 Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
31883 )),
31884 _ => Ok(Expression::Function(Box::new(Function::new(
31885 "JSON_QUERY".to_string(),
31886 args,
31887 )))),
31888 }
31889 }
31890
31891 // JSON_VALUE_ARRAY(json, path) -> target-specific
31892 "JSON_VALUE_ARRAY" if args.len() == 2 => {
31893 match target {
31894 DialectType::DuckDB => {
31895 // CAST(json -> path AS TEXT[])
31896 let json_expr = args.remove(0);
31897 let path = args.remove(0);
31898 let arrow = Expression::JsonExtract(Box::new(
31899 crate::expressions::JsonExtractFunc {
31900 this: json_expr,
31901 path,
31902 returning: None,
31903 arrow_syntax: true,
31904 hash_arrow_syntax: false,
31905 wrapper_option: None,
31906 quotes_option: None,
31907 on_scalar_string: false,
31908 on_error: None,
31909 },
31910 ));
31911 Ok(Expression::Cast(Box::new(Cast {
31912 this: arrow,
31913 to: DataType::Array {
31914 element_type: Box::new(DataType::Text),
31915 dimension: None,
31916 },
31917 trailing_comments: vec![],
31918 double_colon_syntax: false,
31919 format: None,
31920 default: None,
31921 inferred_type: None,
31922 })))
31923 }
31924 DialectType::Snowflake => {
31925 let json_expr = args.remove(0);
31926 let path_expr = args.remove(0);
31927 // Convert JSON path from $.path to just path
31928 let sf_path = if let Expression::Literal(Literal::String(ref s)) = path_expr
31929 {
31930 let trimmed = s.trim_start_matches('$').trim_start_matches('.');
31931 Expression::Literal(Literal::String(trimmed.to_string()))
31932 } else {
31933 path_expr
31934 };
31935 let parse_json = Expression::Function(Box::new(Function::new(
31936 "PARSE_JSON".to_string(),
31937 vec![json_expr],
31938 )));
31939 let get_path = Expression::Function(Box::new(Function::new(
31940 "GET_PATH".to_string(),
31941 vec![parse_json, sf_path],
31942 )));
31943 // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
31944 let cast_expr = Expression::Cast(Box::new(Cast {
31945 this: Expression::Identifier(Identifier::new("x")),
31946 to: DataType::VarChar {
31947 length: None,
31948 parenthesized_length: false,
31949 },
31950 trailing_comments: vec![],
31951 double_colon_syntax: false,
31952 format: None,
31953 default: None,
31954 inferred_type: None,
31955 }));
31956 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
31957 parameters: vec![Identifier::new("x")],
31958 body: cast_expr,
31959 colon: false,
31960 parameter_types: vec![],
31961 }));
31962 Ok(Expression::Function(Box::new(Function::new(
31963 "TRANSFORM".to_string(),
31964 vec![get_path, lambda],
31965 ))))
31966 }
31967 _ => Ok(Expression::Function(Box::new(Function::new(
31968 "JSON_VALUE_ARRAY".to_string(),
31969 args,
31970 )))),
31971 }
31972 }
31973
31974 // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
31975 // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
31976 // This is different from Hive/Spark where 3rd arg is "group_index"
31977 "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
31978 match target {
31979 DialectType::DuckDB
31980 | DialectType::Presto
31981 | DialectType::Trino
31982 | DialectType::Athena => {
31983 if args.len() == 2 {
31984 // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
31985 args.push(Expression::number(1));
31986 Ok(Expression::Function(Box::new(Function::new(
31987 "REGEXP_EXTRACT".to_string(),
31988 args,
31989 ))))
31990 } else if args.len() == 3 {
31991 let val = args.remove(0);
31992 let regex = args.remove(0);
31993 let position = args.remove(0);
31994 let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
31995 if is_pos_1 {
31996 Ok(Expression::Function(Box::new(Function::new(
31997 "REGEXP_EXTRACT".to_string(),
31998 vec![val, regex, Expression::number(1)],
31999 ))))
32000 } else {
32001 let substring_expr = Expression::Function(Box::new(Function::new(
32002 "SUBSTRING".to_string(),
32003 vec![val, position],
32004 )));
32005 let nullif_expr = Expression::Function(Box::new(Function::new(
32006 "NULLIF".to_string(),
32007 vec![
32008 substring_expr,
32009 Expression::Literal(Literal::String(String::new())),
32010 ],
32011 )));
32012 Ok(Expression::Function(Box::new(Function::new(
32013 "REGEXP_EXTRACT".to_string(),
32014 vec![nullif_expr, regex, Expression::number(1)],
32015 ))))
32016 }
32017 } else if args.len() == 4 {
32018 let val = args.remove(0);
32019 let regex = args.remove(0);
32020 let position = args.remove(0);
32021 let occurrence = args.remove(0);
32022 let is_pos_1 = matches!(&position, Expression::Literal(Literal::Number(n)) if n == "1");
32023 let is_occ_1 = matches!(&occurrence, Expression::Literal(Literal::Number(n)) if n == "1");
32024 if is_pos_1 && is_occ_1 {
32025 Ok(Expression::Function(Box::new(Function::new(
32026 "REGEXP_EXTRACT".to_string(),
32027 vec![val, regex, Expression::number(1)],
32028 ))))
32029 } else {
32030 let subject = if is_pos_1 {
32031 val
32032 } else {
32033 let substring_expr = Expression::Function(Box::new(
32034 Function::new("SUBSTRING".to_string(), vec![val, position]),
32035 ));
32036 Expression::Function(Box::new(Function::new(
32037 "NULLIF".to_string(),
32038 vec![
32039 substring_expr,
32040 Expression::Literal(Literal::String(String::new())),
32041 ],
32042 )))
32043 };
32044 let extract_all = Expression::Function(Box::new(Function::new(
32045 "REGEXP_EXTRACT_ALL".to_string(),
32046 vec![subject, regex, Expression::number(1)],
32047 )));
32048 Ok(Expression::Function(Box::new(Function::new(
32049 "ARRAY_EXTRACT".to_string(),
32050 vec![extract_all, occurrence],
32051 ))))
32052 }
32053 } else {
32054 Ok(Expression::Function(Box::new(Function {
32055 name: f.name,
32056 args,
32057 distinct: f.distinct,
32058 trailing_comments: f.trailing_comments,
32059 use_bracket_syntax: f.use_bracket_syntax,
32060 no_parens: f.no_parens,
32061 quoted: f.quoted,
32062 span: None,
32063 inferred_type: None,
32064 })))
32065 }
32066 }
32067 DialectType::Snowflake => {
32068 // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
32069 Ok(Expression::Function(Box::new(Function::new(
32070 "REGEXP_SUBSTR".to_string(),
32071 args,
32072 ))))
32073 }
32074 _ => {
32075 // For other targets (Hive/Spark/BigQuery): pass through as-is
32076 // BigQuery's default group behavior matches Hive/Spark for 2-arg case
32077 Ok(Expression::Function(Box::new(Function {
32078 name: f.name,
32079 args,
32080 distinct: f.distinct,
32081 trailing_comments: f.trailing_comments,
32082 use_bracket_syntax: f.use_bracket_syntax,
32083 no_parens: f.no_parens,
32084 quoted: f.quoted,
32085 span: None,
32086 inferred_type: None,
32087 })))
32088 }
32089 }
32090 }
32091
32092 // BigQuery STRUCT(args) -> target-specific struct expression
32093 "STRUCT" => {
32094 // Convert Function args to Struct fields
32095 let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
32096 for (i, arg) in args.into_iter().enumerate() {
32097 match arg {
32098 Expression::Alias(a) => {
32099 // Named field: expr AS name
32100 fields.push((Some(a.alias.name.clone()), a.this));
32101 }
32102 other => {
32103 // Unnamed field: for Spark/Hive, keep as None
32104 // For Snowflake, auto-name as _N
32105 // For DuckDB, use column name for column refs, _N for others
32106 if matches!(target, DialectType::Snowflake) {
32107 fields.push((Some(format!("_{}", i)), other));
32108 } else if matches!(target, DialectType::DuckDB) {
32109 let auto_name = match &other {
32110 Expression::Column(col) => col.name.name.clone(),
32111 _ => format!("_{}", i),
32112 };
32113 fields.push((Some(auto_name), other));
32114 } else {
32115 fields.push((None, other));
32116 }
32117 }
32118 }
32119 }
32120
32121 match target {
32122 DialectType::Snowflake => {
32123 // OBJECT_CONSTRUCT('name', value, ...)
32124 let mut oc_args = Vec::new();
32125 for (name, val) in &fields {
32126 if let Some(n) = name {
32127 oc_args.push(Expression::Literal(Literal::String(n.clone())));
32128 oc_args.push(val.clone());
32129 } else {
32130 oc_args.push(val.clone());
32131 }
32132 }
32133 Ok(Expression::Function(Box::new(Function::new(
32134 "OBJECT_CONSTRUCT".to_string(),
32135 oc_args,
32136 ))))
32137 }
32138 DialectType::DuckDB => {
32139 // {'name': value, ...}
32140 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
32141 fields,
32142 })))
32143 }
32144 DialectType::Hive => {
32145 // STRUCT(val1, val2, ...) - strip aliases
32146 let hive_fields: Vec<(Option<String>, Expression)> =
32147 fields.into_iter().map(|(_, v)| (None, v)).collect();
32148 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
32149 fields: hive_fields,
32150 })))
32151 }
32152 DialectType::Spark | DialectType::Databricks => {
32153 // Use Expression::Struct to bypass Spark target transform auto-naming
32154 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
32155 fields,
32156 })))
32157 }
32158 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
32159 // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
32160 let all_named =
32161 !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
32162 let all_types_inferable = all_named
32163 && fields
32164 .iter()
32165 .all(|(_, val)| Self::can_infer_presto_type(val));
32166 let row_args: Vec<Expression> =
32167 fields.iter().map(|(_, v)| v.clone()).collect();
32168 let row_expr = Expression::Function(Box::new(Function::new(
32169 "ROW".to_string(),
32170 row_args,
32171 )));
32172 if all_named && all_types_inferable {
32173 // Build ROW type with inferred types
32174 let mut row_type_fields = Vec::new();
32175 for (name, val) in &fields {
32176 if let Some(n) = name {
32177 let type_str = Self::infer_sql_type_for_presto(val);
32178 row_type_fields.push(crate::expressions::StructField::new(
32179 n.clone(),
32180 crate::expressions::DataType::Custom { name: type_str },
32181 ));
32182 }
32183 }
32184 let row_type = crate::expressions::DataType::Struct {
32185 fields: row_type_fields,
32186 nested: true,
32187 };
32188 Ok(Expression::Cast(Box::new(Cast {
32189 this: row_expr,
32190 to: row_type,
32191 trailing_comments: Vec::new(),
32192 double_colon_syntax: false,
32193 format: None,
32194 default: None,
32195 inferred_type: None,
32196 })))
32197 } else {
32198 Ok(row_expr)
32199 }
32200 }
32201 _ => {
32202 // Default: keep as STRUCT function with original args
32203 let mut new_args = Vec::new();
32204 for (name, val) in fields {
32205 if let Some(n) = name {
32206 new_args.push(Expression::Alias(Box::new(
32207 crate::expressions::Alias::new(val, Identifier::new(n)),
32208 )));
32209 } else {
32210 new_args.push(val);
32211 }
32212 }
32213 Ok(Expression::Function(Box::new(Function::new(
32214 "STRUCT".to_string(),
32215 new_args,
32216 ))))
32217 }
32218 }
32219 }
32220
32221 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
32222 "ROUND" if args.len() == 3 => {
32223 let x = args.remove(0);
32224 let n = args.remove(0);
32225 let mode = args.remove(0);
32226 // Check if mode is 'ROUND_HALF_EVEN'
32227 let is_half_even = matches!(&mode, Expression::Literal(Literal::String(s)) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN"));
32228 if is_half_even && matches!(target, DialectType::DuckDB) {
32229 Ok(Expression::Function(Box::new(Function::new(
32230 "ROUND_EVEN".to_string(),
32231 vec![x, n],
32232 ))))
32233 } else {
32234 // Pass through with all args
32235 Ok(Expression::Function(Box::new(Function::new(
32236 "ROUND".to_string(),
32237 vec![x, n, mode],
32238 ))))
32239 }
32240 }
32241
32242 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
32243 "MAKE_INTERVAL" => {
32244 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
32245 // The positional args are: year, month
32246 // Named args are: day =>, minute =>, etc.
32247 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
32248 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
32249 // For BigQuery->BigQuery: reorder named args (day before minute)
32250 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
32251 let mut parts: Vec<(String, String)> = Vec::new();
32252 let mut pos_idx = 0;
32253 let pos_units = ["year", "month"];
32254 for arg in &args {
32255 if let Expression::NamedArgument(na) = arg {
32256 // Named arg like minute => 5
32257 let unit = na.name.name.clone();
32258 if let Expression::Literal(Literal::Number(n)) = &na.value {
32259 parts.push((unit, n.clone()));
32260 }
32261 } else if pos_idx < pos_units.len() {
32262 if let Expression::Literal(Literal::Number(n)) = arg {
32263 parts.push((pos_units[pos_idx].to_string(), n.clone()));
32264 }
32265 pos_idx += 1;
32266 }
32267 }
32268 // Don't sort - preserve original argument order
32269 let separator = if matches!(target, DialectType::Snowflake) {
32270 ", "
32271 } else {
32272 " "
32273 };
32274 let interval_str = parts
32275 .iter()
32276 .map(|(u, v)| format!("{} {}", v, u))
32277 .collect::<Vec<_>>()
32278 .join(separator);
32279 Ok(Expression::Interval(Box::new(
32280 crate::expressions::Interval {
32281 this: Some(Expression::Literal(Literal::String(interval_str))),
32282 unit: None,
32283 },
32284 )))
32285 } else if matches!(target, DialectType::BigQuery) {
32286 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
32287 let mut positional = Vec::new();
32288 let mut named: Vec<(
32289 String,
32290 Expression,
32291 crate::expressions::NamedArgSeparator,
32292 )> = Vec::new();
32293 let _pos_units = ["year", "month"];
32294 let mut _pos_idx = 0;
32295 for arg in args {
32296 if let Expression::NamedArgument(na) = arg {
32297 named.push((na.name.name.clone(), na.value, na.separator));
32298 } else {
32299 positional.push(arg);
32300 _pos_idx += 1;
32301 }
32302 }
32303 // Sort named args by: day, hour, minute, second
32304 let unit_order = |u: &str| -> usize {
32305 match u.to_ascii_lowercase().as_str() {
32306 "day" => 0,
32307 "hour" => 1,
32308 "minute" => 2,
32309 "second" => 3,
32310 _ => 4,
32311 }
32312 };
32313 named.sort_by_key(|(u, _, _)| unit_order(u));
32314 let mut result_args = positional;
32315 for (name, value, sep) in named {
32316 result_args.push(Expression::NamedArgument(Box::new(
32317 crate::expressions::NamedArgument {
32318 name: Identifier::new(&name),
32319 value,
32320 separator: sep,
32321 },
32322 )));
32323 }
32324 Ok(Expression::Function(Box::new(Function::new(
32325 "MAKE_INTERVAL".to_string(),
32326 result_args,
32327 ))))
32328 } else {
32329 Ok(Expression::Function(Box::new(Function::new(
32330 "MAKE_INTERVAL".to_string(),
32331 args,
32332 ))))
32333 }
32334 }
32335
32336 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
32337 "ARRAY_TO_STRING" if args.len() == 3 => {
32338 let arr = args.remove(0);
32339 let sep = args.remove(0);
32340 let null_text = args.remove(0);
32341 match target {
32342 DialectType::DuckDB => {
32343 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
32344 let _lambda_param =
32345 Expression::Identifier(crate::expressions::Identifier::new("x"));
32346 let coalesce =
32347 Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
32348 original_name: None,
32349 expressions: vec![
32350 Expression::Identifier(crate::expressions::Identifier::new(
32351 "x",
32352 )),
32353 null_text,
32354 ],
32355 inferred_type: None,
32356 }));
32357 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
32358 parameters: vec![crate::expressions::Identifier::new("x")],
32359 body: coalesce,
32360 colon: false,
32361 parameter_types: vec![],
32362 }));
32363 let list_transform = Expression::Function(Box::new(Function::new(
32364 "LIST_TRANSFORM".to_string(),
32365 vec![arr, lambda],
32366 )));
32367 Ok(Expression::Function(Box::new(Function::new(
32368 "ARRAY_TO_STRING".to_string(),
32369 vec![list_transform, sep],
32370 ))))
32371 }
32372 _ => Ok(Expression::Function(Box::new(Function::new(
32373 "ARRAY_TO_STRING".to_string(),
32374 vec![arr, sep, null_text],
32375 )))),
32376 }
32377 }
32378
32379 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
32380 "LENGTH" if args.len() == 1 => {
32381 let arg = args.remove(0);
32382 match target {
32383 DialectType::DuckDB => {
32384 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
32385 let typeof_func = Expression::Function(Box::new(Function::new(
32386 "TYPEOF".to_string(),
32387 vec![arg.clone()],
32388 )));
32389 let blob_cast = Expression::Cast(Box::new(Cast {
32390 this: arg.clone(),
32391 to: DataType::VarBinary { length: None },
32392 trailing_comments: vec![],
32393 double_colon_syntax: false,
32394 format: None,
32395 default: None,
32396 inferred_type: None,
32397 }));
32398 let octet_length = Expression::Function(Box::new(Function::new(
32399 "OCTET_LENGTH".to_string(),
32400 vec![blob_cast],
32401 )));
32402 let text_cast = Expression::Cast(Box::new(Cast {
32403 this: arg,
32404 to: DataType::Text,
32405 trailing_comments: vec![],
32406 double_colon_syntax: false,
32407 format: None,
32408 default: None,
32409 inferred_type: None,
32410 }));
32411 let length_text = Expression::Function(Box::new(Function::new(
32412 "LENGTH".to_string(),
32413 vec![text_cast],
32414 )));
32415 Ok(Expression::Case(Box::new(crate::expressions::Case {
32416 operand: Some(typeof_func),
32417 whens: vec![(
32418 Expression::Literal(Literal::String("BLOB".to_string())),
32419 octet_length,
32420 )],
32421 else_: Some(length_text),
32422 comments: Vec::new(),
32423 inferred_type: None,
32424 })))
32425 }
32426 _ => Ok(Expression::Function(Box::new(Function::new(
32427 "LENGTH".to_string(),
32428 vec![arg],
32429 )))),
32430 }
32431 }
32432
32433 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
32434 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
32435 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
32436 // The args should be [x, fraction] with the null handling stripped
32437 // For DuckDB: QUANTILE_CONT(x, fraction)
32438 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
32439 match target {
32440 DialectType::DuckDB => {
32441 // Strip down to just 2 args, rename to QUANTILE_CONT
32442 let x = args[0].clone();
32443 let frac = args[1].clone();
32444 Ok(Expression::Function(Box::new(Function::new(
32445 "QUANTILE_CONT".to_string(),
32446 vec![x, frac],
32447 ))))
32448 }
32449 _ => Ok(Expression::Function(Box::new(Function::new(
32450 "PERCENTILE_CONT".to_string(),
32451 args,
32452 )))),
32453 }
32454 }
32455
32456 // All others: pass through
32457 _ => Ok(Expression::Function(Box::new(Function {
32458 name: f.name,
32459 args,
32460 distinct: f.distinct,
32461 trailing_comments: f.trailing_comments,
32462 use_bracket_syntax: f.use_bracket_syntax,
32463 no_parens: f.no_parens,
32464 quoted: f.quoted,
32465 span: None,
32466 inferred_type: None,
32467 }))),
32468 }
32469 }
32470
32471 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
32472 /// Returns false for column references and other non-literal expressions where the type is unknown.
32473 fn can_infer_presto_type(expr: &Expression) -> bool {
32474 match expr {
32475 Expression::Literal(_) => true,
32476 Expression::Boolean(_) => true,
32477 Expression::Array(_) | Expression::ArrayFunc(_) => true,
32478 Expression::Struct(_) | Expression::StructFunc(_) => true,
32479 Expression::Function(f) => {
32480 f.name.eq_ignore_ascii_case("STRUCT")
32481 || f.name.eq_ignore_ascii_case("ROW")
32482 || f.name.eq_ignore_ascii_case("CURRENT_DATE")
32483 || f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
32484 || f.name.eq_ignore_ascii_case("NOW")
32485 }
32486 Expression::Cast(_) => true,
32487 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
32488 _ => false,
32489 }
32490 }
32491
32492 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
32493 fn infer_sql_type_for_presto(expr: &Expression) -> String {
32494 use crate::expressions::Literal;
32495 match expr {
32496 Expression::Literal(Literal::String(_)) => "VARCHAR".to_string(),
32497 Expression::Literal(Literal::Number(n)) => {
32498 if n.contains('.') {
32499 "DOUBLE".to_string()
32500 } else {
32501 "INTEGER".to_string()
32502 }
32503 }
32504 Expression::Boolean(_) => "BOOLEAN".to_string(),
32505 Expression::Literal(Literal::Date(_)) => "DATE".to_string(),
32506 Expression::Literal(Literal::Timestamp(_)) => "TIMESTAMP".to_string(),
32507 Expression::Literal(Literal::Datetime(_)) => "TIMESTAMP".to_string(),
32508 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
32509 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
32510 Expression::Function(f) => {
32511 if f.name.eq_ignore_ascii_case("STRUCT") || f.name.eq_ignore_ascii_case("ROW") {
32512 "ROW".to_string()
32513 } else if f.name.eq_ignore_ascii_case("CURRENT_DATE") {
32514 "DATE".to_string()
32515 } else if f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP") || f.name.eq_ignore_ascii_case("NOW") {
32516 "TIMESTAMP".to_string()
32517 } else {
32518 "VARCHAR".to_string()
32519 }
32520 }
32521 Expression::Cast(c) => {
32522 // If already cast, use the target type
32523 Self::data_type_to_presto_string(&c.to)
32524 }
32525 _ => "VARCHAR".to_string(),
32526 }
32527 }
32528
32529 /// Convert a DataType to its Presto/Trino string representation for ROW type
32530 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
32531 use crate::expressions::DataType;
32532 match dt {
32533 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
32534 "VARCHAR".to_string()
32535 }
32536 DataType::Int { .. }
32537 | DataType::BigInt { .. }
32538 | DataType::SmallInt { .. }
32539 | DataType::TinyInt { .. } => "INTEGER".to_string(),
32540 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
32541 DataType::Boolean => "BOOLEAN".to_string(),
32542 DataType::Date => "DATE".to_string(),
32543 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
32544 DataType::Struct { fields, .. } => {
32545 let field_strs: Vec<String> = fields
32546 .iter()
32547 .map(|f| {
32548 format!(
32549 "{} {}",
32550 f.name,
32551 Self::data_type_to_presto_string(&f.data_type)
32552 )
32553 })
32554 .collect();
32555 format!("ROW({})", field_strs.join(", "))
32556 }
32557 DataType::Array { element_type, .. } => {
32558 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
32559 }
32560 DataType::Custom { name } => {
32561 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
32562 name.clone()
32563 }
32564 _ => "VARCHAR".to_string(),
32565 }
32566 }
32567
32568 /// Convert IntervalUnit to string
32569 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> &'static str {
32570 match unit {
32571 crate::expressions::IntervalUnit::Year => "YEAR",
32572 crate::expressions::IntervalUnit::Quarter => "QUARTER",
32573 crate::expressions::IntervalUnit::Month => "MONTH",
32574 crate::expressions::IntervalUnit::Week => "WEEK",
32575 crate::expressions::IntervalUnit::Day => "DAY",
32576 crate::expressions::IntervalUnit::Hour => "HOUR",
32577 crate::expressions::IntervalUnit::Minute => "MINUTE",
32578 crate::expressions::IntervalUnit::Second => "SECOND",
32579 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
32580 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
32581 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND",
32582 }
32583 }
32584
32585 /// Extract unit string from an expression (uppercased)
32586 fn get_unit_str_static(expr: &Expression) -> String {
32587 use crate::expressions::Literal;
32588 match expr {
32589 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
32590 Expression::Literal(Literal::String(s)) => s.to_ascii_uppercase(),
32591 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
32592 Expression::Function(f) => {
32593 let base = f.name.to_ascii_uppercase();
32594 if !f.args.is_empty() {
32595 let inner = Self::get_unit_str_static(&f.args[0]);
32596 format!("{}({})", base, inner)
32597 } else {
32598 base
32599 }
32600 }
32601 _ => "DAY".to_string(),
32602 }
32603 }
32604
32605 /// Parse unit string to IntervalUnit
32606 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
32607 match s {
32608 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
32609 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
32610 "MONTH" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
32611 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
32612 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
32613 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
32614 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
32615 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
32616 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
32617 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
32618 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
32619 _ => crate::expressions::IntervalUnit::Day,
32620 }
32621 }
32622
32623 /// Convert expression to simple string for interval building
32624 fn expr_to_string_static(expr: &Expression) -> String {
32625 use crate::expressions::Literal;
32626 match expr {
32627 Expression::Literal(Literal::Number(s)) => s.clone(),
32628 Expression::Literal(Literal::String(s)) => s.clone(),
32629 Expression::Identifier(id) => id.name.clone(),
32630 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
32631 _ => "1".to_string(),
32632 }
32633 }
32634
32635 /// Extract a simple string representation from a literal expression
32636 fn expr_to_string(expr: &Expression) -> String {
32637 use crate::expressions::Literal;
32638 match expr {
32639 Expression::Literal(Literal::Number(s)) => s.clone(),
32640 Expression::Literal(Literal::String(s)) => s.clone(),
32641 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
32642 Expression::Identifier(id) => id.name.clone(),
32643 _ => "1".to_string(),
32644 }
32645 }
32646
32647 /// Quote an interval value expression as a string literal if it's a number (or negated number)
32648 fn quote_interval_val(expr: &Expression) -> Expression {
32649 use crate::expressions::Literal;
32650 match expr {
32651 Expression::Literal(Literal::Number(n)) => {
32652 Expression::Literal(Literal::String(n.clone()))
32653 }
32654 Expression::Literal(Literal::String(_)) => expr.clone(),
32655 Expression::Neg(inner) => {
32656 if let Expression::Literal(Literal::Number(n)) = &inner.this {
32657 Expression::Literal(Literal::String(format!("-{}", n)))
32658 } else {
32659 expr.clone()
32660 }
32661 }
32662 _ => expr.clone(),
32663 }
32664 }
32665
32666 /// Check if a timestamp string contains timezone info (offset like +02:00, or named timezone)
32667 fn timestamp_string_has_timezone(ts: &str) -> bool {
32668 let trimmed = ts.trim();
32669 // Check for numeric timezone offsets: +N, -N, +NN:NN, -NN:NN at end
32670 if let Some(last_space) = trimmed.rfind(' ') {
32671 let suffix = &trimmed[last_space + 1..];
32672 if (suffix.starts_with('+') || suffix.starts_with('-')) && suffix.len() > 1 {
32673 let rest = &suffix[1..];
32674 if rest.chars().all(|c| c.is_ascii_digit() || c == ':') {
32675 return true;
32676 }
32677 }
32678 }
32679 // Check for named timezone abbreviations
32680 let ts_lower = trimmed.to_ascii_lowercase();
32681 let tz_abbrevs = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];
32682 for abbrev in &tz_abbrevs {
32683 if ts_lower.ends_with(abbrev) {
32684 return true;
32685 }
32686 }
32687 false
32688 }
32689
32690 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
32691 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
32692 use crate::expressions::{Cast, DataType, Literal};
32693 match expr {
32694 Expression::Literal(Literal::Timestamp(s)) => {
32695 let tz = func_name.starts_with("TIMESTAMP");
32696 Expression::Cast(Box::new(Cast {
32697 this: Expression::Literal(Literal::String(s)),
32698 to: if tz {
32699 DataType::Timestamp {
32700 timezone: true,
32701 precision: None,
32702 }
32703 } else {
32704 DataType::Timestamp {
32705 timezone: false,
32706 precision: None,
32707 }
32708 },
32709 trailing_comments: vec![],
32710 double_colon_syntax: false,
32711 format: None,
32712 default: None,
32713 inferred_type: None,
32714 }))
32715 }
32716 other => other,
32717 }
32718 }
32719
32720 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
32721 fn maybe_cast_ts(expr: Expression) -> Expression {
32722 use crate::expressions::{Cast, DataType, Literal};
32723 match expr {
32724 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
32725 this: Expression::Literal(Literal::String(s)),
32726 to: DataType::Timestamp {
32727 timezone: false,
32728 precision: None,
32729 },
32730 trailing_comments: vec![],
32731 double_colon_syntax: false,
32732 format: None,
32733 default: None,
32734 inferred_type: None,
32735 })),
32736 other => other,
32737 }
32738 }
32739
32740 /// Convert DATE 'x' literal to CAST('x' AS DATE)
32741 fn date_literal_to_cast(expr: Expression) -> Expression {
32742 use crate::expressions::{Cast, DataType, Literal};
32743 match expr {
32744 Expression::Literal(Literal::Date(s)) => Expression::Cast(Box::new(Cast {
32745 this: Expression::Literal(Literal::String(s)),
32746 to: DataType::Date,
32747 trailing_comments: vec![],
32748 double_colon_syntax: false,
32749 format: None,
32750 default: None,
32751 inferred_type: None,
32752 })),
32753 other => other,
32754 }
32755 }
32756
32757 /// Ensure an expression that should be a date is CAST(... AS DATE).
32758 /// Handles both DATE literals and string literals that look like dates.
32759 fn ensure_cast_date(expr: Expression) -> Expression {
32760 use crate::expressions::{Cast, DataType, Literal};
32761 match expr {
32762 Expression::Literal(Literal::Date(s)) => Expression::Cast(Box::new(Cast {
32763 this: Expression::Literal(Literal::String(s)),
32764 to: DataType::Date,
32765 trailing_comments: vec![],
32766 double_colon_syntax: false,
32767 format: None,
32768 default: None,
32769 inferred_type: None,
32770 })),
32771 Expression::Literal(Literal::String(ref _s)) => {
32772 // String literal that should be a date -> CAST('s' AS DATE)
32773 Expression::Cast(Box::new(Cast {
32774 this: expr,
32775 to: DataType::Date,
32776 trailing_comments: vec![],
32777 double_colon_syntax: false,
32778 format: None,
32779 default: None,
32780 inferred_type: None,
32781 }))
32782 }
32783 // Already a CAST or other expression -> leave as-is
32784 other => other,
32785 }
32786 }
32787
32788 /// Force CAST(expr AS DATE) for any expression (not just literals)
32789 /// Skips if the expression is already a CAST to DATE
32790 fn force_cast_date(expr: Expression) -> Expression {
32791 use crate::expressions::{Cast, DataType};
32792 // If it's already a CAST to DATE, don't double-wrap
32793 if let Expression::Cast(ref c) = expr {
32794 if matches!(c.to, DataType::Date) {
32795 return expr;
32796 }
32797 }
32798 Expression::Cast(Box::new(Cast {
32799 this: expr,
32800 to: DataType::Date,
32801 trailing_comments: vec![],
32802 double_colon_syntax: false,
32803 format: None,
32804 default: None,
32805 inferred_type: None,
32806 }))
32807 }
32808
32809 /// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
32810 /// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic.
32811 /// The Snowflake DATEDIFF handler converts these back to TO_DATE.
32812 const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
32813
32814 fn ensure_to_date_preserved(expr: Expression) -> Expression {
32815 use crate::expressions::{Function, Literal};
32816 if matches!(expr, Expression::Literal(Literal::String(_))) {
32817 Expression::Function(Box::new(Function::new(
32818 Self::PRESERVED_TO_DATE.to_string(),
32819 vec![expr],
32820 )))
32821 } else {
32822 expr
32823 }
32824 }
32825
32826 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
32827 fn try_cast_date(expr: Expression) -> Expression {
32828 use crate::expressions::{Cast, DataType};
32829 Expression::TryCast(Box::new(Cast {
32830 this: expr,
32831 to: DataType::Date,
32832 trailing_comments: vec![],
32833 double_colon_syntax: false,
32834 format: None,
32835 default: None,
32836 inferred_type: None,
32837 }))
32838 }
32839
32840 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
32841 fn double_cast_timestamp_date(expr: Expression) -> Expression {
32842 use crate::expressions::{Cast, DataType};
32843 let inner = Expression::Cast(Box::new(Cast {
32844 this: expr,
32845 to: DataType::Timestamp {
32846 timezone: false,
32847 precision: None,
32848 },
32849 trailing_comments: vec![],
32850 double_colon_syntax: false,
32851 format: None,
32852 default: None,
32853 inferred_type: None,
32854 }));
32855 Expression::Cast(Box::new(Cast {
32856 this: inner,
32857 to: DataType::Date,
32858 trailing_comments: vec![],
32859 double_colon_syntax: false,
32860 format: None,
32861 default: None,
32862 inferred_type: None,
32863 }))
32864 }
32865
32866 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
32867 fn double_cast_datetime_date(expr: Expression) -> Expression {
32868 use crate::expressions::{Cast, DataType};
32869 let inner = Expression::Cast(Box::new(Cast {
32870 this: expr,
32871 to: DataType::Custom {
32872 name: "DATETIME".to_string(),
32873 },
32874 trailing_comments: vec![],
32875 double_colon_syntax: false,
32876 format: None,
32877 default: None,
32878 inferred_type: None,
32879 }));
32880 Expression::Cast(Box::new(Cast {
32881 this: inner,
32882 to: DataType::Date,
32883 trailing_comments: vec![],
32884 double_colon_syntax: false,
32885 format: None,
32886 default: None,
32887 inferred_type: None,
32888 }))
32889 }
32890
32891 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
32892 fn double_cast_datetime2_date(expr: Expression) -> Expression {
32893 use crate::expressions::{Cast, DataType};
32894 let inner = Expression::Cast(Box::new(Cast {
32895 this: expr,
32896 to: DataType::Custom {
32897 name: "DATETIME2".to_string(),
32898 },
32899 trailing_comments: vec![],
32900 double_colon_syntax: false,
32901 format: None,
32902 default: None,
32903 inferred_type: None,
32904 }));
32905 Expression::Cast(Box::new(Cast {
32906 this: inner,
32907 to: DataType::Date,
32908 trailing_comments: vec![],
32909 double_colon_syntax: false,
32910 format: None,
32911 default: None,
32912 inferred_type: None,
32913 }))
32914 }
32915
32916 /// Convert Hive/Java-style date format strings to C-style (strftime) format
32917 /// e.g., "yyyy-MM-dd'T'HH" -> "%Y-%m-%d'T'%H"
32918 fn hive_format_to_c_format(fmt: &str) -> String {
32919 let mut result = String::new();
32920 let chars: Vec<char> = fmt.chars().collect();
32921 let mut i = 0;
32922 while i < chars.len() {
32923 match chars[i] {
32924 'y' => {
32925 let mut count = 0;
32926 while i < chars.len() && chars[i] == 'y' {
32927 count += 1;
32928 i += 1;
32929 }
32930 if count >= 4 {
32931 result.push_str("%Y");
32932 } else if count == 2 {
32933 result.push_str("%y");
32934 } else {
32935 result.push_str("%Y");
32936 }
32937 }
32938 'M' => {
32939 let mut count = 0;
32940 while i < chars.len() && chars[i] == 'M' {
32941 count += 1;
32942 i += 1;
32943 }
32944 if count >= 3 {
32945 result.push_str("%b");
32946 } else if count == 2 {
32947 result.push_str("%m");
32948 } else {
32949 result.push_str("%m");
32950 }
32951 }
32952 'd' => {
32953 let mut _count = 0;
32954 while i < chars.len() && chars[i] == 'd' {
32955 _count += 1;
32956 i += 1;
32957 }
32958 result.push_str("%d");
32959 }
32960 'H' => {
32961 let mut _count = 0;
32962 while i < chars.len() && chars[i] == 'H' {
32963 _count += 1;
32964 i += 1;
32965 }
32966 result.push_str("%H");
32967 }
32968 'h' => {
32969 let mut _count = 0;
32970 while i < chars.len() && chars[i] == 'h' {
32971 _count += 1;
32972 i += 1;
32973 }
32974 result.push_str("%I");
32975 }
32976 'm' => {
32977 let mut _count = 0;
32978 while i < chars.len() && chars[i] == 'm' {
32979 _count += 1;
32980 i += 1;
32981 }
32982 result.push_str("%M");
32983 }
32984 's' => {
32985 let mut _count = 0;
32986 while i < chars.len() && chars[i] == 's' {
32987 _count += 1;
32988 i += 1;
32989 }
32990 result.push_str("%S");
32991 }
32992 'S' => {
32993 // Fractional seconds - skip
32994 while i < chars.len() && chars[i] == 'S' {
32995 i += 1;
32996 }
32997 result.push_str("%f");
32998 }
32999 'a' => {
33000 // AM/PM
33001 while i < chars.len() && chars[i] == 'a' {
33002 i += 1;
33003 }
33004 result.push_str("%p");
33005 }
33006 'E' => {
33007 let mut count = 0;
33008 while i < chars.len() && chars[i] == 'E' {
33009 count += 1;
33010 i += 1;
33011 }
33012 if count >= 4 {
33013 result.push_str("%A");
33014 } else {
33015 result.push_str("%a");
33016 }
33017 }
33018 '\'' => {
33019 // Quoted literal text - pass through the quotes and content
33020 result.push('\'');
33021 i += 1;
33022 while i < chars.len() && chars[i] != '\'' {
33023 result.push(chars[i]);
33024 i += 1;
33025 }
33026 if i < chars.len() {
33027 result.push('\'');
33028 i += 1;
33029 }
33030 }
33031 c => {
33032 result.push(c);
33033 i += 1;
33034 }
33035 }
33036 }
33037 result
33038 }
33039
33040 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
33041 fn hive_format_to_presto_format(fmt: &str) -> String {
33042 let c_fmt = Self::hive_format_to_c_format(fmt);
33043 // Presto uses %T for HH:MM:SS
33044 c_fmt.replace("%H:%M:%S", "%T")
33045 }
33046
33047 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
33048 fn ensure_cast_timestamp(expr: Expression) -> Expression {
33049 use crate::expressions::{Cast, DataType, Literal};
33050 match expr {
33051 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
33052 this: Expression::Literal(Literal::String(s)),
33053 to: DataType::Timestamp {
33054 timezone: false,
33055 precision: None,
33056 },
33057 trailing_comments: vec![],
33058 double_colon_syntax: false,
33059 format: None,
33060 default: None,
33061 inferred_type: None,
33062 })),
33063 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
33064 this: expr,
33065 to: DataType::Timestamp {
33066 timezone: false,
33067 precision: None,
33068 },
33069 trailing_comments: vec![],
33070 double_colon_syntax: false,
33071 format: None,
33072 default: None,
33073 inferred_type: None,
33074 })),
33075 Expression::Literal(Literal::Datetime(s)) => Expression::Cast(Box::new(Cast {
33076 this: Expression::Literal(Literal::String(s)),
33077 to: DataType::Timestamp {
33078 timezone: false,
33079 precision: None,
33080 },
33081 trailing_comments: vec![],
33082 double_colon_syntax: false,
33083 format: None,
33084 default: None,
33085 inferred_type: None,
33086 })),
33087 other => other,
33088 }
33089 }
33090
33091 /// Force CAST to TIMESTAMP for any expression (not just literals)
33092 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
33093 fn force_cast_timestamp(expr: Expression) -> Expression {
33094 use crate::expressions::{Cast, DataType};
33095 // Don't double-wrap if already a CAST to TIMESTAMP
33096 if let Expression::Cast(ref c) = expr {
33097 if matches!(c.to, DataType::Timestamp { .. }) {
33098 return expr;
33099 }
33100 }
33101 Expression::Cast(Box::new(Cast {
33102 this: expr,
33103 to: DataType::Timestamp {
33104 timezone: false,
33105 precision: None,
33106 },
33107 trailing_comments: vec![],
33108 double_colon_syntax: false,
33109 format: None,
33110 default: None,
33111 inferred_type: None,
33112 }))
33113 }
33114
33115 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
33116 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
33117 use crate::expressions::{Cast, DataType, Literal};
33118 match expr {
33119 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
33120 this: Expression::Literal(Literal::String(s)),
33121 to: DataType::Timestamp {
33122 timezone: true,
33123 precision: None,
33124 },
33125 trailing_comments: vec![],
33126 double_colon_syntax: false,
33127 format: None,
33128 default: None,
33129 inferred_type: None,
33130 })),
33131 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
33132 this: expr,
33133 to: DataType::Timestamp {
33134 timezone: true,
33135 precision: None,
33136 },
33137 trailing_comments: vec![],
33138 double_colon_syntax: false,
33139 format: None,
33140 default: None,
33141 inferred_type: None,
33142 })),
33143 Expression::Literal(Literal::Datetime(s)) => Expression::Cast(Box::new(Cast {
33144 this: Expression::Literal(Literal::String(s)),
33145 to: DataType::Timestamp {
33146 timezone: true,
33147 precision: None,
33148 },
33149 trailing_comments: vec![],
33150 double_colon_syntax: false,
33151 format: None,
33152 default: None,
33153 inferred_type: None,
33154 })),
33155 other => other,
33156 }
33157 }
33158
33159 /// Ensure expression is CAST to DATETIME (for BigQuery)
33160 fn ensure_cast_datetime(expr: Expression) -> Expression {
33161 use crate::expressions::{Cast, DataType, Literal};
33162 match expr {
33163 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
33164 this: expr,
33165 to: DataType::Custom {
33166 name: "DATETIME".to_string(),
33167 },
33168 trailing_comments: vec![],
33169 double_colon_syntax: false,
33170 format: None,
33171 default: None,
33172 inferred_type: None,
33173 })),
33174 other => other,
33175 }
33176 }
33177
33178 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
33179 fn force_cast_datetime(expr: Expression) -> Expression {
33180 use crate::expressions::{Cast, DataType};
33181 if let Expression::Cast(ref c) = expr {
33182 if let DataType::Custom { ref name } = c.to {
33183 if name.eq_ignore_ascii_case("DATETIME") {
33184 return expr;
33185 }
33186 }
33187 }
33188 Expression::Cast(Box::new(Cast {
33189 this: expr,
33190 to: DataType::Custom {
33191 name: "DATETIME".to_string(),
33192 },
33193 trailing_comments: vec![],
33194 double_colon_syntax: false,
33195 format: None,
33196 default: None,
33197 inferred_type: None,
33198 }))
33199 }
33200
33201 /// Ensure expression is CAST to DATETIME2 (for TSQL)
33202 fn ensure_cast_datetime2(expr: Expression) -> Expression {
33203 use crate::expressions::{Cast, DataType, Literal};
33204 match expr {
33205 Expression::Literal(Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
33206 this: expr,
33207 to: DataType::Custom {
33208 name: "DATETIME2".to_string(),
33209 },
33210 trailing_comments: vec![],
33211 double_colon_syntax: false,
33212 format: None,
33213 default: None,
33214 inferred_type: None,
33215 })),
33216 other => other,
33217 }
33218 }
33219
33220 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
33221 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
33222 use crate::expressions::{Cast, DataType, Literal};
33223 match expr {
33224 Expression::Literal(Literal::Timestamp(s)) => Expression::Cast(Box::new(Cast {
33225 this: Expression::Literal(Literal::String(s)),
33226 to: DataType::Timestamp {
33227 timezone: true,
33228 precision: None,
33229 },
33230 trailing_comments: vec![],
33231 double_colon_syntax: false,
33232 format: None,
33233 default: None,
33234 inferred_type: None,
33235 })),
33236 other => other,
33237 }
33238 }
33239
33240 /// Convert BigQuery format string to Snowflake format string
33241 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
33242 use crate::expressions::Literal;
33243 if let Expression::Literal(Literal::String(s)) = format_expr {
33244 let sf = s
33245 .replace("%Y", "yyyy")
33246 .replace("%m", "mm")
33247 .replace("%d", "DD")
33248 .replace("%H", "HH24")
33249 .replace("%M", "MI")
33250 .replace("%S", "SS")
33251 .replace("%b", "mon")
33252 .replace("%B", "Month")
33253 .replace("%e", "FMDD");
33254 Expression::Literal(Literal::String(sf))
33255 } else {
33256 format_expr.clone()
33257 }
33258 }
33259
33260 /// Convert BigQuery format string to DuckDB format string
33261 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
33262 use crate::expressions::Literal;
33263 if let Expression::Literal(Literal::String(s)) = format_expr {
33264 let duck = s
33265 .replace("%T", "%H:%M:%S")
33266 .replace("%F", "%Y-%m-%d")
33267 .replace("%D", "%m/%d/%y")
33268 .replace("%x", "%m/%d/%y")
33269 .replace("%c", "%a %b %-d %H:%M:%S %Y")
33270 .replace("%e", "%-d")
33271 .replace("%E6S", "%S.%f");
33272 Expression::Literal(Literal::String(duck))
33273 } else {
33274 format_expr.clone()
33275 }
33276 }
33277
33278 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
33279 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
33280 use crate::expressions::Literal;
33281 if let Expression::Literal(Literal::String(s)) = format_expr {
33282 // Replace format elements from longest to shortest to avoid partial matches
33283 let result = s
33284 .replace("YYYYMMDD", "%Y%m%d")
33285 .replace("YYYY", "%Y")
33286 .replace("YY", "%y")
33287 .replace("MONTH", "%B")
33288 .replace("MON", "%b")
33289 .replace("MM", "%m")
33290 .replace("DD", "%d")
33291 .replace("HH24", "%H")
33292 .replace("HH12", "%I")
33293 .replace("HH", "%I")
33294 .replace("MI", "%M")
33295 .replace("SSTZH", "%S%z")
33296 .replace("SS", "%S")
33297 .replace("TZH", "%z");
33298 Expression::Literal(Literal::String(result))
33299 } else {
33300 format_expr.clone()
33301 }
33302 }
33303
33304 /// Normalize BigQuery format strings for BQ->BQ output
33305 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
33306 use crate::expressions::Literal;
33307 if let Expression::Literal(Literal::String(s)) = format_expr {
33308 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
33309 Expression::Literal(Literal::String(norm))
33310 } else {
33311 format_expr.clone()
33312 }
33313 }
33314}
33315
33316#[cfg(test)]
33317mod tests {
33318 use super::*;
33319
33320 #[test]
33321 fn test_dialect_type_from_str() {
33322 assert_eq!(
33323 "postgres".parse::<DialectType>().unwrap(),
33324 DialectType::PostgreSQL
33325 );
33326 assert_eq!(
33327 "postgresql".parse::<DialectType>().unwrap(),
33328 DialectType::PostgreSQL
33329 );
33330 assert_eq!("mysql".parse::<DialectType>().unwrap(), DialectType::MySQL);
33331 assert_eq!(
33332 "bigquery".parse::<DialectType>().unwrap(),
33333 DialectType::BigQuery
33334 );
33335 }
33336
33337 #[test]
33338 fn test_basic_transpile() {
33339 let dialect = Dialect::get(DialectType::Generic);
33340 let result = dialect
33341 .transpile_to("SELECT 1", DialectType::PostgreSQL)
33342 .unwrap();
33343 assert_eq!(result.len(), 1);
33344 assert_eq!(result[0], "SELECT 1");
33345 }
33346
33347 #[test]
33348 fn test_function_transformation_mysql() {
33349 // NVL should be transformed to IFNULL in MySQL
33350 let dialect = Dialect::get(DialectType::Generic);
33351 let result = dialect
33352 .transpile_to("SELECT NVL(a, b)", DialectType::MySQL)
33353 .unwrap();
33354 assert_eq!(result[0], "SELECT IFNULL(a, b)");
33355 }
33356
    /// Diagnostic (assertion-free) test: traces how Snowflake's `:` JSON path
    /// operator and an explicit GET_PATH call are rendered for Snowflake and
    /// DuckDB targets. Output goes to stderr; run with `--nocapture` to inspect.
    #[test]
    fn test_get_path_duckdb() {
        // Test: step by step
        let snowflake = Dialect::get(DialectType::Snowflake);

        // Step 1: Parse and check what Snowflake produces as intermediate
        let result_sf_sf = snowflake
            .transpile_to(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::Snowflake,
            )
            .unwrap();
        eprintln!("Snowflake->Snowflake colon: {}", result_sf_sf[0]);

        // Step 2: DuckDB target
        let result_sf_dk = snowflake
            .transpile_to(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB colon: {}", result_sf_dk[0]);

        // Step 3: GET_PATH directly
        let result_gp = snowflake
            .transpile_to(
                "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", result_gp[0]);
    }
33389
33390 #[test]
33391 fn test_function_transformation_postgres() {
33392 // IFNULL should be transformed to COALESCE in PostgreSQL
33393 let dialect = Dialect::get(DialectType::Generic);
33394 let result = dialect
33395 .transpile_to("SELECT IFNULL(a, b)", DialectType::PostgreSQL)
33396 .unwrap();
33397 assert_eq!(result[0], "SELECT COALESCE(a, b)");
33398
33399 // NVL should also be transformed to COALESCE
33400 let result = dialect
33401 .transpile_to("SELECT NVL(a, b)", DialectType::PostgreSQL)
33402 .unwrap();
33403 assert_eq!(result[0], "SELECT COALESCE(a, b)");
33404 }
33405
33406 #[test]
33407 fn test_hive_cast_to_trycast() {
33408 // Hive CAST should become TRY_CAST for targets that support it
33409 let hive = Dialect::get(DialectType::Hive);
33410 let result = hive
33411 .transpile_to("CAST(1 AS INT)", DialectType::DuckDB)
33412 .unwrap();
33413 assert_eq!(result[0], "TRY_CAST(1 AS INT)");
33414
33415 let result = hive
33416 .transpile_to("CAST(1 AS INT)", DialectType::Presto)
33417 .unwrap();
33418 assert_eq!(result[0], "TRY_CAST(1 AS INTEGER)");
33419 }
33420
33421 #[test]
33422 fn test_hive_array_identity() {
33423 // Hive ARRAY<DATE> should preserve angle bracket syntax
33424 let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
33425 let hive = Dialect::get(DialectType::Hive);
33426
33427 // Test via transpile_to (this works)
33428 let result = hive.transpile_to(sql, DialectType::Hive).unwrap();
33429 eprintln!("Hive ARRAY via transpile_to: {}", result[0]);
33430 assert!(
33431 result[0].contains("ARRAY<DATE>"),
33432 "transpile_to: Expected ARRAY<DATE>, got: {}",
33433 result[0]
33434 );
33435
33436 // Test via parse -> transform -> generate (identity test path)
33437 let ast = hive.parse(sql).unwrap();
33438 let transformed = hive.transform(ast[0].clone()).unwrap();
33439 let output = hive.generate(&transformed).unwrap();
33440 eprintln!("Hive ARRAY via identity path: {}", output);
33441 assert!(
33442 output.contains("ARRAY<DATE>"),
33443 "identity path: Expected ARRAY<DATE>, got: {}",
33444 output
33445 );
33446 }
33447
33448 #[test]
33449 fn test_starrocks_delete_between_expansion() {
33450 // StarRocks doesn't support BETWEEN in DELETE statements
33451 let dialect = Dialect::get(DialectType::Generic);
33452
33453 // BETWEEN should be expanded to >= AND <= in DELETE
33454 let result = dialect
33455 .transpile_to(
33456 "DELETE FROM t WHERE a BETWEEN b AND c",
33457 DialectType::StarRocks,
33458 )
33459 .unwrap();
33460 assert_eq!(result[0], "DELETE FROM t WHERE a >= b AND a <= c");
33461
33462 // NOT BETWEEN should be expanded to < OR > in DELETE
33463 let result = dialect
33464 .transpile_to(
33465 "DELETE FROM t WHERE a NOT BETWEEN b AND c",
33466 DialectType::StarRocks,
33467 )
33468 .unwrap();
33469 assert_eq!(result[0], "DELETE FROM t WHERE a < b OR a > c");
33470
33471 // BETWEEN in SELECT should NOT be expanded (StarRocks supports it there)
33472 let result = dialect
33473 .transpile_to(
33474 "SELECT * FROM t WHERE a BETWEEN b AND c",
33475 DialectType::StarRocks,
33476 )
33477 .unwrap();
33478 assert!(
33479 result[0].contains("BETWEEN"),
33480 "BETWEEN should be preserved in SELECT"
33481 );
33482 }
33483
33484 #[test]
33485 fn test_snowflake_ltrim_rtrim_parse() {
33486 let sf = Dialect::get(DialectType::Snowflake);
33487 let sql = "SELECT LTRIM(RTRIM(col)) FROM t1";
33488 let result = sf.transpile_to(sql, DialectType::DuckDB);
33489 match &result {
33490 Ok(r) => eprintln!("LTRIM/RTRIM result: {}", r[0]),
33491 Err(e) => eprintln!("LTRIM/RTRIM error: {}", e),
33492 }
33493 assert!(
33494 result.is_ok(),
33495 "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
33496 result.err()
33497 );
33498 }
33499
33500 #[test]
33501 fn test_duckdb_count_if_parse() {
33502 let duck = Dialect::get(DialectType::DuckDB);
33503 let sql = "COUNT_IF(x)";
33504 let result = duck.transpile_to(sql, DialectType::DuckDB);
33505 match &result {
33506 Ok(r) => eprintln!("COUNT_IF result: {}", r[0]),
33507 Err(e) => eprintln!("COUNT_IF error: {}", e),
33508 }
33509 assert!(
33510 result.is_ok(),
33511 "Expected successful parse of COUNT_IF(x), got error: {:?}",
33512 result.err()
33513 );
33514 }
33515
33516 #[test]
33517 fn test_tsql_cast_tinyint_parse() {
33518 let tsql = Dialect::get(DialectType::TSQL);
33519 let sql = "CAST(X AS TINYINT)";
33520 let result = tsql.transpile_to(sql, DialectType::DuckDB);
33521 match &result {
33522 Ok(r) => eprintln!("TSQL CAST TINYINT result: {}", r[0]),
33523 Err(e) => eprintln!("TSQL CAST TINYINT error: {}", e),
33524 }
33525 assert!(
33526 result.is_ok(),
33527 "Expected successful transpile, got error: {:?}",
33528 result.err()
33529 );
33530 }
33531
33532 #[test]
33533 fn test_pg_hash_bitwise_xor() {
33534 let dialect = Dialect::get(DialectType::PostgreSQL);
33535 let result = dialect
33536 .transpile_to("x # y", DialectType::PostgreSQL)
33537 .unwrap();
33538 assert_eq!(result[0], "x # y");
33539 }
33540
33541 #[test]
33542 fn test_pg_array_to_duckdb() {
33543 let dialect = Dialect::get(DialectType::PostgreSQL);
33544 let result = dialect
33545 .transpile_to("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]", DialectType::DuckDB)
33546 .unwrap();
33547 assert_eq!(result[0], "SELECT [1, 2, 3] @> [1, 2]");
33548 }
33549
33550 #[test]
33551 fn test_array_remove_bigquery() {
33552 let dialect = Dialect::get(DialectType::Generic);
33553 let result = dialect
33554 .transpile_to("ARRAY_REMOVE(the_array, target)", DialectType::BigQuery)
33555 .unwrap();
33556 assert_eq!(
33557 result[0],
33558 "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)"
33559 );
33560 }
33561
33562 #[test]
33563 fn test_map_clickhouse_case() {
33564 let dialect = Dialect::get(DialectType::Generic);
33565 let parsed = dialect
33566 .parse("CAST(MAP('a', '1') AS MAP(TEXT, TEXT))")
33567 .unwrap();
33568 eprintln!("MAP parsed: {:?}", parsed);
33569 let result = dialect
33570 .transpile_to(
33571 "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))",
33572 DialectType::ClickHouse,
33573 )
33574 .unwrap();
33575 eprintln!("MAP result: {}", result[0]);
33576 }
33577
33578 #[test]
33579 fn test_generate_date_array_presto() {
33580 let dialect = Dialect::get(DialectType::Generic);
33581 let result = dialect.transpile_to(
33582 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
33583 DialectType::Presto,
33584 ).unwrap();
33585 eprintln!("GDA -> Presto: {}", result[0]);
33586 assert_eq!(result[0], "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))");
33587 }
33588
33589 #[test]
33590 fn test_generate_date_array_postgres() {
33591 let dialect = Dialect::get(DialectType::Generic);
33592 let result = dialect.transpile_to(
33593 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
33594 DialectType::PostgreSQL,
33595 ).unwrap();
33596 eprintln!("GDA -> PostgreSQL: {}", result[0]);
33597 }
33598
33599 #[test]
33600 fn test_generate_date_array_snowflake() {
33601 std::thread::Builder::new()
33602 .stack_size(16 * 1024 * 1024)
33603 .spawn(|| {
33604 let dialect = Dialect::get(DialectType::Generic);
33605 let result = dialect.transpile_to(
33606 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
33607 DialectType::Snowflake,
33608 ).unwrap();
33609 eprintln!("GDA -> Snowflake: {}", result[0]);
33610 })
33611 .unwrap()
33612 .join()
33613 .unwrap();
33614 }
33615
33616 #[test]
33617 fn test_array_length_generate_date_array_snowflake() {
33618 let dialect = Dialect::get(DialectType::Generic);
33619 let result = dialect.transpile_to(
33620 "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
33621 DialectType::Snowflake,
33622 ).unwrap();
33623 eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", result[0]);
33624 }
33625
33626 #[test]
33627 fn test_generate_date_array_mysql() {
33628 let dialect = Dialect::get(DialectType::Generic);
33629 let result = dialect.transpile_to(
33630 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
33631 DialectType::MySQL,
33632 ).unwrap();
33633 eprintln!("GDA -> MySQL: {}", result[0]);
33634 }
33635
33636 #[test]
33637 fn test_generate_date_array_redshift() {
33638 let dialect = Dialect::get(DialectType::Generic);
33639 let result = dialect.transpile_to(
33640 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
33641 DialectType::Redshift,
33642 ).unwrap();
33643 eprintln!("GDA -> Redshift: {}", result[0]);
33644 }
33645
33646 #[test]
33647 fn test_generate_date_array_tsql() {
33648 let dialect = Dialect::get(DialectType::Generic);
33649 let result = dialect.transpile_to(
33650 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
33651 DialectType::TSQL,
33652 ).unwrap();
33653 eprintln!("GDA -> TSQL: {}", result[0]);
33654 }
33655
33656 #[test]
33657 fn test_struct_colon_syntax() {
33658 let dialect = Dialect::get(DialectType::Generic);
33659 // Test without colon first
33660 let result = dialect.transpile_to(
33661 "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)",
33662 DialectType::ClickHouse,
33663 );
33664 match result {
33665 Ok(r) => eprintln!("STRUCT no colon -> ClickHouse: {}", r[0]),
33666 Err(e) => eprintln!("STRUCT no colon error: {}", e),
33667 }
33668 // Now test with colon
33669 let result = dialect.transpile_to(
33670 "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
33671 DialectType::ClickHouse,
33672 );
33673 match result {
33674 Ok(r) => eprintln!("STRUCT colon -> ClickHouse: {}", r[0]),
33675 Err(e) => eprintln!("STRUCT colon error: {}", e),
33676 }
33677 }
33678
33679 #[test]
33680 fn test_generate_date_array_cte_wrapped_mysql() {
33681 let dialect = Dialect::get(DialectType::Generic);
33682 let result = dialect.transpile_to(
33683 "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
33684 DialectType::MySQL,
33685 ).unwrap();
33686 eprintln!("GDA CTE -> MySQL: {}", result[0]);
33687 }
33688
33689 #[test]
33690 fn test_generate_date_array_cte_wrapped_tsql() {
33691 let dialect = Dialect::get(DialectType::Generic);
33692 let result = dialect.transpile_to(
33693 "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
33694 DialectType::TSQL,
33695 ).unwrap();
33696 eprintln!("GDA CTE -> TSQL: {}", result[0]);
33697 }
33698
33699 #[test]
33700 fn test_decode_literal_no_null_check() {
33701 // Oracle DECODE with all literals should produce simple equality, no IS NULL
33702 let dialect = Dialect::get(DialectType::Oracle);
33703 let result = dialect
33704 .transpile_to("SELECT decode(1,2,3,4)", DialectType::DuckDB)
33705 .unwrap();
33706 assert_eq!(
33707 result[0], "SELECT CASE WHEN 1 = 2 THEN 3 ELSE 4 END",
33708 "Literal DECODE should not have IS NULL checks"
33709 );
33710 }
33711
33712 #[test]
33713 fn test_decode_column_vs_literal_no_null_check() {
33714 // Oracle DECODE with column vs literal should use simple equality (like sqlglot)
33715 let dialect = Dialect::get(DialectType::Oracle);
33716 let result = dialect
33717 .transpile_to("SELECT decode(col, 2, 3, 4) FROM t", DialectType::DuckDB)
33718 .unwrap();
33719 assert_eq!(
33720 result[0], "SELECT CASE WHEN col = 2 THEN 3 ELSE 4 END FROM t",
33721 "Column vs literal DECODE should not have IS NULL checks"
33722 );
33723 }
33724
33725 #[test]
33726 fn test_decode_column_vs_column_keeps_null_check() {
33727 // Oracle DECODE with column vs column should keep null-safe comparison
33728 let dialect = Dialect::get(DialectType::Oracle);
33729 let result = dialect
33730 .transpile_to("SELECT decode(col, col2, 3, 4) FROM t", DialectType::DuckDB)
33731 .unwrap();
33732 assert!(
33733 result[0].contains("IS NULL"),
33734 "Column vs column DECODE should have IS NULL checks, got: {}",
33735 result[0]
33736 );
33737 }
33738
33739 #[test]
33740 fn test_decode_null_search() {
33741 // Oracle DECODE with NULL search should use IS NULL
33742 let dialect = Dialect::get(DialectType::Oracle);
33743 let result = dialect
33744 .transpile_to("SELECT decode(col, NULL, 3, 4) FROM t", DialectType::DuckDB)
33745 .unwrap();
33746 assert_eq!(
33747 result[0],
33748 "SELECT CASE WHEN col IS NULL THEN 3 ELSE 4 END FROM t",
33749 );
33750 }
33751
33752 // =========================================================================
33753 // REGEXP function transpilation tests
33754 // =========================================================================
33755
33756 #[test]
33757 fn test_regexp_substr_snowflake_to_duckdb_2arg() {
33758 let dialect = Dialect::get(DialectType::Snowflake);
33759 let result = dialect
33760 .transpile_to("SELECT REGEXP_SUBSTR(s, 'pattern')", DialectType::DuckDB)
33761 .unwrap();
33762 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
33763 }
33764
33765 #[test]
33766 fn test_regexp_substr_snowflake_to_duckdb_3arg_pos1() {
33767 let dialect = Dialect::get(DialectType::Snowflake);
33768 let result = dialect
33769 .transpile_to("SELECT REGEXP_SUBSTR(s, 'pattern', 1)", DialectType::DuckDB)
33770 .unwrap();
33771 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
33772 }
33773
33774 #[test]
33775 fn test_regexp_substr_snowflake_to_duckdb_3arg_pos_gt1() {
33776 let dialect = Dialect::get(DialectType::Snowflake);
33777 let result = dialect
33778 .transpile_to("SELECT REGEXP_SUBSTR(s, 'pattern', 3)", DialectType::DuckDB)
33779 .unwrap();
33780 assert_eq!(
33781 result[0],
33782 "SELECT REGEXP_EXTRACT(NULLIF(SUBSTRING(s, 3), ''), 'pattern')"
33783 );
33784 }
33785
33786 #[test]
33787 fn test_regexp_substr_snowflake_to_duckdb_4arg_occ_gt1() {
33788 let dialect = Dialect::get(DialectType::Snowflake);
33789 let result = dialect
33790 .transpile_to("SELECT REGEXP_SUBSTR(s, 'pattern', 1, 3)", DialectType::DuckDB)
33791 .unwrap();
33792 assert_eq!(
33793 result[0],
33794 "SELECT ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, 'pattern'), 3)"
33795 );
33796 }
33797
33798 #[test]
33799 fn test_regexp_substr_snowflake_to_duckdb_5arg_e_flag() {
33800 let dialect = Dialect::get(DialectType::Snowflake);
33801 let result = dialect
33802 .transpile_to(
33803 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')",
33804 DialectType::DuckDB,
33805 )
33806 .unwrap();
33807 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
33808 }
33809
33810 #[test]
33811 fn test_regexp_substr_snowflake_to_duckdb_6arg_group0() {
33812 let dialect = Dialect::get(DialectType::Snowflake);
33813 let result = dialect
33814 .transpile_to(
33815 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)",
33816 DialectType::DuckDB,
33817 )
33818 .unwrap();
33819 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
33820 }
33821
33822 #[test]
33823 fn test_regexp_substr_snowflake_identity_strip_group0() {
33824 let dialect = Dialect::get(DialectType::Snowflake);
33825 let result = dialect
33826 .transpile_to(
33827 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)",
33828 DialectType::Snowflake,
33829 )
33830 .unwrap();
33831 assert_eq!(
33832 result[0],
33833 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')"
33834 );
33835 }
33836
33837 #[test]
33838 fn test_regexp_substr_all_snowflake_to_duckdb_2arg() {
33839 let dialect = Dialect::get(DialectType::Snowflake);
33840 let result = dialect
33841 .transpile_to("SELECT REGEXP_SUBSTR_ALL(s, 'pattern')", DialectType::DuckDB)
33842 .unwrap();
33843 assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
33844 }
33845
33846 #[test]
33847 fn test_regexp_substr_all_snowflake_to_duckdb_3arg_pos_gt1() {
33848 let dialect = Dialect::get(DialectType::Snowflake);
33849 let result = dialect
33850 .transpile_to(
33851 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 3)",
33852 DialectType::DuckDB,
33853 )
33854 .unwrap();
33855 assert_eq!(
33856 result[0],
33857 "SELECT REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')"
33858 );
33859 }
33860
33861 #[test]
33862 fn test_regexp_substr_all_snowflake_to_duckdb_5arg_e_flag() {
33863 let dialect = Dialect::get(DialectType::Snowflake);
33864 let result = dialect
33865 .transpile_to(
33866 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')",
33867 DialectType::DuckDB,
33868 )
33869 .unwrap();
33870 assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
33871 }
33872
33873 #[test]
33874 fn test_regexp_substr_all_snowflake_to_duckdb_6arg_group0() {
33875 let dialect = Dialect::get(DialectType::Snowflake);
33876 let result = dialect
33877 .transpile_to(
33878 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)",
33879 DialectType::DuckDB,
33880 )
33881 .unwrap();
33882 assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
33883 }
33884
33885 #[test]
33886 fn test_regexp_substr_all_snowflake_identity_strip_group0() {
33887 let dialect = Dialect::get(DialectType::Snowflake);
33888 let result = dialect
33889 .transpile_to(
33890 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)",
33891 DialectType::Snowflake,
33892 )
33893 .unwrap();
33894 assert_eq!(
33895 result[0],
33896 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')"
33897 );
33898 }
33899
33900 #[test]
33901 fn test_regexp_count_snowflake_to_duckdb_2arg() {
33902 let dialect = Dialect::get(DialectType::Snowflake);
33903 let result = dialect
33904 .transpile_to("SELECT REGEXP_COUNT(s, 'pattern')", DialectType::DuckDB)
33905 .unwrap();
33906 assert_eq!(
33907 result[0],
33908 "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, 'pattern')) END"
33909 );
33910 }
33911
33912 #[test]
33913 fn test_regexp_count_snowflake_to_duckdb_3arg() {
33914 let dialect = Dialect::get(DialectType::Snowflake);
33915 let result = dialect
33916 .transpile_to("SELECT REGEXP_COUNT(s, 'pattern', 3)", DialectType::DuckDB)
33917 .unwrap();
33918 assert_eq!(
33919 result[0],
33920 "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')) END"
33921 );
33922 }
33923
33924 #[test]
33925 fn test_regexp_count_snowflake_to_duckdb_4arg_flags() {
33926 let dialect = Dialect::get(DialectType::Snowflake);
33927 let result = dialect
33928 .transpile_to(
33929 "SELECT REGEXP_COUNT(s, 'pattern', 1, 'i')",
33930 DialectType::DuckDB,
33931 )
33932 .unwrap();
33933 assert_eq!(
33934 result[0],
33935 "SELECT CASE WHEN '(?i)' || 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 1), '(?i)' || 'pattern')) END"
33936 );
33937 }
33938
33939 #[test]
33940 fn test_regexp_count_snowflake_to_duckdb_4arg_flags_literal_string() {
33941 let dialect = Dialect::get(DialectType::Snowflake);
33942 let result = dialect
33943 .transpile_to(
33944 "SELECT REGEXP_COUNT('Hello World', 'L', 1, 'im')",
33945 DialectType::DuckDB,
33946 )
33947 .unwrap();
33948 assert_eq!(
33949 result[0],
33950 "SELECT CASE WHEN '(?im)' || 'L' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING('Hello World', 1), '(?im)' || 'L')) END"
33951 );
33952 }
33953
33954 #[test]
33955 fn test_regexp_replace_snowflake_to_duckdb_5arg_pos1_occ1() {
33956 let dialect = Dialect::get(DialectType::Snowflake);
33957 let result = dialect
33958 .transpile_to(
33959 "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 1, 1)",
33960 DialectType::DuckDB,
33961 )
33962 .unwrap();
33963 assert_eq!(result[0], "SELECT REGEXP_REPLACE(s, 'pattern', 'repl')");
33964 }
33965
33966 #[test]
33967 fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ0() {
33968 let dialect = Dialect::get(DialectType::Snowflake);
33969 let result = dialect
33970 .transpile_to(
33971 "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 0)",
33972 DialectType::DuckDB,
33973 )
33974 .unwrap();
33975 assert_eq!(
33976 result[0],
33977 "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl', 'g')"
33978 );
33979 }
33980
33981 #[test]
33982 fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ1() {
33983 let dialect = Dialect::get(DialectType::Snowflake);
33984 let result = dialect
33985 .transpile_to(
33986 "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 1)",
33987 DialectType::DuckDB,
33988 )
33989 .unwrap();
33990 assert_eq!(
33991 result[0],
33992 "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl')"
33993 );
33994 }
33995
33996 #[test]
33997 fn test_rlike_snowflake_to_duckdb_2arg() {
33998 let dialect = Dialect::get(DialectType::Snowflake);
33999 let result = dialect
34000 .transpile_to("SELECT RLIKE(a, b)", DialectType::DuckDB)
34001 .unwrap();
34002 assert_eq!(
34003 result[0],
34004 "SELECT REGEXP_MATCHES(a, '^(' || (b) || ')$')"
34005 );
34006 }
34007
34008 #[test]
34009 fn test_rlike_snowflake_to_duckdb_3arg_flags() {
34010 let dialect = Dialect::get(DialectType::Snowflake);
34011 let result = dialect
34012 .transpile_to("SELECT RLIKE(a, b, 'i')", DialectType::DuckDB)
34013 .unwrap();
34014 assert_eq!(
34015 result[0],
34016 "SELECT REGEXP_MATCHES(a, '^(' || (b) || ')$', 'i')"
34017 );
34018 }
34019
34020 #[test]
34021 fn test_regexp_extract_all_bigquery_to_snowflake_no_capture() {
34022 let dialect = Dialect::get(DialectType::BigQuery);
34023 let result = dialect
34024 .transpile_to(
34025 "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')",
34026 DialectType::Snowflake,
34027 )
34028 .unwrap();
34029 assert_eq!(result[0], "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')");
34030 }
34031
34032 #[test]
34033 fn test_regexp_extract_all_bigquery_to_snowflake_with_capture() {
34034 let dialect = Dialect::get(DialectType::BigQuery);
34035 let result = dialect
34036 .transpile_to(
34037 "SELECT REGEXP_EXTRACT_ALL(s, '(a)[0-9]')",
34038 DialectType::Snowflake,
34039 )
34040 .unwrap();
34041 assert_eq!(
34042 result[0],
34043 "SELECT REGEXP_SUBSTR_ALL(s, '(a)[0-9]', 1, 1, 'c', 1)"
34044 );
34045 }
34046
34047 #[test]
34048 fn test_regexp_instr_snowflake_to_duckdb_2arg() {
34049 let handle = std::thread::Builder::new()
34050 .stack_size(16 * 1024 * 1024)
34051 .spawn(|| {
34052 let dialect = Dialect::get(DialectType::Snowflake);
34053 let result = dialect
34054 .transpile_to("SELECT REGEXP_INSTR(s, 'pattern')", DialectType::DuckDB)
34055 .unwrap();
34056 // Should produce a CASE WHEN expression
34057 assert!(result[0].contains("CASE WHEN"), "Expected CASE WHEN in result: {}", result[0]);
34058 assert!(result[0].contains("LIST_SUM"), "Expected LIST_SUM in result: {}", result[0]);
34059 })
34060 .unwrap();
34061 handle.join().unwrap();
34062 }
34063
34064 #[test]
34065 fn test_array_except_generic_to_duckdb() {
34066 // Use larger stack to avoid overflow from deeply nested expression Drop
34067 let handle = std::thread::Builder::new()
34068 .stack_size(16 * 1024 * 1024)
34069 .spawn(|| {
34070 let dialect = Dialect::get(DialectType::Generic);
34071 let result = dialect
34072 .transpile_to("SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))", DialectType::DuckDB)
34073 .unwrap();
34074 eprintln!("ARRAY_EXCEPT Generic->DuckDB: {}", result[0]);
34075 assert!(result[0].contains("CASE WHEN"), "Expected CASE WHEN: {}", result[0]);
34076 assert!(result[0].contains("LIST_TRANSFORM"), "Expected LIST_TRANSFORM: {}", result[0]);
34077 assert!(result[0].contains("LIST_FILTER"), "Expected LIST_FILTER: {}", result[0]);
34078 assert!(result[0].contains("LIST_ZIP"), "Expected LIST_ZIP: {}", result[0]);
34079 assert!(result[0].contains("GENERATE_SERIES"), "Expected GENERATE_SERIES: {}", result[0]);
34080 assert!(result[0].contains("IS NOT DISTINCT FROM"), "Expected IS NOT DISTINCT FROM: {}", result[0]);
34081 })
34082 .unwrap();
34083 handle.join().unwrap();
34084 }
34085
34086 #[test]
34087 fn test_array_except_generic_to_snowflake() {
34088 let dialect = Dialect::get(DialectType::Generic);
34089 let result = dialect
34090 .transpile_to("SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))", DialectType::Snowflake)
34091 .unwrap();
34092 eprintln!("ARRAY_EXCEPT Generic->Snowflake: {}", result[0]);
34093 assert_eq!(result[0], "SELECT ARRAY_EXCEPT([1, 2, 3], [2])");
34094 }
34095
34096 #[test]
34097 fn test_array_except_generic_to_presto() {
34098 let dialect = Dialect::get(DialectType::Generic);
34099 let result = dialect
34100 .transpile_to("SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))", DialectType::Presto)
34101 .unwrap();
34102 eprintln!("ARRAY_EXCEPT Generic->Presto: {}", result[0]);
34103 assert_eq!(result[0], "SELECT ARRAY_EXCEPT(ARRAY[1, 2, 3], ARRAY[2])");
34104 }
34105
34106 #[test]
34107 fn test_array_except_snowflake_to_duckdb() {
34108 let handle = std::thread::Builder::new()
34109 .stack_size(16 * 1024 * 1024)
34110 .spawn(|| {
34111 let dialect = Dialect::get(DialectType::Snowflake);
34112 let result = dialect
34113 .transpile_to("SELECT ARRAY_EXCEPT([1, 2, 3], [2])", DialectType::DuckDB)
34114 .unwrap();
34115 eprintln!("ARRAY_EXCEPT Snowflake->DuckDB: {}", result[0]);
34116 assert!(result[0].contains("CASE WHEN"), "Expected CASE WHEN: {}", result[0]);
34117 assert!(result[0].contains("LIST_TRANSFORM"), "Expected LIST_TRANSFORM: {}", result[0]);
34118 })
34119 .unwrap();
34120 handle.join().unwrap();
34121 }
34122
34123 #[test]
34124 fn test_array_contains_snowflake_to_snowflake() {
34125 let dialect = Dialect::get(DialectType::Snowflake);
34126 let result = dialect
34127 .transpile_to("SELECT ARRAY_CONTAINS(x, [1, NULL, 3])", DialectType::Snowflake)
34128 .unwrap();
34129 eprintln!("ARRAY_CONTAINS Snowflake->Snowflake: {}", result[0]);
34130 assert_eq!(result[0], "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])");
34131 }
34132
34133 #[test]
34134 fn test_array_contains_snowflake_to_duckdb() {
34135 let dialect = Dialect::get(DialectType::Snowflake);
34136 let result = dialect
34137 .transpile_to("SELECT ARRAY_CONTAINS(x, [1, NULL, 3])", DialectType::DuckDB)
34138 .unwrap();
34139 eprintln!("ARRAY_CONTAINS Snowflake->DuckDB: {}", result[0]);
34140 assert!(result[0].contains("CASE WHEN"), "Expected CASE WHEN: {}", result[0]);
34141 assert!(result[0].contains("NULLIF"), "Expected NULLIF: {}", result[0]);
34142 assert!(result[0].contains("ARRAY_CONTAINS"), "Expected ARRAY_CONTAINS: {}", result[0]);
34143 }
34144
34145 #[test]
34146 fn test_array_distinct_snowflake_to_duckdb() {
34147 let dialect = Dialect::get(DialectType::Snowflake);
34148 let result = dialect
34149 .transpile_to("SELECT ARRAY_DISTINCT([1, 2, 2, 3, 1])", DialectType::DuckDB)
34150 .unwrap();
34151 eprintln!("ARRAY_DISTINCT Snowflake->DuckDB: {}", result[0]);
34152 assert!(result[0].contains("CASE WHEN"), "Expected CASE WHEN: {}", result[0]);
34153 assert!(result[0].contains("LIST_DISTINCT"), "Expected LIST_DISTINCT: {}", result[0]);
34154 assert!(result[0].contains("LIST_APPEND"), "Expected LIST_APPEND: {}", result[0]);
34155 assert!(result[0].contains("LIST_FILTER"), "Expected LIST_FILTER: {}", result[0]);
34156 }
34157}