polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery and MySQL quote
//! identifiers with backticks, while TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile_to`](Dialect::transpile_to) another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic; // Always compiled
23
24#[cfg(feature = "dialect-athena")]
25mod athena;
26#[cfg(feature = "dialect-bigquery")]
27mod bigquery;
28#[cfg(feature = "dialect-clickhouse")]
29mod clickhouse;
30#[cfg(feature = "dialect-cockroachdb")]
31mod cockroachdb;
32#[cfg(feature = "dialect-databricks")]
33mod databricks;
34#[cfg(feature = "dialect-datafusion")]
35mod datafusion;
36#[cfg(feature = "dialect-doris")]
37mod doris;
38#[cfg(feature = "dialect-dremio")]
39mod dremio;
40#[cfg(feature = "dialect-drill")]
41mod drill;
42#[cfg(feature = "dialect-druid")]
43mod druid;
44#[cfg(feature = "dialect-duckdb")]
45mod duckdb;
46#[cfg(feature = "dialect-dune")]
47mod dune;
48#[cfg(feature = "dialect-exasol")]
49mod exasol;
50#[cfg(feature = "dialect-fabric")]
51mod fabric;
52#[cfg(feature = "dialect-hive")]
53mod hive;
54#[cfg(feature = "dialect-materialize")]
55mod materialize;
56#[cfg(feature = "dialect-mysql")]
57mod mysql;
58#[cfg(feature = "dialect-oracle")]
59mod oracle;
60#[cfg(feature = "dialect-postgresql")]
61mod postgres;
62#[cfg(feature = "dialect-presto")]
63mod presto;
64#[cfg(feature = "dialect-redshift")]
65mod redshift;
66#[cfg(feature = "dialect-risingwave")]
67mod risingwave;
68#[cfg(feature = "dialect-singlestore")]
69mod singlestore;
70#[cfg(feature = "dialect-snowflake")]
71mod snowflake;
72#[cfg(feature = "dialect-solr")]
73mod solr;
74#[cfg(feature = "dialect-spark")]
75mod spark;
76#[cfg(feature = "dialect-sqlite")]
77mod sqlite;
78#[cfg(feature = "dialect-starrocks")]
79mod starrocks;
80#[cfg(feature = "dialect-tableau")]
81mod tableau;
82#[cfg(feature = "dialect-teradata")]
83mod teradata;
84#[cfg(feature = "dialect-tidb")]
85mod tidb;
86#[cfg(feature = "dialect-trino")]
87mod trino;
88#[cfg(feature = "dialect-tsql")]
89mod tsql;
90
91pub use generic::GenericDialect; // Always available
92
93#[cfg(feature = "dialect-athena")]
94pub use athena::AthenaDialect;
95#[cfg(feature = "dialect-bigquery")]
96pub use bigquery::BigQueryDialect;
97#[cfg(feature = "dialect-clickhouse")]
98pub use clickhouse::ClickHouseDialect;
99#[cfg(feature = "dialect-cockroachdb")]
100pub use cockroachdb::CockroachDBDialect;
101#[cfg(feature = "dialect-databricks")]
102pub use databricks::DatabricksDialect;
103#[cfg(feature = "dialect-datafusion")]
104pub use datafusion::DataFusionDialect;
105#[cfg(feature = "dialect-doris")]
106pub use doris::DorisDialect;
107#[cfg(feature = "dialect-dremio")]
108pub use dremio::DremioDialect;
109#[cfg(feature = "dialect-drill")]
110pub use drill::DrillDialect;
111#[cfg(feature = "dialect-druid")]
112pub use druid::DruidDialect;
113#[cfg(feature = "dialect-duckdb")]
114pub use duckdb::DuckDBDialect;
115#[cfg(feature = "dialect-dune")]
116pub use dune::DuneDialect;
117#[cfg(feature = "dialect-exasol")]
118pub use exasol::ExasolDialect;
119#[cfg(feature = "dialect-fabric")]
120pub use fabric::FabricDialect;
121#[cfg(feature = "dialect-hive")]
122pub use hive::HiveDialect;
123#[cfg(feature = "dialect-materialize")]
124pub use materialize::MaterializeDialect;
125#[cfg(feature = "dialect-mysql")]
126pub use mysql::MySQLDialect;
127#[cfg(feature = "dialect-oracle")]
128pub use oracle::OracleDialect;
129#[cfg(feature = "dialect-postgresql")]
130pub use postgres::PostgresDialect;
131#[cfg(feature = "dialect-presto")]
132pub use presto::PrestoDialect;
133#[cfg(feature = "dialect-redshift")]
134pub use redshift::RedshiftDialect;
135#[cfg(feature = "dialect-risingwave")]
136pub use risingwave::RisingWaveDialect;
137#[cfg(feature = "dialect-singlestore")]
138pub use singlestore::SingleStoreDialect;
139#[cfg(feature = "dialect-snowflake")]
140pub use snowflake::SnowflakeDialect;
141#[cfg(feature = "dialect-solr")]
142pub use solr::SolrDialect;
143#[cfg(feature = "dialect-spark")]
144pub use spark::SparkDialect;
145#[cfg(feature = "dialect-sqlite")]
146pub use sqlite::SQLiteDialect;
147#[cfg(feature = "dialect-starrocks")]
148pub use starrocks::StarRocksDialect;
149#[cfg(feature = "dialect-tableau")]
150pub use tableau::TableauDialect;
151#[cfg(feature = "dialect-teradata")]
152pub use teradata::TeradataDialect;
153#[cfg(feature = "dialect-tidb")]
154pub use tidb::TiDBDialect;
155#[cfg(feature = "dialect-trino")]
156pub use trino::TrinoDialect;
157#[cfg(feature = "dialect-tsql")]
158pub use tsql::TSQLDialect;
159
160use crate::error::Result;
161use crate::expressions::{Expression, FunctionBody};
162use crate::generator::{Generator, GeneratorConfig};
163use crate::parser::Parser;
164use crate::tokens::{Token, Tokenizer, TokenizerConfig};
165use serde::{Deserialize, Serialize};
166use std::collections::HashMap;
167use std::sync::{Arc, LazyLock, RwLock};
168
/// Enumeration of all supported SQL dialects.
///
/// Each variant corresponds to a specific SQL database engine or query language.
/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
/// and is used as the default when no dialect is specified.
///
/// Dialect names are case-insensitive when parsed from strings via [`FromStr`].
/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DialectType {
    /// Standard SQL with no dialect-specific behavior (default).
    Generic,
    /// PostgreSQL -- advanced open-source relational database (also accepts "postgres").
    PostgreSQL,
    /// MySQL -- widely-used open-source relational database.
    MySQL,
    /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
    BigQuery,
    /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
    Snowflake,
    /// DuckDB -- in-process analytical database with modern SQL extensions.
    DuckDB,
    /// SQLite -- lightweight embedded relational database.
    SQLite,
    /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
    Hive,
    /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
    Spark,
    /// Trino -- distributed SQL query engine (formerly PrestoSQL).
    Trino,
    /// PrestoDB -- distributed SQL query engine for big data.
    Presto,
    /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
    Redshift,
    /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
    TSQL,
    /// Oracle Database -- commercial relational database with PL/SQL extensions.
    Oracle,
    /// ClickHouse -- column-oriented OLAP database for real-time analytics.
    ClickHouse,
    /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
    Databricks,
    /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
    Athena,
    /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
    Teradata,
    /// Apache Doris -- real-time analytical database (MySQL-compatible).
    Doris,
    /// StarRocks -- sub-second OLAP database (MySQL-compatible).
    StarRocks,
    /// Materialize -- streaming SQL database built on differential dataflow.
    Materialize,
    /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
    RisingWave,
    /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
    SingleStore,
    /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
    CockroachDB,
    /// TiDB -- distributed HTAP database with MySQL compatibility.
    TiDB,
    /// Apache Druid -- real-time analytics database.
    Druid,
    /// Apache Solr -- search platform with SQL interface.
    Solr,
    /// Tableau -- data visualization platform with its own SQL dialect.
    Tableau,
    /// Dune Analytics -- blockchain analytics SQL engine.
    Dune,
    /// Microsoft Fabric -- unified analytics platform (T-SQL based).
    Fabric,
    /// Apache Drill -- schema-free SQL query engine for big data.
    Drill,
    /// Dremio -- data lakehouse platform with Arrow-based query engine.
    Dremio,
    /// Exasol -- in-memory analytic database.
    Exasol,
    /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions (also accepts "arrow-datafusion", "arrow_datafusion").
    DataFusion,
}
249
250impl Default for DialectType {
251 fn default() -> Self {
252 DialectType::Generic
253 }
254}
255
256impl std::fmt::Display for DialectType {
257 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258 match self {
259 DialectType::Generic => write!(f, "generic"),
260 DialectType::PostgreSQL => write!(f, "postgresql"),
261 DialectType::MySQL => write!(f, "mysql"),
262 DialectType::BigQuery => write!(f, "bigquery"),
263 DialectType::Snowflake => write!(f, "snowflake"),
264 DialectType::DuckDB => write!(f, "duckdb"),
265 DialectType::SQLite => write!(f, "sqlite"),
266 DialectType::Hive => write!(f, "hive"),
267 DialectType::Spark => write!(f, "spark"),
268 DialectType::Trino => write!(f, "trino"),
269 DialectType::Presto => write!(f, "presto"),
270 DialectType::Redshift => write!(f, "redshift"),
271 DialectType::TSQL => write!(f, "tsql"),
272 DialectType::Oracle => write!(f, "oracle"),
273 DialectType::ClickHouse => write!(f, "clickhouse"),
274 DialectType::Databricks => write!(f, "databricks"),
275 DialectType::Athena => write!(f, "athena"),
276 DialectType::Teradata => write!(f, "teradata"),
277 DialectType::Doris => write!(f, "doris"),
278 DialectType::StarRocks => write!(f, "starrocks"),
279 DialectType::Materialize => write!(f, "materialize"),
280 DialectType::RisingWave => write!(f, "risingwave"),
281 DialectType::SingleStore => write!(f, "singlestore"),
282 DialectType::CockroachDB => write!(f, "cockroachdb"),
283 DialectType::TiDB => write!(f, "tidb"),
284 DialectType::Druid => write!(f, "druid"),
285 DialectType::Solr => write!(f, "solr"),
286 DialectType::Tableau => write!(f, "tableau"),
287 DialectType::Dune => write!(f, "dune"),
288 DialectType::Fabric => write!(f, "fabric"),
289 DialectType::Drill => write!(f, "drill"),
290 DialectType::Dremio => write!(f, "dremio"),
291 DialectType::Exasol => write!(f, "exasol"),
292 DialectType::DataFusion => write!(f, "datafusion"),
293 }
294 }
295}
296
297impl std::str::FromStr for DialectType {
298 type Err = crate::error::Error;
299
300 fn from_str(s: &str) -> Result<Self> {
301 match s.to_ascii_lowercase().as_str() {
302 "generic" | "" => Ok(DialectType::Generic),
303 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
304 "mysql" => Ok(DialectType::MySQL),
305 "bigquery" => Ok(DialectType::BigQuery),
306 "snowflake" => Ok(DialectType::Snowflake),
307 "duckdb" => Ok(DialectType::DuckDB),
308 "sqlite" => Ok(DialectType::SQLite),
309 "hive" => Ok(DialectType::Hive),
310 "spark" | "spark2" => Ok(DialectType::Spark),
311 "trino" => Ok(DialectType::Trino),
312 "presto" => Ok(DialectType::Presto),
313 "redshift" => Ok(DialectType::Redshift),
314 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
315 "oracle" => Ok(DialectType::Oracle),
316 "clickhouse" => Ok(DialectType::ClickHouse),
317 "databricks" => Ok(DialectType::Databricks),
318 "athena" => Ok(DialectType::Athena),
319 "teradata" => Ok(DialectType::Teradata),
320 "doris" => Ok(DialectType::Doris),
321 "starrocks" => Ok(DialectType::StarRocks),
322 "materialize" => Ok(DialectType::Materialize),
323 "risingwave" => Ok(DialectType::RisingWave),
324 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
325 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
326 "tidb" => Ok(DialectType::TiDB),
327 "druid" => Ok(DialectType::Druid),
328 "solr" => Ok(DialectType::Solr),
329 "tableau" => Ok(DialectType::Tableau),
330 "dune" => Ok(DialectType::Dune),
331 "fabric" => Ok(DialectType::Fabric),
332 "drill" => Ok(DialectType::Drill),
333 "dremio" => Ok(DialectType::Dremio),
334 "exasol" => Ok(DialectType::Exasol),
335 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
336 _ => Err(crate::error::Error::parse(
337 format!("Unknown dialect: {}", s),
338 0,
339 0,
340 0,
341 0,
342 )),
343 }
344 }
345}
346
/// Trait that each concrete SQL dialect must implement.
///
/// `DialectImpl` provides the configuration hooks and per-expression transform logic
/// that distinguish one dialect from another. Implementors supply:
///
/// - A [`DialectType`] identifier.
/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
///   that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
///   rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
///
/// The default implementations are no-ops, so a minimal dialect only needs to provide
/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
/// standard SQL.
pub trait DialectImpl {
    /// Returns the [`DialectType`] that identifies this dialect.
    ///
    /// This is the only required method; everything else has a generic-SQL default.
    fn dialect_type(&self) -> DialectType;

    /// Returns the tokenizer configuration for this dialect.
    ///
    /// Override to customize identifier quoting characters, string escape rules,
    /// comment styles, and other lexing behavior. The default is the generic
    /// [`TokenizerConfig`].
    fn tokenizer_config(&self) -> TokenizerConfig {
        TokenizerConfig::default()
    }

    /// Returns the generator configuration for this dialect.
    ///
    /// Override to customize identifier quoting style, function name casing,
    /// keyword casing, and other SQL generation behavior. The default is the
    /// generic [`GeneratorConfig`].
    fn generator_config(&self) -> GeneratorConfig {
        GeneratorConfig::default()
    }

    /// Returns a generator configuration tailored to a specific expression.
    ///
    /// Override this for hybrid dialects like Athena that route to different SQL engines
    /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
    /// The default delegates to [`generator_config`](DialectImpl::generator_config) and
    /// ignores the expression.
    fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
        self.generator_config()
    }

    /// Transforms a single expression node for this dialect, without recursing into children.
    ///
    /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
    /// expression unchanged if no dialect-specific rewrite is needed. Transformations
    /// typically include function renaming, operator substitution, and type mapping.
    /// The default returns the node untouched.
    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }

    /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
    ///
    /// Override this to apply structural rewrites that must see the entire tree at once,
    /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
    /// `explode_projection_to_unnest`. The default is a no-op pass-through.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }
}
409
410/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
411/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
412///
413/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
414/// and then nested element/field types are recursed into. This ensures that dialect-level
415/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
416fn transform_data_type_recursive<F>(
417 dt: crate::expressions::DataType,
418 transform_fn: &F,
419) -> Result<crate::expressions::DataType>
420where
421 F: Fn(Expression) -> Result<Expression>,
422{
423 use crate::expressions::DataType;
424 // First, transform the outermost type through the expression system
425 let dt_expr = transform_fn(Expression::DataType(dt))?;
426 let dt = match dt_expr {
427 Expression::DataType(d) => d,
428 _ => {
429 return Ok(match dt_expr {
430 _ => DataType::Custom {
431 name: "UNKNOWN".to_string(),
432 },
433 })
434 }
435 };
436 // Then recurse into nested types
437 match dt {
438 DataType::Array {
439 element_type,
440 dimension,
441 } => {
442 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
443 Ok(DataType::Array {
444 element_type: Box::new(inner),
445 dimension,
446 })
447 }
448 DataType::List { element_type } => {
449 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
450 Ok(DataType::List {
451 element_type: Box::new(inner),
452 })
453 }
454 DataType::Struct { fields, nested } => {
455 let mut new_fields = Vec::new();
456 for mut field in fields {
457 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
458 new_fields.push(field);
459 }
460 Ok(DataType::Struct {
461 fields: new_fields,
462 nested,
463 })
464 }
465 DataType::Map {
466 key_type,
467 value_type,
468 } => {
469 let k = transform_data_type_recursive(*key_type, transform_fn)?;
470 let v = transform_data_type_recursive(*value_type, transform_fn)?;
471 Ok(DataType::Map {
472 key_type: Box::new(k),
473 value_type: Box::new(v),
474 })
475 }
476 other => Ok(other),
477 }
478}
479
/// Convert DuckDB C-style format strings to Presto C-style format strings.
///
/// DuckDB and Presto both use C-style `%` directives but with different
/// specifiers for some cases (minute, second, and the no-padding variants).
#[cfg(feature = "transpile")]
fn duckdb_to_presto_format(fmt: &str) -> String {
    // Directives that must be shielded from the single-character conversions
    // below. Each entry is (DuckDB pattern, temporary sentinel, Presto form);
    // `\x01` cannot appear in a valid format string, so sentinels are safe.
    // Entry order matters: longer patterns are shielded before shorter ones.
    const SHIELDED: [(&str, &str, &str); 6] = [
        ("%-m", "\x01NOPADM\x01", "%c"),
        ("%-d", "\x01NOPADD\x01", "%e"),
        ("%-I", "\x01NOPADI\x01", "%l"),
        ("%-H", "\x01NOPADH\x01", "%k"),
        ("%H:%M:%S", "\x01HMS\x01", "%T"),
        ("%Y-%m-%d", "\x01YMD\x01", "%Y-%m-%d"),
    ];

    // Pass 1: swap multi-character patterns for sentinels so the per-specifier
    // replacements cannot clobber them.
    let mut out = fmt.to_string();
    for (pattern, sentinel, _) in SHIELDED {
        out = out.replace(pattern, sentinel);
    }
    // Pass 2: convert the individual specifiers that differ (minute, second).
    out = out.replace("%M", "%i");
    out = out.replace("%S", "%s");
    // Pass 3: restore sentinels with their Presto equivalents.
    for (_, sentinel, presto) in SHIELDED {
        out = out.replace(sentinel, presto);
    }
    out
}
505
/// Convert DuckDB C-style format strings to BigQuery format strings.
///
/// BigQuery uses a mix of strftime-like directives; combined date/time
/// patterns collapse to BigQuery's shorthand (`%F` = date, `%T` = time).
#[cfg(feature = "transpile")]
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    // Longer patterns are replaced before their substrings so the combined
    // date-time form wins over the individual date and time forms.
    fmt.replace("%-d", "%e")
        .replace("%Y-%m-%d %H:%M:%S", "%F %T")
        .replace("%Y-%m-%d", "%F")
        .replace("%H:%M:%S", "%T")
}
518
519/// Applies a transform function bottom-up through an entire expression tree.
520///
521/// This is the core tree-rewriting engine used by the dialect system. It performs
522/// a post-order (children-first) traversal: for each node, all children are recursively
523/// transformed before the node itself is passed to `transform_fn`. This bottom-up
524/// strategy means that when `transform_fn` sees a node, its children have already
525/// been rewritten, which simplifies pattern matching on sub-expressions.
526///
527/// The function handles all expression variants including SELECT clauses (FROM, WHERE,
528/// GROUP BY, HAVING, ORDER BY, QUALIFY, WITH/CTEs, WINDOW), binary operators,
529/// function calls, CASE expressions, date/time functions, and more.
530///
531/// # Arguments
532///
533/// * `expr` - The root expression to transform (consumed).
534/// * `transform_fn` - A closure that receives each expression node (after its children
535/// have been transformed) and returns a possibly-rewritten expression.
536///
537/// # Errors
538///
539/// Returns an error if `transform_fn` returns an error for any node.
540pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
541where
542 F: Fn(Expression) -> Result<Expression>,
543{
544 use crate::expressions::BinaryOp;
545
546 // Helper macro to transform binary ops with Box<BinaryOp>
547 macro_rules! transform_binary {
548 ($variant:ident, $op:expr) => {{
549 let left = transform_recursive($op.left, transform_fn)?;
550 let right = transform_recursive($op.right, transform_fn)?;
551 Expression::$variant(Box::new(BinaryOp {
552 left,
553 right,
554 left_comments: $op.left_comments,
555 operator_comments: $op.operator_comments,
556 trailing_comments: $op.trailing_comments,
557 inferred_type: $op.inferred_type,
558 }))
559 }};
560 }
561
562 // Fast path: leaf nodes never need child traversal, apply transform directly
563 if matches!(
564 &expr,
565 Expression::Literal(_)
566 | Expression::Boolean(_)
567 | Expression::Null(_)
568 | Expression::Identifier(_)
569 | Expression::Star(_)
570 | Expression::Parameter(_)
571 | Expression::Placeholder(_)
572 | Expression::SessionParameter(_)
573 ) {
574 return transform_fn(expr);
575 }
576
577 // First recursively transform children, then apply the transform function
578 let expr = match expr {
579 Expression::Select(mut select) => {
580 select.expressions = select
581 .expressions
582 .into_iter()
583 .map(|e| transform_recursive(e, transform_fn))
584 .collect::<Result<Vec<_>>>()?;
585
586 // Transform FROM clause
587 if let Some(mut from) = select.from.take() {
588 from.expressions = from
589 .expressions
590 .into_iter()
591 .map(|e| transform_recursive(e, transform_fn))
592 .collect::<Result<Vec<_>>>()?;
593 select.from = Some(from);
594 }
595
596 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
597 select.joins = select
598 .joins
599 .into_iter()
600 .map(|mut join| {
601 join.this = transform_recursive(join.this, transform_fn)?;
602 if let Some(on) = join.on.take() {
603 join.on = Some(transform_recursive(on, transform_fn)?);
604 }
605 // Wrap join in Expression::Join to allow transform_fn to transform it
606 match transform_fn(Expression::Join(Box::new(join)))? {
607 Expression::Join(j) => Ok(*j),
608 _ => Err(crate::error::Error::parse(
609 "Join transformation returned non-join expression",
610 0,
611 0,
612 0,
613 0,
614 )),
615 }
616 })
617 .collect::<Result<Vec<_>>>()?;
618
619 // Transform LATERAL VIEW expressions (Hive/Spark)
620 select.lateral_views = select
621 .lateral_views
622 .into_iter()
623 .map(|mut lv| {
624 lv.this = transform_recursive(lv.this, transform_fn)?;
625 Ok(lv)
626 })
627 .collect::<Result<Vec<_>>>()?;
628
629 // Transform WHERE clause
630 if let Some(mut where_clause) = select.where_clause.take() {
631 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
632 select.where_clause = Some(where_clause);
633 }
634
635 // Transform GROUP BY
636 if let Some(mut group_by) = select.group_by.take() {
637 group_by.expressions = group_by
638 .expressions
639 .into_iter()
640 .map(|e| transform_recursive(e, transform_fn))
641 .collect::<Result<Vec<_>>>()?;
642 select.group_by = Some(group_by);
643 }
644
645 // Transform HAVING
646 if let Some(mut having) = select.having.take() {
647 having.this = transform_recursive(having.this, transform_fn)?;
648 select.having = Some(having);
649 }
650
651 // Transform WITH (CTEs)
652 if let Some(mut with) = select.with.take() {
653 with.ctes = with
654 .ctes
655 .into_iter()
656 .map(|mut cte| {
657 let original = cte.this.clone();
658 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
659 cte
660 })
661 .collect();
662 select.with = Some(with);
663 }
664
665 // Transform ORDER BY
666 if let Some(mut order) = select.order_by.take() {
667 order.expressions = order
668 .expressions
669 .into_iter()
670 .map(|o| {
671 let mut o = o;
672 let original = o.this.clone();
673 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
674 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
675 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
676 Ok(Expression::Ordered(transformed)) => *transformed,
677 Ok(_) | Err(_) => o,
678 }
679 })
680 .collect();
681 select.order_by = Some(order);
682 }
683
684 // Transform WINDOW clause order_by
685 if let Some(ref mut windows) = select.windows {
686 for nw in windows.iter_mut() {
687 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
688 .into_iter()
689 .map(|o| {
690 let mut o = o;
691 let original = o.this.clone();
692 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
693 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
694 Ok(Expression::Ordered(transformed)) => *transformed,
695 Ok(_) | Err(_) => o,
696 }
697 })
698 .collect();
699 }
700 }
701
702 // Transform QUALIFY
703 if let Some(mut qual) = select.qualify.take() {
704 qual.this = transform_recursive(qual.this, transform_fn)?;
705 select.qualify = Some(qual);
706 }
707
708 Expression::Select(select)
709 }
710 Expression::Function(mut f) => {
711 f.args = f
712 .args
713 .into_iter()
714 .map(|e| transform_recursive(e, transform_fn))
715 .collect::<Result<Vec<_>>>()?;
716 Expression::Function(f)
717 }
718 Expression::AggregateFunction(mut f) => {
719 f.args = f
720 .args
721 .into_iter()
722 .map(|e| transform_recursive(e, transform_fn))
723 .collect::<Result<Vec<_>>>()?;
724 if let Some(filter) = f.filter {
725 f.filter = Some(transform_recursive(filter, transform_fn)?);
726 }
727 Expression::AggregateFunction(f)
728 }
729 Expression::WindowFunction(mut wf) => {
730 wf.this = transform_recursive(wf.this, transform_fn)?;
731 wf.over.partition_by = wf
732 .over
733 .partition_by
734 .into_iter()
735 .map(|e| transform_recursive(e, transform_fn))
736 .collect::<Result<Vec<_>>>()?;
737 // Transform order_by items through Expression::Ordered wrapper
738 wf.over.order_by = wf
739 .over
740 .order_by
741 .into_iter()
742 .map(|o| {
743 let mut o = o;
744 o.this = transform_recursive(o.this, transform_fn)?;
745 match transform_fn(Expression::Ordered(Box::new(o)))? {
746 Expression::Ordered(transformed) => Ok(*transformed),
747 _ => Err(crate::error::Error::parse(
748 "Ordered transformation returned non-Ordered expression",
749 0,
750 0,
751 0,
752 0,
753 )),
754 }
755 })
756 .collect::<Result<Vec<_>>>()?;
757 Expression::WindowFunction(wf)
758 }
759 Expression::Alias(mut a) => {
760 a.this = transform_recursive(a.this, transform_fn)?;
761 Expression::Alias(a)
762 }
763 Expression::Cast(mut c) => {
764 c.this = transform_recursive(c.this, transform_fn)?;
765 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
766 c.to = transform_data_type_recursive(c.to, transform_fn)?;
767 Expression::Cast(c)
768 }
769 Expression::And(op) => transform_binary!(And, *op),
770 Expression::Or(op) => transform_binary!(Or, *op),
771 Expression::Add(op) => transform_binary!(Add, *op),
772 Expression::Sub(op) => transform_binary!(Sub, *op),
773 Expression::Mul(op) => transform_binary!(Mul, *op),
774 Expression::Div(op) => transform_binary!(Div, *op),
775 Expression::Eq(op) => transform_binary!(Eq, *op),
776 Expression::Lt(op) => transform_binary!(Lt, *op),
777 Expression::Gt(op) => transform_binary!(Gt, *op),
778 Expression::Paren(mut p) => {
779 p.this = transform_recursive(p.this, transform_fn)?;
780 Expression::Paren(p)
781 }
782 Expression::Coalesce(mut f) => {
783 f.expressions = f
784 .expressions
785 .into_iter()
786 .map(|e| transform_recursive(e, transform_fn))
787 .collect::<Result<Vec<_>>>()?;
788 Expression::Coalesce(f)
789 }
790 Expression::IfNull(mut f) => {
791 f.this = transform_recursive(f.this, transform_fn)?;
792 f.expression = transform_recursive(f.expression, transform_fn)?;
793 Expression::IfNull(f)
794 }
795 Expression::Nvl(mut f) => {
796 f.this = transform_recursive(f.this, transform_fn)?;
797 f.expression = transform_recursive(f.expression, transform_fn)?;
798 Expression::Nvl(f)
799 }
800 Expression::In(mut i) => {
801 i.this = transform_recursive(i.this, transform_fn)?;
802 i.expressions = i
803 .expressions
804 .into_iter()
805 .map(|e| transform_recursive(e, transform_fn))
806 .collect::<Result<Vec<_>>>()?;
807 if let Some(query) = i.query {
808 i.query = Some(transform_recursive(query, transform_fn)?);
809 }
810 Expression::In(i)
811 }
812 Expression::Not(mut n) => {
813 n.this = transform_recursive(n.this, transform_fn)?;
814 Expression::Not(n)
815 }
816 Expression::ArraySlice(mut s) => {
817 s.this = transform_recursive(s.this, transform_fn)?;
818 if let Some(start) = s.start {
819 s.start = Some(transform_recursive(start, transform_fn)?);
820 }
821 if let Some(end) = s.end {
822 s.end = Some(transform_recursive(end, transform_fn)?);
823 }
824 Expression::ArraySlice(s)
825 }
826 Expression::Subscript(mut s) => {
827 s.this = transform_recursive(s.this, transform_fn)?;
828 s.index = transform_recursive(s.index, transform_fn)?;
829 Expression::Subscript(s)
830 }
831 Expression::Array(mut a) => {
832 a.expressions = a
833 .expressions
834 .into_iter()
835 .map(|e| transform_recursive(e, transform_fn))
836 .collect::<Result<Vec<_>>>()?;
837 Expression::Array(a)
838 }
839 Expression::Struct(mut s) => {
840 let mut new_fields = Vec::new();
841 for (name, expr) in s.fields {
842 let transformed = transform_recursive(expr, transform_fn)?;
843 new_fields.push((name, transformed));
844 }
845 s.fields = new_fields;
846 Expression::Struct(s)
847 }
848 Expression::NamedArgument(mut na) => {
849 na.value = transform_recursive(na.value, transform_fn)?;
850 Expression::NamedArgument(na)
851 }
852 Expression::MapFunc(mut m) => {
853 m.keys = m
854 .keys
855 .into_iter()
856 .map(|e| transform_recursive(e, transform_fn))
857 .collect::<Result<Vec<_>>>()?;
858 m.values = m
859 .values
860 .into_iter()
861 .map(|e| transform_recursive(e, transform_fn))
862 .collect::<Result<Vec<_>>>()?;
863 Expression::MapFunc(m)
864 }
865 Expression::ArrayFunc(mut a) => {
866 a.expressions = a
867 .expressions
868 .into_iter()
869 .map(|e| transform_recursive(e, transform_fn))
870 .collect::<Result<Vec<_>>>()?;
871 Expression::ArrayFunc(a)
872 }
873 Expression::Lambda(mut l) => {
874 l.body = transform_recursive(l.body, transform_fn)?;
875 Expression::Lambda(l)
876 }
877 Expression::JsonExtract(mut f) => {
878 f.this = transform_recursive(f.this, transform_fn)?;
879 f.path = transform_recursive(f.path, transform_fn)?;
880 Expression::JsonExtract(f)
881 }
882 Expression::JsonExtractScalar(mut f) => {
883 f.this = transform_recursive(f.this, transform_fn)?;
884 f.path = transform_recursive(f.path, transform_fn)?;
885 Expression::JsonExtractScalar(f)
886 }
887
888 // ===== UnaryFunc-based expressions =====
889 // These all have a single `this: Expression` child
890 Expression::Length(mut f) => {
891 f.this = transform_recursive(f.this, transform_fn)?;
892 Expression::Length(f)
893 }
894 Expression::Upper(mut f) => {
895 f.this = transform_recursive(f.this, transform_fn)?;
896 Expression::Upper(f)
897 }
898 Expression::Lower(mut f) => {
899 f.this = transform_recursive(f.this, transform_fn)?;
900 Expression::Lower(f)
901 }
902 Expression::LTrim(mut f) => {
903 f.this = transform_recursive(f.this, transform_fn)?;
904 Expression::LTrim(f)
905 }
906 Expression::RTrim(mut f) => {
907 f.this = transform_recursive(f.this, transform_fn)?;
908 Expression::RTrim(f)
909 }
910 Expression::Reverse(mut f) => {
911 f.this = transform_recursive(f.this, transform_fn)?;
912 Expression::Reverse(f)
913 }
914 Expression::Abs(mut f) => {
915 f.this = transform_recursive(f.this, transform_fn)?;
916 Expression::Abs(f)
917 }
918 Expression::Ceil(mut f) => {
919 f.this = transform_recursive(f.this, transform_fn)?;
920 Expression::Ceil(f)
921 }
922 Expression::Floor(mut f) => {
923 f.this = transform_recursive(f.this, transform_fn)?;
924 Expression::Floor(f)
925 }
926 Expression::Sign(mut f) => {
927 f.this = transform_recursive(f.this, transform_fn)?;
928 Expression::Sign(f)
929 }
930 Expression::Sqrt(mut f) => {
931 f.this = transform_recursive(f.this, transform_fn)?;
932 Expression::Sqrt(f)
933 }
934 Expression::Cbrt(mut f) => {
935 f.this = transform_recursive(f.this, transform_fn)?;
936 Expression::Cbrt(f)
937 }
938 Expression::Ln(mut f) => {
939 f.this = transform_recursive(f.this, transform_fn)?;
940 Expression::Ln(f)
941 }
942 Expression::Log(mut f) => {
943 f.this = transform_recursive(f.this, transform_fn)?;
944 if let Some(base) = f.base {
945 f.base = Some(transform_recursive(base, transform_fn)?);
946 }
947 Expression::Log(f)
948 }
949 Expression::Exp(mut f) => {
950 f.this = transform_recursive(f.this, transform_fn)?;
951 Expression::Exp(f)
952 }
953 Expression::Date(mut f) => {
954 f.this = transform_recursive(f.this, transform_fn)?;
955 Expression::Date(f)
956 }
957 Expression::Stddev(mut f) => {
958 f.this = transform_recursive(f.this, transform_fn)?;
959 Expression::Stddev(f)
960 }
961 Expression::Variance(mut f) => {
962 f.this = transform_recursive(f.this, transform_fn)?;
963 Expression::Variance(f)
964 }
965
966 // ===== BinaryFunc-based expressions =====
967 Expression::ModFunc(mut f) => {
968 f.this = transform_recursive(f.this, transform_fn)?;
969 f.expression = transform_recursive(f.expression, transform_fn)?;
970 Expression::ModFunc(f)
971 }
972 Expression::Power(mut f) => {
973 f.this = transform_recursive(f.this, transform_fn)?;
974 f.expression = transform_recursive(f.expression, transform_fn)?;
975 Expression::Power(f)
976 }
977 Expression::MapFromArrays(mut f) => {
978 f.this = transform_recursive(f.this, transform_fn)?;
979 f.expression = transform_recursive(f.expression, transform_fn)?;
980 Expression::MapFromArrays(f)
981 }
982 Expression::ElementAt(mut f) => {
983 f.this = transform_recursive(f.this, transform_fn)?;
984 f.expression = transform_recursive(f.expression, transform_fn)?;
985 Expression::ElementAt(f)
986 }
987 Expression::MapContainsKey(mut f) => {
988 f.this = transform_recursive(f.this, transform_fn)?;
989 f.expression = transform_recursive(f.expression, transform_fn)?;
990 Expression::MapContainsKey(f)
991 }
992 Expression::Left(mut f) => {
993 f.this = transform_recursive(f.this, transform_fn)?;
994 f.length = transform_recursive(f.length, transform_fn)?;
995 Expression::Left(f)
996 }
997 Expression::Right(mut f) => {
998 f.this = transform_recursive(f.this, transform_fn)?;
999 f.length = transform_recursive(f.length, transform_fn)?;
1000 Expression::Right(f)
1001 }
1002 Expression::Repeat(mut f) => {
1003 f.this = transform_recursive(f.this, transform_fn)?;
1004 f.times = transform_recursive(f.times, transform_fn)?;
1005 Expression::Repeat(f)
1006 }
1007
1008 // ===== Complex function expressions =====
1009 Expression::Substring(mut f) => {
1010 f.this = transform_recursive(f.this, transform_fn)?;
1011 f.start = transform_recursive(f.start, transform_fn)?;
1012 if let Some(len) = f.length {
1013 f.length = Some(transform_recursive(len, transform_fn)?);
1014 }
1015 Expression::Substring(f)
1016 }
1017 Expression::Replace(mut f) => {
1018 f.this = transform_recursive(f.this, transform_fn)?;
1019 f.old = transform_recursive(f.old, transform_fn)?;
1020 f.new = transform_recursive(f.new, transform_fn)?;
1021 Expression::Replace(f)
1022 }
1023 Expression::ConcatWs(mut f) => {
1024 f.separator = transform_recursive(f.separator, transform_fn)?;
1025 f.expressions = f
1026 .expressions
1027 .into_iter()
1028 .map(|e| transform_recursive(e, transform_fn))
1029 .collect::<Result<Vec<_>>>()?;
1030 Expression::ConcatWs(f)
1031 }
1032 Expression::Trim(mut f) => {
1033 f.this = transform_recursive(f.this, transform_fn)?;
1034 if let Some(chars) = f.characters {
1035 f.characters = Some(transform_recursive(chars, transform_fn)?);
1036 }
1037 Expression::Trim(f)
1038 }
1039 Expression::Split(mut f) => {
1040 f.this = transform_recursive(f.this, transform_fn)?;
1041 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
1042 Expression::Split(f)
1043 }
1044 Expression::Lpad(mut f) => {
1045 f.this = transform_recursive(f.this, transform_fn)?;
1046 f.length = transform_recursive(f.length, transform_fn)?;
1047 if let Some(fill) = f.fill {
1048 f.fill = Some(transform_recursive(fill, transform_fn)?);
1049 }
1050 Expression::Lpad(f)
1051 }
1052 Expression::Rpad(mut f) => {
1053 f.this = transform_recursive(f.this, transform_fn)?;
1054 f.length = transform_recursive(f.length, transform_fn)?;
1055 if let Some(fill) = f.fill {
1056 f.fill = Some(transform_recursive(fill, transform_fn)?);
1057 }
1058 Expression::Rpad(f)
1059 }
1060
1061 // ===== Conditional expressions =====
1062 Expression::Case(mut c) => {
1063 if let Some(operand) = c.operand {
1064 c.operand = Some(transform_recursive(operand, transform_fn)?);
1065 }
1066 c.whens = c
1067 .whens
1068 .into_iter()
1069 .map(|(cond, then)| {
1070 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
1071 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
1072 (new_cond, new_then)
1073 })
1074 .collect();
1075 if let Some(else_expr) = c.else_ {
1076 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
1077 }
1078 Expression::Case(c)
1079 }
1080 Expression::IfFunc(mut f) => {
1081 f.condition = transform_recursive(f.condition, transform_fn)?;
1082 f.true_value = transform_recursive(f.true_value, transform_fn)?;
1083 if let Some(false_val) = f.false_value {
1084 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
1085 }
1086 Expression::IfFunc(f)
1087 }
1088
1089 // ===== Date/Time expressions =====
1090 Expression::DateAdd(mut f) => {
1091 f.this = transform_recursive(f.this, transform_fn)?;
1092 f.interval = transform_recursive(f.interval, transform_fn)?;
1093 Expression::DateAdd(f)
1094 }
1095 Expression::DateSub(mut f) => {
1096 f.this = transform_recursive(f.this, transform_fn)?;
1097 f.interval = transform_recursive(f.interval, transform_fn)?;
1098 Expression::DateSub(f)
1099 }
1100 Expression::DateDiff(mut f) => {
1101 f.this = transform_recursive(f.this, transform_fn)?;
1102 f.expression = transform_recursive(f.expression, transform_fn)?;
1103 Expression::DateDiff(f)
1104 }
1105 Expression::DateTrunc(mut f) => {
1106 f.this = transform_recursive(f.this, transform_fn)?;
1107 Expression::DateTrunc(f)
1108 }
1109 Expression::Extract(mut f) => {
1110 f.this = transform_recursive(f.this, transform_fn)?;
1111 Expression::Extract(f)
1112 }
1113
1114 // ===== JSON expressions =====
1115 Expression::JsonObject(mut f) => {
1116 f.pairs = f
1117 .pairs
1118 .into_iter()
1119 .map(|(k, v)| {
1120 let new_k = transform_recursive(k, transform_fn)?;
1121 let new_v = transform_recursive(v, transform_fn)?;
1122 Ok((new_k, new_v))
1123 })
1124 .collect::<Result<Vec<_>>>()?;
1125 Expression::JsonObject(f)
1126 }
1127
1128 // ===== Subquery expressions =====
1129 Expression::Subquery(mut s) => {
1130 s.this = transform_recursive(s.this, transform_fn)?;
1131 Expression::Subquery(s)
1132 }
1133 Expression::Exists(mut e) => {
1134 e.this = transform_recursive(e.this, transform_fn)?;
1135 Expression::Exists(e)
1136 }
1137
1138 // ===== Set operations =====
1139 Expression::Union(mut u) => {
1140 u.left = transform_recursive(u.left, transform_fn)?;
1141 u.right = transform_recursive(u.right, transform_fn)?;
1142 Expression::Union(u)
1143 }
1144 Expression::Intersect(mut i) => {
1145 i.left = transform_recursive(i.left, transform_fn)?;
1146 i.right = transform_recursive(i.right, transform_fn)?;
1147 Expression::Intersect(i)
1148 }
1149 Expression::Except(mut e) => {
1150 e.left = transform_recursive(e.left, transform_fn)?;
1151 e.right = transform_recursive(e.right, transform_fn)?;
1152 Expression::Except(e)
1153 }
1154
1155 // ===== DML expressions =====
1156 Expression::Insert(mut ins) => {
1157 // Transform VALUES clause expressions
1158 let mut new_values = Vec::new();
1159 for row in ins.values {
1160 let mut new_row = Vec::new();
1161 for e in row {
1162 new_row.push(transform_recursive(e, transform_fn)?);
1163 }
1164 new_values.push(new_row);
1165 }
1166 ins.values = new_values;
1167
1168 // Transform query (for INSERT ... SELECT)
1169 if let Some(query) = ins.query {
1170 ins.query = Some(transform_recursive(query, transform_fn)?);
1171 }
1172
1173 // Transform RETURNING clause
1174 let mut new_returning = Vec::new();
1175 for e in ins.returning {
1176 new_returning.push(transform_recursive(e, transform_fn)?);
1177 }
1178 ins.returning = new_returning;
1179
1180 // Transform ON CONFLICT clause
1181 if let Some(on_conflict) = ins.on_conflict {
1182 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
1183 }
1184
1185 Expression::Insert(ins)
1186 }
1187 Expression::Update(mut upd) => {
1188 upd.set = upd
1189 .set
1190 .into_iter()
1191 .map(|(id, val)| {
1192 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
1193 (id, new_val)
1194 })
1195 .collect();
1196 if let Some(mut where_clause) = upd.where_clause.take() {
1197 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1198 upd.where_clause = Some(where_clause);
1199 }
1200 Expression::Update(upd)
1201 }
1202 Expression::Delete(mut del) => {
1203 if let Some(mut where_clause) = del.where_clause.take() {
1204 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1205 del.where_clause = Some(where_clause);
1206 }
1207 Expression::Delete(del)
1208 }
1209
1210 // ===== CTE expressions =====
1211 Expression::With(mut w) => {
1212 w.ctes = w
1213 .ctes
1214 .into_iter()
1215 .map(|mut cte| {
1216 let original = cte.this.clone();
1217 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1218 cte
1219 })
1220 .collect();
1221 Expression::With(w)
1222 }
1223 Expression::Cte(mut c) => {
1224 c.this = transform_recursive(c.this, transform_fn)?;
1225 Expression::Cte(c)
1226 }
1227
1228 // ===== Order expressions =====
1229 Expression::Ordered(mut o) => {
1230 o.this = transform_recursive(o.this, transform_fn)?;
1231 Expression::Ordered(o)
1232 }
1233
1234 // ===== Negation =====
1235 Expression::Neg(mut n) => {
1236 n.this = transform_recursive(n.this, transform_fn)?;
1237 Expression::Neg(n)
1238 }
1239
1240 // ===== Between =====
1241 Expression::Between(mut b) => {
1242 b.this = transform_recursive(b.this, transform_fn)?;
1243 b.low = transform_recursive(b.low, transform_fn)?;
1244 b.high = transform_recursive(b.high, transform_fn)?;
1245 Expression::Between(b)
1246 }
1247 Expression::IsNull(mut i) => {
1248 i.this = transform_recursive(i.this, transform_fn)?;
1249 Expression::IsNull(i)
1250 }
1251 Expression::IsTrue(mut i) => {
1252 i.this = transform_recursive(i.this, transform_fn)?;
1253 Expression::IsTrue(i)
1254 }
1255 Expression::IsFalse(mut i) => {
1256 i.this = transform_recursive(i.this, transform_fn)?;
1257 Expression::IsFalse(i)
1258 }
1259
1260 // ===== Like expressions =====
1261 Expression::Like(mut l) => {
1262 l.left = transform_recursive(l.left, transform_fn)?;
1263 l.right = transform_recursive(l.right, transform_fn)?;
1264 Expression::Like(l)
1265 }
1266 Expression::ILike(mut l) => {
1267 l.left = transform_recursive(l.left, transform_fn)?;
1268 l.right = transform_recursive(l.right, transform_fn)?;
1269 Expression::ILike(l)
1270 }
1271
1272 // ===== Additional binary ops not covered by macro =====
1273 Expression::Neq(op) => transform_binary!(Neq, *op),
1274 Expression::Lte(op) => transform_binary!(Lte, *op),
1275 Expression::Gte(op) => transform_binary!(Gte, *op),
1276 Expression::Mod(op) => transform_binary!(Mod, *op),
1277 Expression::Concat(op) => transform_binary!(Concat, *op),
1278 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
1279 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
1280 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
1281 Expression::Is(op) => transform_binary!(Is, *op),
1282
1283 // ===== TryCast / SafeCast =====
1284 Expression::TryCast(mut c) => {
1285 c.this = transform_recursive(c.this, transform_fn)?;
1286 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1287 Expression::TryCast(c)
1288 }
1289 Expression::SafeCast(mut c) => {
1290 c.this = transform_recursive(c.this, transform_fn)?;
1291 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1292 Expression::SafeCast(c)
1293 }
1294
1295 // ===== Misc =====
1296 Expression::Unnest(mut f) => {
1297 f.this = transform_recursive(f.this, transform_fn)?;
1298 f.expressions = f
1299 .expressions
1300 .into_iter()
1301 .map(|e| transform_recursive(e, transform_fn))
1302 .collect::<Result<Vec<_>>>()?;
1303 Expression::Unnest(f)
1304 }
1305 Expression::Explode(mut f) => {
1306 f.this = transform_recursive(f.this, transform_fn)?;
1307 Expression::Explode(f)
1308 }
1309 Expression::GroupConcat(mut f) => {
1310 f.this = transform_recursive(f.this, transform_fn)?;
1311 Expression::GroupConcat(f)
1312 }
1313 Expression::StringAgg(mut f) => {
1314 f.this = transform_recursive(f.this, transform_fn)?;
1315 Expression::StringAgg(f)
1316 }
1317 Expression::ListAgg(mut f) => {
1318 f.this = transform_recursive(f.this, transform_fn)?;
1319 Expression::ListAgg(f)
1320 }
1321 Expression::ArrayAgg(mut f) => {
1322 f.this = transform_recursive(f.this, transform_fn)?;
1323 Expression::ArrayAgg(f)
1324 }
1325 Expression::ParseJson(mut f) => {
1326 f.this = transform_recursive(f.this, transform_fn)?;
1327 Expression::ParseJson(f)
1328 }
1329 Expression::ToJson(mut f) => {
1330 f.this = transform_recursive(f.this, transform_fn)?;
1331 Expression::ToJson(f)
1332 }
1333 Expression::JSONExtract(mut e) => {
1334 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1335 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1336 Expression::JSONExtract(e)
1337 }
1338 Expression::JSONExtractScalar(mut e) => {
1339 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1340 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
1341 Expression::JSONExtractScalar(e)
1342 }
1343
1344 // StrToTime: recurse into this
1345 Expression::StrToTime(mut e) => {
1346 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1347 Expression::StrToTime(e)
1348 }
1349
1350 // UnixToTime: recurse into this
1351 Expression::UnixToTime(mut e) => {
1352 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
1353 Expression::UnixToTime(e)
1354 }
1355
1356 // CreateTable: recurse into column defaults, on_update expressions, and data types
1357 Expression::CreateTable(mut ct) => {
1358 for col in &mut ct.columns {
1359 if let Some(default_expr) = col.default.take() {
1360 col.default = Some(transform_recursive(default_expr, transform_fn)?);
1361 }
1362 if let Some(on_update_expr) = col.on_update.take() {
1363 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
1364 }
1365 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
1366 // are NOT applied here because per-dialect transforms are designed for CAST/expression
1367 // contexts and may not produce correct results for DDL column definitions.
1368 // The DDL type mappings would need dedicated handling per source/target pair.
1369 }
1370 if let Some(as_select) = ct.as_select.take() {
1371 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
1372 }
1373 Expression::CreateTable(ct)
1374 }
1375
1376 // CreateProcedure: recurse into body expressions
1377 Expression::CreateProcedure(mut cp) => {
1378 if let Some(body) = cp.body.take() {
1379 cp.body = Some(match body {
1380 FunctionBody::Expression(expr) => {
1381 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1382 }
1383 FunctionBody::Return(expr) => {
1384 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1385 }
1386 FunctionBody::Statements(stmts) => {
1387 let transformed_stmts = stmts
1388 .into_iter()
1389 .map(|s| transform_recursive(s, transform_fn))
1390 .collect::<Result<Vec<_>>>()?;
1391 FunctionBody::Statements(transformed_stmts)
1392 }
1393 other => other,
1394 });
1395 }
1396 Expression::CreateProcedure(cp)
1397 }
1398
1399 // CreateFunction: recurse into body expressions
1400 Expression::CreateFunction(mut cf) => {
1401 if let Some(body) = cf.body.take() {
1402 cf.body = Some(match body {
1403 FunctionBody::Expression(expr) => {
1404 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
1405 }
1406 FunctionBody::Return(expr) => {
1407 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
1408 }
1409 FunctionBody::Statements(stmts) => {
1410 let transformed_stmts = stmts
1411 .into_iter()
1412 .map(|s| transform_recursive(s, transform_fn))
1413 .collect::<Result<Vec<_>>>()?;
1414 FunctionBody::Statements(transformed_stmts)
1415 }
1416 other => other,
1417 });
1418 }
1419 Expression::CreateFunction(cf)
1420 }
1421
1422 // MemberOf: recurse into left and right operands
1423 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
1424 // ArrayContainsAll (@>): recurse into left and right operands
1425 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
1426 // ArrayContainedBy (<@): recurse into left and right operands
1427 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
1428 // ArrayOverlaps (&&): recurse into left and right operands
1429 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
1430 // TsMatch (@@): recurse into left and right operands
1431 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
1432 // Adjacent (-|-): recurse into left and right operands
1433 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
1434
1435 // Table: recurse into when (HistoricalData) and changes fields
1436 Expression::Table(mut t) => {
1437 if let Some(when) = t.when.take() {
1438 let transformed =
1439 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
1440 if let Expression::HistoricalData(hd) = transformed {
1441 t.when = Some(hd);
1442 }
1443 }
1444 if let Some(changes) = t.changes.take() {
1445 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
1446 if let Expression::Changes(c) = transformed {
1447 t.changes = Some(c);
1448 }
1449 }
1450 Expression::Table(t)
1451 }
1452
1453 // HistoricalData (Snowflake time travel): recurse into expression
1454 Expression::HistoricalData(mut hd) => {
1455 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
1456 Expression::HistoricalData(hd)
1457 }
1458
1459 // Changes (Snowflake CHANGES clause): recurse into at_before and end
1460 Expression::Changes(mut c) => {
1461 if let Some(at_before) = c.at_before.take() {
1462 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
1463 }
1464 if let Some(end) = c.end.take() {
1465 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
1466 }
1467 Expression::Changes(c)
1468 }
1469
1470 // TableArgument: TABLE(expr) or MODEL(expr)
1471 Expression::TableArgument(mut ta) => {
1472 ta.this = transform_recursive(ta.this, transform_fn)?;
1473 Expression::TableArgument(ta)
1474 }
1475
1476 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
1477 Expression::JoinedTable(mut jt) => {
1478 jt.left = transform_recursive(jt.left, transform_fn)?;
1479 for join in &mut jt.joins {
1480 join.this = transform_recursive(
1481 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
1482 transform_fn,
1483 )?;
1484 if let Some(on) = join.on.take() {
1485 join.on = Some(transform_recursive(on, transform_fn)?);
1486 }
1487 }
1488 jt.lateral_views = jt
1489 .lateral_views
1490 .into_iter()
1491 .map(|mut lv| {
1492 lv.this = transform_recursive(lv.this, transform_fn)?;
1493 Ok(lv)
1494 })
1495 .collect::<Result<Vec<_>>>()?;
1496 Expression::JoinedTable(jt)
1497 }
1498
1499 // Lateral: LATERAL func() - recurse into the function expression
1500 Expression::Lateral(mut lat) => {
1501 *lat.this = transform_recursive(*lat.this, transform_fn)?;
1502 Expression::Lateral(lat)
1503 }
1504
1505 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
1506 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
1507 // as a unit together with the WithinGroup wrapper
1508 Expression::WithinGroup(mut wg) => {
1509 wg.order_by = wg
1510 .order_by
1511 .into_iter()
1512 .map(|mut o| {
1513 let original = o.this.clone();
1514 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1515 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1516 Ok(Expression::Ordered(transformed)) => *transformed,
1517 Ok(_) | Err(_) => o,
1518 }
1519 })
1520 .collect();
1521 Expression::WithinGroup(wg)
1522 }
1523
1524 // Filter: recurse into both the aggregate and the filter condition
1525 Expression::Filter(mut f) => {
1526 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
1527 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
1528 Expression::Filter(f)
1529 }
1530
1531 // BitwiseOrAgg/BitwiseAndAgg/BitwiseXorAgg: recurse into the aggregate argument
1532 Expression::BitwiseOrAgg(mut f) => {
1533 f.this = transform_recursive(f.this, transform_fn)?;
1534 Expression::BitwiseOrAgg(f)
1535 }
1536 Expression::BitwiseAndAgg(mut f) => {
1537 f.this = transform_recursive(f.this, transform_fn)?;
1538 Expression::BitwiseAndAgg(f)
1539 }
1540 Expression::BitwiseXorAgg(mut f) => {
1541 f.this = transform_recursive(f.this, transform_fn)?;
1542 Expression::BitwiseXorAgg(f)
1543 }
1544 Expression::PipeOperator(mut pipe) => {
1545 pipe.this = transform_recursive(pipe.this, transform_fn)?;
1546 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
1547 Expression::PipeOperator(pipe)
1548 }
1549
1550 // ArrayExcept/ArrayContains/ArrayDistinct: recurse into children
1551 Expression::ArrayExcept(mut f) => {
1552 f.this = transform_recursive(f.this, transform_fn)?;
1553 f.expression = transform_recursive(f.expression, transform_fn)?;
1554 Expression::ArrayExcept(f)
1555 }
1556 Expression::ArrayContains(mut f) => {
1557 f.this = transform_recursive(f.this, transform_fn)?;
1558 f.expression = transform_recursive(f.expression, transform_fn)?;
1559 Expression::ArrayContains(f)
1560 }
1561 Expression::ArrayDistinct(mut f) => {
1562 f.this = transform_recursive(f.this, transform_fn)?;
1563 Expression::ArrayDistinct(f)
1564 }
1565 Expression::ArrayPosition(mut f) => {
1566 f.this = transform_recursive(f.this, transform_fn)?;
1567 f.expression = transform_recursive(f.expression, transform_fn)?;
1568 Expression::ArrayPosition(f)
1569 }
1570
1571 // Pass through leaf nodes unchanged
1572 other => other,
1573 };
1574
1575 // Then apply the transform function
1576 transform_fn(expr)
1577}
1578
// NOTE: the shared implementation that returns the tokenizer config, generator
// config, and expression transform closure for a built-in dialect type is
// `configs_for_dialect_type` below; it is used by both `Dialect::get()` and
// custom dialect construction.
// ---------------------------------------------------------------------------
// Cached dialect configurations
// ---------------------------------------------------------------------------
1585
/// Pre-computed tokenizer + generator configs for a dialect, cached via `LazyLock`.
/// Transform closures are cheap (unit-struct method calls) and created fresh each time.
struct CachedDialectConfig {
    // Lexing rules (quoting style, keywords, ...) for the dialect; cloned on each use.
    tokenizer_config: TokenizerConfig,
    // Generator settings shared behind an `Arc` so handing out a copy is a refcount bump.
    generator_config: Arc<GeneratorConfig>,
}
1592
1593/// Declare a per-dialect `LazyLock<CachedDialectConfig>` static.
1594macro_rules! cached_dialect {
1595 ($static_name:ident, $dialect_struct:expr, $feature:literal) => {
1596 #[cfg(feature = $feature)]
1597 static $static_name: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
1598 let d = $dialect_struct;
1599 CachedDialectConfig {
1600 tokenizer_config: d.tokenizer_config(),
1601 generator_config: Arc::new(d.generator_config()),
1602 }
1603 });
1604 };
1605}
1606
1607static CACHED_GENERIC: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
1608 let d = GenericDialect;
1609 CachedDialectConfig {
1610 tokenizer_config: d.tokenizer_config(),
1611 generator_config: Arc::new(d.generator_config()),
1612 }
1613});
1614
// One cached-config static per built-in dialect; each is gated behind its
// `dialect-*` cargo feature and lazily initialized on first use.
cached_dialect!(CACHED_POSTGRESQL, PostgresDialect, "dialect-postgresql");
cached_dialect!(CACHED_MYSQL, MySQLDialect, "dialect-mysql");
cached_dialect!(CACHED_BIGQUERY, BigQueryDialect, "dialect-bigquery");
cached_dialect!(CACHED_SNOWFLAKE, SnowflakeDialect, "dialect-snowflake");
cached_dialect!(CACHED_DUCKDB, DuckDBDialect, "dialect-duckdb");
cached_dialect!(CACHED_TSQL, TSQLDialect, "dialect-tsql");
cached_dialect!(CACHED_ORACLE, OracleDialect, "dialect-oracle");
cached_dialect!(CACHED_HIVE, HiveDialect, "dialect-hive");
cached_dialect!(CACHED_SPARK, SparkDialect, "dialect-spark");
cached_dialect!(CACHED_SQLITE, SQLiteDialect, "dialect-sqlite");
cached_dialect!(CACHED_PRESTO, PrestoDialect, "dialect-presto");
cached_dialect!(CACHED_TRINO, TrinoDialect, "dialect-trino");
cached_dialect!(CACHED_REDSHIFT, RedshiftDialect, "dialect-redshift");
cached_dialect!(CACHED_CLICKHOUSE, ClickHouseDialect, "dialect-clickhouse");
cached_dialect!(CACHED_DATABRICKS, DatabricksDialect, "dialect-databricks");
cached_dialect!(CACHED_ATHENA, AthenaDialect, "dialect-athena");
cached_dialect!(CACHED_TERADATA, TeradataDialect, "dialect-teradata");
cached_dialect!(CACHED_DORIS, DorisDialect, "dialect-doris");
cached_dialect!(CACHED_STARROCKS, StarRocksDialect, "dialect-starrocks");
cached_dialect!(CACHED_MATERIALIZE, MaterializeDialect, "dialect-materialize");
cached_dialect!(CACHED_RISINGWAVE, RisingWaveDialect, "dialect-risingwave");
cached_dialect!(CACHED_SINGLESTORE, SingleStoreDialect, "dialect-singlestore");
cached_dialect!(CACHED_COCKROACHDB, CockroachDBDialect, "dialect-cockroachdb");
cached_dialect!(CACHED_TIDB, TiDBDialect, "dialect-tidb");
cached_dialect!(CACHED_DRUID, DruidDialect, "dialect-druid");
cached_dialect!(CACHED_SOLR, SolrDialect, "dialect-solr");
cached_dialect!(CACHED_TABLEAU, TableauDialect, "dialect-tableau");
cached_dialect!(CACHED_DUNE, DuneDialect, "dialect-dune");
cached_dialect!(CACHED_FABRIC, FabricDialect, "dialect-fabric");
cached_dialect!(CACHED_DRILL, DrillDialect, "dialect-drill");
cached_dialect!(CACHED_DREMIO, DremioDialect, "dialect-dremio");
cached_dialect!(CACHED_EXASOL, ExasolDialect, "dialect-exasol");
cached_dialect!(CACHED_DATAFUSION, DataFusionDialect, "dialect-datafusion");
1648
/// Returns the tokenizer config, generator config, and expression transform closure
/// for a built-in dialect type. This is the shared implementation used by both
/// `Dialect::get()` and custom dialect construction.
///
/// The tokenizer/generator configs are cloned out of the per-dialect `LazyLock`
/// caches above; the transform closure is rebuilt on every call (cheap — it only
/// wraps a unit-struct method). Any dialect whose feature is compiled out, and any
/// `DialectType` without a dedicated arm, falls back to the generic dialect.
fn configs_for_dialect_type(
    dt: DialectType,
) -> (
    TokenizerConfig,
    Arc<GeneratorConfig>,
    Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
) {
    /// Clone configs from a cached static and pair with a fresh transform closure.
    macro_rules! from_cache {
        ($cache:expr, $dialect_struct:expr) => {{
            let c = &*$cache;
            (
                c.tokenizer_config.clone(),
                c.generator_config.clone(),
                Box::new(move |e| $dialect_struct.transform_expr(e)),
            )
        }};
    }
    match dt {
        #[cfg(feature = "dialect-postgresql")]
        DialectType::PostgreSQL => from_cache!(CACHED_POSTGRESQL, PostgresDialect),
        #[cfg(feature = "dialect-mysql")]
        DialectType::MySQL => from_cache!(CACHED_MYSQL, MySQLDialect),
        #[cfg(feature = "dialect-bigquery")]
        DialectType::BigQuery => from_cache!(CACHED_BIGQUERY, BigQueryDialect),
        #[cfg(feature = "dialect-snowflake")]
        DialectType::Snowflake => from_cache!(CACHED_SNOWFLAKE, SnowflakeDialect),
        #[cfg(feature = "dialect-duckdb")]
        DialectType::DuckDB => from_cache!(CACHED_DUCKDB, DuckDBDialect),
        #[cfg(feature = "dialect-tsql")]
        DialectType::TSQL => from_cache!(CACHED_TSQL, TSQLDialect),
        #[cfg(feature = "dialect-oracle")]
        DialectType::Oracle => from_cache!(CACHED_ORACLE, OracleDialect),
        #[cfg(feature = "dialect-hive")]
        DialectType::Hive => from_cache!(CACHED_HIVE, HiveDialect),
        #[cfg(feature = "dialect-spark")]
        DialectType::Spark => from_cache!(CACHED_SPARK, SparkDialect),
        #[cfg(feature = "dialect-sqlite")]
        DialectType::SQLite => from_cache!(CACHED_SQLITE, SQLiteDialect),
        #[cfg(feature = "dialect-presto")]
        DialectType::Presto => from_cache!(CACHED_PRESTO, PrestoDialect),
        #[cfg(feature = "dialect-trino")]
        DialectType::Trino => from_cache!(CACHED_TRINO, TrinoDialect),
        #[cfg(feature = "dialect-redshift")]
        DialectType::Redshift => from_cache!(CACHED_REDSHIFT, RedshiftDialect),
        #[cfg(feature = "dialect-clickhouse")]
        DialectType::ClickHouse => from_cache!(CACHED_CLICKHOUSE, ClickHouseDialect),
        #[cfg(feature = "dialect-databricks")]
        DialectType::Databricks => from_cache!(CACHED_DATABRICKS, DatabricksDialect),
        #[cfg(feature = "dialect-athena")]
        DialectType::Athena => from_cache!(CACHED_ATHENA, AthenaDialect),
        #[cfg(feature = "dialect-teradata")]
        DialectType::Teradata => from_cache!(CACHED_TERADATA, TeradataDialect),
        #[cfg(feature = "dialect-doris")]
        DialectType::Doris => from_cache!(CACHED_DORIS, DorisDialect),
        #[cfg(feature = "dialect-starrocks")]
        DialectType::StarRocks => from_cache!(CACHED_STARROCKS, StarRocksDialect),
        #[cfg(feature = "dialect-materialize")]
        DialectType::Materialize => from_cache!(CACHED_MATERIALIZE, MaterializeDialect),
        #[cfg(feature = "dialect-risingwave")]
        DialectType::RisingWave => from_cache!(CACHED_RISINGWAVE, RisingWaveDialect),
        #[cfg(feature = "dialect-singlestore")]
        DialectType::SingleStore => from_cache!(CACHED_SINGLESTORE, SingleStoreDialect),
        #[cfg(feature = "dialect-cockroachdb")]
        DialectType::CockroachDB => from_cache!(CACHED_COCKROACHDB, CockroachDBDialect),
        #[cfg(feature = "dialect-tidb")]
        DialectType::TiDB => from_cache!(CACHED_TIDB, TiDBDialect),
        #[cfg(feature = "dialect-druid")]
        DialectType::Druid => from_cache!(CACHED_DRUID, DruidDialect),
        #[cfg(feature = "dialect-solr")]
        DialectType::Solr => from_cache!(CACHED_SOLR, SolrDialect),
        #[cfg(feature = "dialect-tableau")]
        DialectType::Tableau => from_cache!(CACHED_TABLEAU, TableauDialect),
        #[cfg(feature = "dialect-dune")]
        DialectType::Dune => from_cache!(CACHED_DUNE, DuneDialect),
        #[cfg(feature = "dialect-fabric")]
        DialectType::Fabric => from_cache!(CACHED_FABRIC, FabricDialect),
        #[cfg(feature = "dialect-drill")]
        DialectType::Drill => from_cache!(CACHED_DRILL, DrillDialect),
        #[cfg(feature = "dialect-dremio")]
        DialectType::Dremio => from_cache!(CACHED_DREMIO, DremioDialect),
        #[cfg(feature = "dialect-exasol")]
        DialectType::Exasol => from_cache!(CACHED_EXASOL, ExasolDialect),
        #[cfg(feature = "dialect-datafusion")]
        DialectType::DataFusion => from_cache!(CACHED_DATAFUSION, DataFusionDialect),
        // Fallback: Generic itself, plus any dialect whose feature is compiled out.
        _ => from_cache!(CACHED_GENERIC, GenericDialect),
    }
}
1737
1738// ---------------------------------------------------------------------------
1739// Custom dialect registry
1740// ---------------------------------------------------------------------------
1741
/// Global registry of user-defined dialects, keyed by registered name.
///
/// Guarded by an `RwLock` so lookups can proceed concurrently while
/// registration/unregistration takes the write lock. Values are `Arc`-wrapped so a
/// lookup can hand out a cheap clone without holding the lock.
static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));
1744
/// Immutable configuration for a registered custom dialect.
///
/// Built by [`CustomDialectBuilder::register`] and stored in the global registry;
/// `Dialect::from_custom_config` turns it into a usable [`Dialect`].
struct CustomDialectConfig {
    /// Registry key; the name callers pass to [`Dialect::get_by_name`].
    name: String,
    /// Built-in dialect whose configuration this custom dialect inherits.
    base_dialect: DialectType,
    /// Tokenizer configuration (base config after any builder modifier ran).
    tokenizer_config: TokenizerConfig,
    /// Generator configuration (base config after any builder modifier ran).
    generator_config: GeneratorConfig,
    /// Optional per-node expression transform; replaces the base dialect's transform.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocessing function; replaces built-in preprocessing.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1753
/// Fluent builder for creating and registering custom SQL dialects.
///
/// A custom dialect is based on an existing built-in dialect and allows selective
/// overrides of tokenizer configuration, generator configuration, and expression
/// transforms.
///
/// # Example
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
/// use polyglot_sql::generator::NormalizeFunctions;
///
/// CustomDialectBuilder::new("my_postgres")
///     .based_on(DialectType::PostgreSQL)
///     .generator_config_modifier(|gc| {
///         gc.normalize_functions = NormalizeFunctions::Lower;
///     })
///     .register()
///     .unwrap();
///
/// let d = Dialect::get_by_name("my_postgres").unwrap();
/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
/// let sql = d.generate(&exprs[0]).unwrap();
/// assert_eq!(sql, "select count(*)");
///
/// polyglot_sql::unregister_custom_dialect("my_postgres");
/// ```
pub struct CustomDialectBuilder {
    /// Name under which the dialect will be registered.
    name: String,
    /// Built-in dialect to inherit tokenizer/generator configuration from.
    base_dialect: DialectType,
    /// Deferred tokenizer-config override; applied once at `register` time.
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    /// Deferred generator-config override; applied once at `register` time.
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    /// Optional per-node expression transform (replaces the base dialect's).
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocess (replaces the base dialect's built-in pass).
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1789
1790impl CustomDialectBuilder {
1791 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
1792 pub fn new(name: impl Into<String>) -> Self {
1793 Self {
1794 name: name.into(),
1795 base_dialect: DialectType::Generic,
1796 tokenizer_modifier: None,
1797 generator_modifier: None,
1798 transform: None,
1799 preprocess: None,
1800 }
1801 }
1802
1803 /// Set the base built-in dialect to inherit configuration from.
1804 pub fn based_on(mut self, dialect: DialectType) -> Self {
1805 self.base_dialect = dialect;
1806 self
1807 }
1808
1809 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
1810 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
1811 where
1812 F: FnOnce(&mut TokenizerConfig) + 'static,
1813 {
1814 self.tokenizer_modifier = Some(Box::new(f));
1815 self
1816 }
1817
1818 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
1819 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
1820 where
1821 F: FnOnce(&mut GeneratorConfig) + 'static,
1822 {
1823 self.generator_modifier = Some(Box::new(f));
1824 self
1825 }
1826
1827 /// Set a custom per-node expression transform function.
1828 ///
1829 /// This replaces the base dialect's transform. It is called on every expression
1830 /// node during the recursive transform pass.
1831 pub fn transform_fn<F>(mut self, f: F) -> Self
1832 where
1833 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1834 {
1835 self.transform = Some(Arc::new(f));
1836 self
1837 }
1838
1839 /// Set a custom whole-tree preprocessing function.
1840 ///
1841 /// This replaces the base dialect's built-in preprocessing. It is called once
1842 /// on the entire expression tree before the recursive per-node transform.
1843 pub fn preprocess_fn<F>(mut self, f: F) -> Self
1844 where
1845 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
1846 {
1847 self.preprocess = Some(Arc::new(f));
1848 self
1849 }
1850
1851 /// Build the custom dialect configuration and register it in the global registry.
1852 ///
1853 /// Returns an error if:
1854 /// - The name collides with a built-in dialect name
1855 /// - A custom dialect with the same name is already registered
1856 pub fn register(self) -> Result<()> {
1857 // Reject names that collide with built-in dialects
1858 if DialectType::from_str(&self.name).is_ok() {
1859 return Err(crate::error::Error::parse(
1860 format!(
1861 "Cannot register custom dialect '{}': name collides with built-in dialect",
1862 self.name
1863 ),
1864 0,
1865 0,
1866 0,
1867 0,
1868 ));
1869 }
1870
1871 // Get base configs
1872 let (mut tok_config, arc_gen_config, _base_transform) =
1873 configs_for_dialect_type(self.base_dialect);
1874 let mut gen_config = (*arc_gen_config).clone();
1875
1876 // Apply modifiers
1877 if let Some(tok_mod) = self.tokenizer_modifier {
1878 tok_mod(&mut tok_config);
1879 }
1880 if let Some(gen_mod) = self.generator_modifier {
1881 gen_mod(&mut gen_config);
1882 }
1883
1884 let config = CustomDialectConfig {
1885 name: self.name.clone(),
1886 base_dialect: self.base_dialect,
1887 tokenizer_config: tok_config,
1888 generator_config: gen_config,
1889 transform: self.transform,
1890 preprocess: self.preprocess,
1891 };
1892
1893 register_custom_dialect(config)
1894 }
1895}
1896
1897use std::str::FromStr;
1898
1899fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
1900 let mut registry = CUSTOM_DIALECT_REGISTRY.write().map_err(|e| {
1901 crate::error::Error::parse(format!("Registry lock poisoned: {}", e), 0, 0, 0, 0)
1902 })?;
1903
1904 if registry.contains_key(&config.name) {
1905 return Err(crate::error::Error::parse(
1906 format!("Custom dialect '{}' is already registered", config.name),
1907 0,
1908 0,
1909 0,
1910 0,
1911 ));
1912 }
1913
1914 registry.insert(config.name.clone(), Arc::new(config));
1915 Ok(())
1916}
1917
1918/// Remove a custom dialect from the global registry.
1919///
1920/// Returns `true` if a dialect with that name was found and removed,
1921/// `false` if no such custom dialect existed.
1922pub fn unregister_custom_dialect(name: &str) -> bool {
1923 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
1924 registry.remove(name).is_some()
1925 } else {
1926 false
1927 }
1928}
1929
1930fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
1931 CUSTOM_DIALECT_REGISTRY
1932 .read()
1933 .ok()
1934 .and_then(|registry| registry.get(name).cloned())
1935}
1936
/// Main entry point for dialect-specific SQL operations.
///
/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
/// transformer for a specific SQL database engine. It is the high-level API through
/// which callers parse, generate, transform, and transpile SQL.
///
/// # Usage
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{Dialect, DialectType};
///
/// // Parse PostgreSQL SQL into an AST
/// let pg = Dialect::get(DialectType::PostgreSQL);
/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
///
/// // Transpile from PostgreSQL to BigQuery
/// let results = pg.transpile_to("SELECT NOW()", DialectType::BigQuery)?;
/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
/// ```
///
/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    /// Which dialect this instance represents (for custom dialects, the base dialect).
    dialect_type: DialectType,
    /// Tokenizer built from this dialect's tokenizer configuration.
    tokenizer: Tokenizer,
    /// Shared generator configuration used by all `generate*` methods.
    generator_config: Arc<GeneratorConfig>,
    /// Per-node transform applied recursively by [`Dialect::transform`].
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
1969
1970impl Dialect {
1971 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
1972 ///
1973 /// This is the primary constructor. It initializes the tokenizer, generator config,
1974 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
1975 /// For hybrid dialects like Athena, it also sets up expression-specific generator
1976 /// config routing.
1977 pub fn get(dialect_type: DialectType) -> Self {
1978 let (tokenizer_config, generator_config, transformer) =
1979 configs_for_dialect_type(dialect_type);
1980
1981 // Set up expression-specific generator config for hybrid dialects
1982 let generator_config_for_expr: Option<
1983 Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
1984 > = match dialect_type {
1985 #[cfg(feature = "dialect-athena")]
1986 DialectType::Athena => Some(Box::new(|expr| {
1987 AthenaDialect.generator_config_for_expr(expr)
1988 })),
1989 _ => None,
1990 };
1991
1992 Self {
1993 dialect_type,
1994 tokenizer: Tokenizer::new(tokenizer_config),
1995 generator_config,
1996 transformer,
1997 generator_config_for_expr,
1998 custom_preprocess: None,
1999 }
2000 }
2001
2002 /// Look up a dialect by string name.
2003 ///
2004 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
2005 /// falls back to the custom dialect registry. Returns `None` if no dialect
2006 /// with the given name exists.
2007 pub fn get_by_name(name: &str) -> Option<Self> {
2008 // Try built-in first
2009 if let Ok(dt) = DialectType::from_str(name) {
2010 return Some(Self::get(dt));
2011 }
2012
2013 // Try custom registry
2014 let config = get_custom_dialect_config(name)?;
2015 Some(Self::from_custom_config(&config))
2016 }
2017
2018 /// Construct a `Dialect` from a custom dialect configuration.
2019 fn from_custom_config(config: &CustomDialectConfig) -> Self {
2020 // Build the transformer: use custom if provided, else use base dialect's
2021 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
2022 if let Some(ref custom_transform) = config.transform {
2023 let t = Arc::clone(custom_transform);
2024 Box::new(move |e| t(e))
2025 } else {
2026 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
2027 base_transform
2028 };
2029
2030 // Build the custom preprocess: use custom if provided
2031 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
2032 config.preprocess.as_ref().map(|p| {
2033 let p = Arc::clone(p);
2034 Box::new(move |e: Expression| p(e))
2035 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
2036 });
2037
2038 Self {
2039 dialect_type: config.base_dialect,
2040 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
2041 generator_config: Arc::new(config.generator_config.clone()),
2042 transformer,
2043 generator_config_for_expr: None,
2044 custom_preprocess,
2045 }
2046 }
2047
    /// Returns the [`DialectType`] this instance was built for.
    ///
    /// For custom dialects this is the *base* dialect type the custom
    /// dialect inherits from.
    pub fn dialect_type(&self) -> DialectType {
        self.dialect_type
    }
2052
    /// Returns a reference to this dialect's generator configuration.
    pub fn generator_config(&self) -> &GeneratorConfig {
        &self.generator_config
    }
2057
2058 /// Parses a SQL string into a list of [`Expression`] AST nodes.
2059 ///
2060 /// The input may contain multiple semicolon-separated statements; each one
2061 /// produces a separate element in the returned vector. Tokenization uses
2062 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
2063 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
2064 let tokens = self.tokenizer.tokenize(sql)?;
2065 let config = crate::parser::ParserConfig {
2066 dialect: Some(self.dialect_type),
2067 ..Default::default()
2068 };
2069 let mut parser = Parser::with_source(tokens, config, sql.to_string());
2070 parser.parse()
2071 }
2072
    /// Tokenize SQL using this dialect's tokenizer configuration.
    ///
    /// Unlike [`parse`](Dialect::parse), this stops after lexing and returns
    /// the raw token stream without building an AST.
    pub fn tokenize(&self, sql: &str) -> Result<Vec<Token>> {
        self.tokenizer.tokenize(sql)
    }
2077
2078 /// Get the generator config for a specific expression (supports hybrid dialects).
2079 /// Returns an owned `GeneratorConfig` suitable for mutation before generation.
2080 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
2081 if let Some(ref config_fn) = self.generator_config_for_expr {
2082 config_fn(expr)
2083 } else {
2084 (*self.generator_config).clone()
2085 }
2086 }
2087
2088 /// Generates a SQL string from an [`Expression`] AST node.
2089 ///
2090 /// The output uses this dialect's generator configuration for identifier quoting,
2091 /// keyword casing, function name normalization, and syntax style. The result is
2092 /// a single-line (non-pretty) SQL string.
2093 pub fn generate(&self, expr: &Expression) -> Result<String> {
2094 // Fast path: when no per-expression config override, share the Arc cheaply.
2095 if self.generator_config_for_expr.is_none() {
2096 let mut generator = Generator::with_arc_config(self.generator_config.clone());
2097 return generator.generate(expr);
2098 }
2099 let config = self.get_config_for_expr(expr);
2100 let mut generator = Generator::with_config(config);
2101 generator.generate(expr)
2102 }
2103
2104 /// Generate SQL from an expression with pretty printing enabled
2105 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
2106 let mut config = self.get_config_for_expr(expr);
2107 config.pretty = true;
2108 let mut generator = Generator::with_config(config);
2109 generator.generate(expr)
2110 }
2111
2112 /// Generate SQL from an expression with source dialect info (for transpilation)
2113 pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
2114 let mut config = self.get_config_for_expr(expr);
2115 config.source_dialect = Some(source);
2116 let mut generator = Generator::with_config(config);
2117 generator.generate(expr)
2118 }
2119
2120 /// Generate SQL from an expression with pretty printing and source dialect info
2121 pub fn generate_pretty_with_source(
2122 &self,
2123 expr: &Expression,
2124 source: DialectType,
2125 ) -> Result<String> {
2126 let mut config = self.get_config_for_expr(expr);
2127 config.pretty = true;
2128 config.source_dialect = Some(source);
2129 let mut generator = Generator::with_config(config);
2130 generator.generate(expr)
2131 }
2132
2133 /// Generate SQL from an expression with forced identifier quoting (identify=True)
2134 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
2135 let mut config = self.get_config_for_expr(expr);
2136 config.always_quote_identifiers = true;
2137 let mut generator = Generator::with_config(config);
2138 generator.generate(expr)
2139 }
2140
2141 /// Generate SQL from an expression with pretty printing and forced identifier quoting
2142 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
2143 let mut config = (*self.generator_config).clone();
2144 config.pretty = true;
2145 config.always_quote_identifiers = true;
2146 let mut generator = Generator::with_config(config);
2147 generator.generate(expr)
2148 }
2149
2150 /// Generate SQL from an expression with caller-specified config overrides
2151 pub fn generate_with_overrides(
2152 &self,
2153 expr: &Expression,
2154 overrides: impl FnOnce(&mut GeneratorConfig),
2155 ) -> Result<String> {
2156 let mut config = self.get_config_for_expr(expr);
2157 overrides(&mut config);
2158 let mut generator = Generator::with_config(config);
2159 generator.generate(expr)
2160 }
2161
2162 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
2163 ///
2164 /// The transformation proceeds in two phases:
2165 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
2166 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
2167 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
2168 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
2169 ///
2170 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
2171 /// and for identity transforms (normalizing SQL within the same dialect).
2172 pub fn transform(&self, expr: Expression) -> Result<Expression> {
2173 // Apply preprocessing transforms based on dialect
2174 let preprocessed = self.preprocess(expr)?;
2175 // Then apply recursive transformation
2176 transform_recursive(preprocessed, &self.transformer)
2177 }
2178
    /// Apply dialect-specific preprocessing transforms.
    ///
    /// Phase 1 of [`Dialect::transform`]: whole-tree structural rewrites that must
    /// run before the recursive per-node pass. The order of transforms within each
    /// arm is significant (e.g. QUALIFY elimination before join rewrites).
    /// Custom dialects with a `preprocess_fn` bypass all built-in logic below.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        // If a custom preprocess function is set, use it instead of the built-in logic
        if let Some(ref custom_preprocess) = self.custom_preprocess {
            return custom_preprocess(expr);
        }

        // Gate the import on the same features as the match arms that use it,
        // so feature subsets without any of these dialects don't get an
        // unused-import warning.
        #[cfg(any(
            feature = "dialect-mysql",
            feature = "dialect-postgresql",
            feature = "dialect-bigquery",
            feature = "dialect-snowflake",
            feature = "dialect-tsql",
            feature = "dialect-spark",
            feature = "dialect-databricks",
            feature = "dialect-hive",
            feature = "dialect-sqlite",
            feature = "dialect-trino",
            feature = "dialect-presto",
            feature = "dialect-duckdb",
            feature = "dialect-redshift",
            feature = "dialect-starrocks",
            feature = "dialect-oracle",
            feature = "dialect-clickhouse",
        ))]
        use crate::transforms;

        match self.dialect_type {
            // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
            // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
            #[cfg(feature = "dialect-mysql")]
            DialectType::MySQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_full_outer_join(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // PostgreSQL doesn't support QUALIFY
            // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
            // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
            #[cfg(feature = "dialect-postgresql")]
            DialectType::PostgreSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
                // Normalize SET ... TO to SET ... = in CREATE FUNCTION
                // Only normalize when sqlglot would fully parse (no body) —
                // sqlglot falls back to Command for complex function bodies,
                // preserving the original text including TO.
                let expr = if let Expression::CreateFunction(mut cf) = expr {
                    if cf.body.is_none() {
                        for opt in &mut cf.set_options {
                            if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
                                &mut opt.value
                            {
                                *use_to = false;
                            }
                        }
                    }
                    Expression::CreateFunction(cf)
                } else {
                    expr
                };
                Ok(expr)
            }
            // BigQuery doesn't support DISTINCT ON or CTE column aliases
            #[cfg(feature = "dialect-bigquery")]
            DialectType::BigQuery => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::pushdown_cte_column_names(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
                Ok(expr)
            }
            // Snowflake
            #[cfg(feature = "dialect-snowflake")]
            DialectType::Snowflake => {
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
                Ok(expr)
            }
            // TSQL doesn't support QUALIFY
            // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
            // TSQL doesn't support CTEs in subqueries (hoist to top level)
            // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
            // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
            #[cfg(feature = "dialect-tsql")]
            DialectType::TSQL => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
                let expr = transforms::ensure_bools(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                let expr = transforms::qualify_derived_table_outputs(expr)?;
                Ok(expr)
            }
            // Spark doesn't support QUALIFY (but Databricks does)
            // Spark doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-spark")]
            DialectType::Spark => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Databricks supports QUALIFY natively
            // Databricks doesn't support CTEs in subqueries (hoist to top level)
            #[cfg(feature = "dialect-databricks")]
            DialectType::Databricks => {
                let expr = transforms::add_auto_table_alias(expr)?;
                let expr = transforms::simplify_nested_paren_values(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // Hive doesn't support QUALIFY or CTEs in subqueries
            #[cfg(feature = "dialect-hive")]
            DialectType::Hive => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::move_ctes_to_top_level(expr)?;
                Ok(expr)
            }
            // SQLite doesn't support QUALIFY
            #[cfg(feature = "dialect-sqlite")]
            DialectType::SQLite => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Trino doesn't support QUALIFY
            #[cfg(feature = "dialect-trino")]
            DialectType::Trino => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
                Ok(expr)
            }
            // Presto doesn't support QUALIFY or WINDOW clause
            #[cfg(feature = "dialect-presto")]
            DialectType::Presto => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
                Ok(expr)
            }
            // DuckDB supports QUALIFY - no elimination needed
            // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
            // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
            #[cfg(feature = "dialect-duckdb")]
            DialectType::DuckDB => {
                let expr = transforms::expand_posexplode_duckdb(expr)?;
                let expr = transforms::expand_like_any(expr)?;
                Ok(expr)
            }
            // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
            #[cfg(feature = "dialect-redshift")]
            DialectType::Redshift => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::eliminate_window_clause(expr)?;
                let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
                Ok(expr)
            }
            // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
            #[cfg(feature = "dialect-starrocks")]
            DialectType::StarRocks => {
                let expr = transforms::eliminate_qualify(expr)?;
                let expr = transforms::expand_between_in_delete(expr)?;
                Ok(expr)
            }
            // DataFusion supports QUALIFY and semi/anti joins natively
            #[cfg(feature = "dialect-datafusion")]
            DialectType::DataFusion => Ok(expr),
            // Oracle doesn't support QUALIFY
            #[cfg(feature = "dialect-oracle")]
            DialectType::Oracle => {
                let expr = transforms::eliminate_qualify(expr)?;
                Ok(expr)
            }
            // Drill - no special preprocessing needed
            #[cfg(feature = "dialect-drill")]
            DialectType::Drill => Ok(expr),
            // Teradata - no special preprocessing needed
            #[cfg(feature = "dialect-teradata")]
            DialectType::Teradata => Ok(expr),
            // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
            #[cfg(feature = "dialect-clickhouse")]
            DialectType::ClickHouse => {
                let expr = transforms::no_limit_order_by_union(expr)?;
                Ok(expr)
            }
            // Other dialects - no preprocessing
            _ => Ok(expr),
        }
    }
2372
    /// Transpile SQL from this dialect to another.
    ///
    /// Parses `sql` with this (source) dialect, rewrites the AST for `target`,
    /// and generates one SQL string per input statement.
    pub fn transpile_to(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, false)
    }
2377
    /// Transpile SQL from this dialect to another with pretty printing enabled.
    pub fn transpile_to_pretty(&self, sql: &str, target: DialectType) -> Result<Vec<String>> {
        self.transpile_to_inner(sql, target, true)
    }
2382
2383 #[cfg(not(feature = "transpile"))]
2384 fn transpile_to_inner(
2385 &self,
2386 sql: &str,
2387 target: DialectType,
2388 pretty: bool,
2389 ) -> Result<Vec<String>> {
2390 // Without the transpile feature, only same-dialect or to/from generic is supported
2391 if self.dialect_type != target
2392 && self.dialect_type != DialectType::Generic
2393 && target != DialectType::Generic
2394 {
2395 return Err(crate::error::Error::parse(
2396 "Cross-dialect transpilation not available in this build",
2397 0,
2398 0,
2399 0,
2400 0,
2401 ));
2402 }
2403
2404 let expressions = self.parse(sql)?;
2405 let target_dialect = Dialect::get(target);
2406 let generic_identity =
2407 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
2408
2409 if generic_identity {
2410 return expressions
2411 .into_iter()
2412 .map(|expr| {
2413 if pretty {
2414 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
2415 } else {
2416 target_dialect.generate_with_source(&expr, self.dialect_type)
2417 }
2418 })
2419 .collect();
2420 }
2421
2422 expressions
2423 .into_iter()
2424 .map(|expr| {
2425 let transformed = target_dialect.transform(expr)?;
2426 if pretty {
2427 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)
2428 } else {
2429 target_dialect.generate_with_source(&transformed, self.dialect_type)
2430 }
2431 })
2432 .collect()
2433 }
2434
2435 #[cfg(feature = "transpile")]
2436 fn transpile_to_inner(
2437 &self,
2438 sql: &str,
2439 target: DialectType,
2440 pretty: bool,
2441 ) -> Result<Vec<String>> {
2442 let expressions = self.parse(sql)?;
2443 let target_dialect = Dialect::get(target);
2444 let generic_identity =
2445 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
2446
2447 if generic_identity {
2448 return expressions
2449 .into_iter()
2450 .map(|expr| {
2451 if pretty {
2452 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
2453 } else {
2454 target_dialect.generate_with_source(&expr, self.dialect_type)
2455 }
2456 })
2457 .collect();
2458 }
2459
2460 expressions
2461 .into_iter()
2462 .map(|expr| {
2463 // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
2464 // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
2465 // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
2466 let expr = if matches!(self.dialect_type, DialectType::DuckDB) {
2467 use crate::expressions::DataType as DT;
2468 transform_recursive(expr, &|e| match e {
2469 Expression::DataType(DT::VarChar { .. }) => {
2470 Ok(Expression::DataType(DT::Text))
2471 }
2472 Expression::DataType(DT::Char { .. }) => Ok(Expression::DataType(DT::Text)),
2473 _ => Ok(e),
2474 })?
2475 } else {
2476 expr
2477 };
2478
2479 // When source and target differ, first normalize the source dialect's
2480 // AST constructs to standard SQL, so that the target dialect can handle them.
2481 // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
2482 let normalized =
2483 if self.dialect_type != target && self.dialect_type != DialectType::Generic {
2484 self.transform(expr)?
2485 } else {
2486 expr
2487 };
2488
2489 // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
2490 // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
2491 // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
2492 // cross-dialect transpilation we need the unwrapped JSON_QUERY.
2493 let normalized =
2494 if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
2495 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
2496 {
2497 transform_recursive(normalized, &|e| {
2498 if let Expression::Function(ref f) = e {
2499 if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
2500 // Check if first arg is JSON_QUERY and second is JSON_VALUE
2501 if let (
2502 Expression::Function(ref jq),
2503 Expression::Function(ref jv),
2504 ) = (&f.args[0], &f.args[1])
2505 {
2506 if jq.name.eq_ignore_ascii_case("JSON_QUERY")
2507 && jv.name.eq_ignore_ascii_case("JSON_VALUE")
2508 {
2509 // Unwrap: return just JSON_QUERY(...)
2510 return Ok(f.args[0].clone());
2511 }
2512 }
2513 }
2514 }
2515 Ok(e)
2516 })?
2517 } else {
2518 normalized
2519 };
2520
2521 // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME
2522 // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects.
2523 // Python sqlglot parses Snowflake's CURRENT_TIME as Localtime expression.
2524 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
2525 && !matches!(target, DialectType::Snowflake)
2526 {
2527 transform_recursive(normalized, &|e| {
2528 if let Expression::Function(ref f) = e {
2529 if f.name.eq_ignore_ascii_case("CURRENT_TIME") {
2530 return Ok(Expression::Localtime(Box::new(
2531 crate::expressions::Localtime { this: None },
2532 )));
2533 }
2534 }
2535 Ok(e)
2536 })?
2537 } else {
2538 normalized
2539 };
2540
2541 // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT))
2542 // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
2543 // transform. DuckDB requires the count argument to be BIGINT.
2544 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
2545 && matches!(target, DialectType::DuckDB)
2546 {
2547 transform_recursive(normalized, &|e| {
2548 if let Expression::Function(ref f) = e {
2549 if f.name.eq_ignore_ascii_case("REPEAT") && f.args.len() == 2 {
2550 // Check if first arg is space string literal
2551 if let Expression::Literal(ref lit) = f.args[0]
2552 {
2553 if let crate::expressions::Literal::String(
2554 ref s,
2555 ) = lit.as_ref() {
2556 if s == " " {
2557 // Wrap second arg in CAST(... AS BIGINT) if not already
2558 if !matches!(f.args[1], Expression::Cast(_)) {
2559 let mut new_args = f.args.clone();
2560 new_args[1] = Expression::Cast(Box::new(
2561 crate::expressions::Cast {
2562 this: new_args[1].clone(),
2563 to: crate::expressions::DataType::BigInt {
2564 length: None,
2565 },
2566 trailing_comments: Vec::new(),
2567 double_colon_syntax: false,
2568 format: None,
2569 default: None,
2570 inferred_type: None,
2571 },
2572 ));
2573 return Ok(Expression::Function(Box::new(
2574 crate::expressions::Function {
2575 name: f.name.clone(),
2576 args: new_args,
2577 distinct: f.distinct,
2578 trailing_comments: f.trailing_comments.clone(),
2579 use_bracket_syntax: f.use_bracket_syntax,
2580 no_parens: f.no_parens,
2581 quoted: f.quoted,
2582 span: None,
2583 inferred_type: None,
2584 },
2585 )));
2586 }
2587 }
2588 }
2589 }
2590 }
2591 }
2592 Ok(e)
2593 })?
2594 } else {
2595 normalized
2596 };
2597
2598 // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
2599 // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
2600 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
2601 && !matches!(target, DialectType::BigQuery)
2602 {
2603 crate::transforms::propagate_struct_field_names(normalized)?
2604 } else {
2605 normalized
2606 };
2607
2608 // Snowflake source to DuckDB target: RANDOM()/RANDOM(seed) -> scaled RANDOM()
2609 // Snowflake RANDOM() returns integer in [-2^63, 2^63-1], DuckDB RANDOM() returns float [0, 1)
2610 // Skip RANDOM inside UNIFORM/NORMAL/ZIPF/RANDSTR generator args since those
2611 // functions handle their generator args differently (as float seeds).
2612 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
2613 && matches!(target, DialectType::DuckDB)
2614 {
2615 fn make_scaled_random() -> Expression {
2616 let lower = Expression::Literal(Box::new(crate::expressions::Literal::Number("-9.223372036854776E+18".to_string())));
2617 let upper = Expression::Literal(Box::new(crate::expressions::Literal::Number("9.223372036854776e+18".to_string())));
2618 let random_call = Expression::Random(crate::expressions::Random);
2619 let range_size = Expression::Paren(Box::new(crate::expressions::Paren {
2620 this: Expression::Sub(Box::new(crate::expressions::BinaryOp {
2621 left: upper,
2622 right: lower.clone(),
2623 left_comments: vec![],
2624 operator_comments: vec![],
2625 trailing_comments: vec![],
2626 inferred_type: None,
2627 })),
2628 trailing_comments: vec![],
2629 }));
2630 let scaled = Expression::Mul(Box::new(crate::expressions::BinaryOp {
2631 left: random_call,
2632 right: range_size,
2633 left_comments: vec![],
2634 operator_comments: vec![],
2635 trailing_comments: vec![],
2636 inferred_type: None,
2637 }));
2638 let shifted = Expression::Add(Box::new(crate::expressions::BinaryOp {
2639 left: lower,
2640 right: scaled,
2641 left_comments: vec![],
2642 operator_comments: vec![],
2643 trailing_comments: vec![],
2644 inferred_type: None,
2645 }));
2646 Expression::Cast(Box::new(crate::expressions::Cast {
2647 this: shifted,
2648 to: crate::expressions::DataType::BigInt { length: None },
2649 trailing_comments: vec![],
2650 double_colon_syntax: false,
2651 format: None,
2652 default: None,
2653 inferred_type: None,
2654 }))
2655 }
2656
2657 // Pre-process: protect seeded RANDOM(seed) inside UNIFORM/NORMAL/ZIPF/RANDSTR
2658 // by converting Rand{seed: Some(s)} to Function{name:"RANDOM", args:[s]}.
2659 // This prevents transform_recursive (which is bottom-up) from expanding
2660 // seeded RANDOM into make_scaled_random() and losing the seed value.
2661 // Unseeded RANDOM()/Rand{seed:None} is left as-is so it gets expanded
2662 // and then un-expanded back to Expression::Random by the code below.
2663 let normalized = transform_recursive(normalized, &|e| {
2664 if let Expression::Function(ref f) = e {
2665 let n = f.name.to_ascii_uppercase();
2666 if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" || n == "RANDSTR" {
2667 if let Expression::Function(mut f) = e {
2668 for arg in f.args.iter_mut() {
2669 if let Expression::Rand(ref r) = arg {
2670 if r.lower.is_none() && r.upper.is_none() {
2671 if let Some(ref seed) = r.seed {
2672 // Convert Rand{seed: Some(s)} to Function("RANDOM", [s])
2673 // so it won't be expanded by the RANDOM expansion below
2674 *arg = Expression::Function(Box::new(crate::expressions::Function::new(
2675 "RANDOM".to_string(),
2676 vec![*seed.clone()],
2677 )));
2678 }
2679 }
2680 }
2681 }
2682 return Ok(Expression::Function(f));
2683 }
2684 }
2685 }
2686 Ok(e)
2687 })?;
2688
2689 // transform_recursive processes bottom-up, so RANDOM() (unseeded) inside
2690 // generator functions (UNIFORM, NORMAL, ZIPF) gets expanded before
2691 // we see the parent. We detect this and undo the expansion by replacing
2692 // the expanded pattern back with Expression::Random.
2693 // Seeded RANDOM(seed) was already protected above as Function("RANDOM", [seed]).
2694 // Note: RANDSTR is NOT included here — it needs the expanded form for unseeded
2695 // RANDOM() since the DuckDB handler uses the expanded SQL as-is in the hash.
2696 transform_recursive(normalized, &|e| {
2697 if let Expression::Function(ref f) = e {
2698 let n = f.name.to_ascii_uppercase();
2699 if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" {
2700 if let Expression::Function(mut f) = e {
2701 for arg in f.args.iter_mut() {
2702 // Detect expanded RANDOM pattern: CAST(-9.22... + RANDOM() * (...) AS BIGINT)
2703 if let Expression::Cast(ref cast) = arg {
2704 if matches!(cast.to, crate::expressions::DataType::BigInt { .. }) {
2705 if let Expression::Add(ref add) = cast.this {
2706 if let Expression::Literal(ref lit) = add.left {
2707 if let crate::expressions::Literal::Number(ref num) = lit.as_ref() {
2708 if num == "-9.223372036854776E+18" {
2709 *arg = Expression::Random(crate::expressions::Random);
2710 }
2711 }
2712 }
2713 }
2714 }
2715 }
2716 }
2717 return Ok(Expression::Function(f));
2718 }
2719 return Ok(e);
2720 }
2721 }
2722 match e {
2723 Expression::Random(_) => Ok(make_scaled_random()),
2724 // Rand(seed) with no bounds: drop seed and expand
2725 // (DuckDB RANDOM doesn't support seeds)
2726 Expression::Rand(ref r) if r.lower.is_none() && r.upper.is_none() => {
2727 Ok(make_scaled_random())
2728 }
2729 _ => Ok(e),
2730 }
2731 })?
2732 } else {
2733 normalized
2734 };
2735
2736 // Apply cross-dialect semantic normalizations
2737 let normalized =
2738 Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;
2739
2740 // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
2741 // (SELECT UNNEST(..., max_depth => 2)) subquery
2742 // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
2743 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
2744 && matches!(target, DialectType::DuckDB)
2745 {
2746 crate::transforms::wrap_duckdb_unnest_struct(normalized)?
2747 } else {
2748 normalized
2749 };
2750
2751 // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
2752 // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
2753 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
2754 && matches!(
2755 target,
2756 DialectType::DuckDB
2757 | DialectType::Presto
2758 | DialectType::Trino
2759 | DialectType::Athena
2760 | DialectType::Spark
2761 | DialectType::Databricks
2762 ) {
2763 crate::transforms::unnest_alias_to_column_alias(normalized)?
2764 } else if matches!(self.dialect_type, DialectType::BigQuery)
2765 && matches!(target, DialectType::BigQuery | DialectType::Redshift)
2766 {
2767 // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
2768 // but don't convert alias format (no _t0 wrapper)
2769 let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
2770 // For Redshift: strip UNNEST when arg is a column reference path
2771 if matches!(target, DialectType::Redshift) {
2772 crate::transforms::strip_unnest_column_refs(result)?
2773 } else {
2774 result
2775 }
2776 } else {
2777 normalized
2778 };
2779
2780 // For Presto/Trino targets from PostgreSQL/Redshift source:
2781 // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
2782 let normalized = if matches!(
2783 self.dialect_type,
2784 DialectType::PostgreSQL | DialectType::Redshift
2785 ) && matches!(
2786 target,
2787 DialectType::Presto | DialectType::Trino | DialectType::Athena
2788 ) {
2789 crate::transforms::wrap_unnest_join_aliases(normalized)?
2790 } else {
2791 normalized
2792 };
2793
2794 // Eliminate DISTINCT ON with target-dialect awareness
2795 // This must happen after source transform (which may produce DISTINCT ON)
2796 // and before target transform, with knowledge of the target dialect's NULL ordering behavior
2797 let normalized =
2798 crate::transforms::eliminate_distinct_on_for_dialect(normalized, Some(target))?;
2799
2800 // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
2801 let normalized = if matches!(target, DialectType::Snowflake) {
2802 Self::transform_generate_date_array_snowflake(normalized)?
2803 } else {
2804 normalized
2805 };
2806
2807 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
2808 let normalized = if matches!(
2809 target,
2810 DialectType::Spark | DialectType::Databricks | DialectType::Hive
2811 ) {
2812 crate::transforms::unnest_to_explode_select(normalized)?
2813 } else {
2814 normalized
2815 };
2816
2817 // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it
2818 let normalized = if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
2819 crate::transforms::no_limit_order_by_union(normalized)?
2820 } else {
2821 normalized
2822 };
2823
2824 // TSQL: Convert COUNT(*) -> COUNT_BIG(*) when source is not TSQL/Fabric
2825 // Python sqlglot does this in the TSQL generator, but we can't do it there
2826 // because it would break TSQL -> TSQL identity
2827 let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
2828 && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
2829 {
2830 transform_recursive(normalized, &|e| {
2831 if let Expression::Count(ref c) = e {
2832 // Build COUNT_BIG(...) as an AggregateFunction
2833 let args = if c.star {
2834 vec![Expression::Star(crate::expressions::Star {
2835 table: None,
2836 except: None,
2837 replace: None,
2838 rename: None,
2839 trailing_comments: Vec::new(),
2840 span: None,
2841 })]
2842 } else if let Some(ref this) = c.this {
2843 vec![this.clone()]
2844 } else {
2845 vec![]
2846 };
2847 Ok(Expression::AggregateFunction(Box::new(
2848 crate::expressions::AggregateFunction {
2849 name: "COUNT_BIG".to_string(),
2850 args,
2851 distinct: c.distinct,
2852 filter: c.filter.clone(),
2853 order_by: Vec::new(),
2854 limit: None,
2855 ignore_nulls: None,
2856 inferred_type: None,
2857 },
2858 )))
2859 } else {
2860 Ok(e)
2861 }
2862 })?
2863 } else {
2864 normalized
2865 };
2866
2867 let transformed = target_dialect.transform(normalized)?;
2868
2869 // DuckDB target: when FROM is RANGE(n), replace SEQ's ROW_NUMBER pattern with `range`
2870 let transformed = if matches!(target, DialectType::DuckDB) {
2871 Self::seq_rownum_to_range(transformed)?
2872 } else {
2873 transformed
2874 };
2875
2876 let mut sql = if pretty {
2877 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)?
2878 } else {
2879 target_dialect.generate_with_source(&transformed, self.dialect_type)?
2880 };
2881
2882 // Align a known Snowflake pretty-print edge case with Python sqlglot output.
2883 if pretty && target == DialectType::Snowflake {
2884 sql = Self::normalize_snowflake_pretty(sql);
2885 }
2886
2887 Ok(sql)
2888 })
2889 .collect()
2890 }
2891}
2892
2893// Transpile-only methods: cross-dialect normalization and helpers
2894#[cfg(feature = "transpile")]
2895impl Dialect {
2896 /// For DuckDB target: when FROM clause contains RANGE(n), replace
2897 /// `(ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1)` with `range` in select expressions.
2898 /// This handles SEQ1/2/4/8 → RANGE transpilation from Snowflake.
2899 fn seq_rownum_to_range(expr: Expression) -> Result<Expression> {
2900 if let Expression::Select(mut select) = expr {
2901 // Check if FROM contains a RANGE function
2902 let has_range_from = if let Some(ref from) = select.from {
2903 from.expressions.iter().any(|e| {
2904 // Check for direct RANGE(...) or aliased RANGE(...)
2905 match e {
2906 Expression::Function(f) => f.name.eq_ignore_ascii_case("RANGE"),
2907 Expression::Alias(a) => {
2908 matches!(&a.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("RANGE"))
2909 }
2910 _ => false,
2911 }
2912 })
2913 } else {
2914 false
2915 };
2916
2917 if has_range_from {
2918 // Replace the ROW_NUMBER pattern in select expressions
2919 select.expressions = select.expressions.into_iter().map(|e| {
2920 Self::replace_rownum_with_range(e)
2921 }).collect();
2922 }
2923
2924 Ok(Expression::Select(select))
2925 } else {
2926 Ok(expr)
2927 }
2928 }
2929
2930 /// Replace `(ROW_NUMBER() OVER (...) - 1)` with `range` column reference
2931 fn replace_rownum_with_range(expr: Expression) -> Expression {
2932 match expr {
2933 // Match: (ROW_NUMBER() OVER (...) - 1) % N → range % N
2934 Expression::Mod(op) => {
2935 let new_left = Self::try_replace_rownum_paren(&op.left);
2936 Expression::Mod(Box::new(crate::expressions::BinaryOp {
2937 left: new_left,
2938 right: op.right,
2939 left_comments: op.left_comments,
2940 operator_comments: op.operator_comments,
2941 trailing_comments: op.trailing_comments,
2942 inferred_type: op.inferred_type,
2943 }))
2944 }
2945 // Match: (CASE WHEN (ROW...) % N >= ... THEN ... ELSE ... END)
2946 Expression::Paren(p) => {
2947 let inner = Self::replace_rownum_with_range(p.this);
2948 Expression::Paren(Box::new(crate::expressions::Paren {
2949 this: inner,
2950 trailing_comments: p.trailing_comments,
2951 }))
2952 }
2953 Expression::Case(mut c) => {
2954 // Replace ROW_NUMBER in WHEN conditions and THEN expressions
2955 c.whens = c.whens.into_iter().map(|(cond, then)| {
2956 (Self::replace_rownum_with_range(cond), Self::replace_rownum_with_range(then))
2957 }).collect();
2958 if let Some(else_) = c.else_ {
2959 c.else_ = Some(Self::replace_rownum_with_range(else_));
2960 }
2961 Expression::Case(c)
2962 }
2963 Expression::Gte(op) => {
2964 Expression::Gte(Box::new(crate::expressions::BinaryOp {
2965 left: Self::replace_rownum_with_range(op.left),
2966 right: op.right,
2967 left_comments: op.left_comments,
2968 operator_comments: op.operator_comments,
2969 trailing_comments: op.trailing_comments,
2970 inferred_type: op.inferred_type,
2971 }))
2972 }
2973 Expression::Sub(op) => {
2974 Expression::Sub(Box::new(crate::expressions::BinaryOp {
2975 left: Self::replace_rownum_with_range(op.left),
2976 right: op.right,
2977 left_comments: op.left_comments,
2978 operator_comments: op.operator_comments,
2979 trailing_comments: op.trailing_comments,
2980 inferred_type: op.inferred_type,
2981 }))
2982 }
2983 Expression::Alias(mut a) => {
2984 a.this = Self::replace_rownum_with_range(a.this);
2985 Expression::Alias(a)
2986 }
2987 other => other,
2988 }
2989 }
2990
2991 /// Check if an expression is `(ROW_NUMBER() OVER (...) - 1)` and replace with `range`
2992 fn try_replace_rownum_paren(expr: &Expression) -> Expression {
2993 if let Expression::Paren(ref p) = expr {
2994 if let Expression::Sub(ref sub) = p.this {
2995 if let Expression::WindowFunction(ref wf) = sub.left {
2996 if let Expression::Function(ref f) = wf.this {
2997 if f.name.eq_ignore_ascii_case("ROW_NUMBER") {
2998 if let Expression::Literal(ref lit) = sub.right {
2999 if let crate::expressions::Literal::Number(ref n) = lit.as_ref() {
3000 if n == "1" {
3001 return Expression::column("range");
3002 }
3003 }
3004 }
3005 }
3006 }
3007 }
3008 }
3009 }
3010 expr.clone()
3011 }
3012
    /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
    /// Converts:
    /// SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
    /// To:
    /// SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
    /// FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)) AS _t0(seq, key, path, index, alias, this)
    ///
    /// Snowflake has no GENERATE_DATE_ARRAY; instead we enumerate integer offsets
    /// with ARRAY_GENERATE_RANGE + LATERAL FLATTEN and reconstruct each date via
    /// DATEADD in the SELECT list. Only the first matching join is rewritten per
    /// SELECT node (transform_recursive revisits nested SELECTs independently).
    fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
        use crate::expressions::*;
        transform_recursive(expr, &|e| {
            // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
            // This case short-circuits before the SELECT-level rewrite below.
            if let Expression::ArraySize(ref af) = e {
                if let Expression::Function(ref f) = af.this {
                    if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                        let result = Self::convert_array_size_gda_snowflake(f)?;
                        return Ok(result);
                    }
                }
            }

            // Everything below only applies to SELECT nodes.
            let Expression::Select(mut sel) = e else {
                return Ok(e);
            };

            // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
            let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
            let mut gda_join_idx: Option<usize> = None;

            for (idx, join) in sel.joins.iter().enumerate() {
                // The join.this may be:
                // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
                // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
                let (unnest_ref, alias_name) = match &join.this {
                    Expression::Unnest(ref unnest) => {
                        let alias = unnest.alias.as_ref().map(|id| id.name.clone());
                        (Some(unnest.as_ref()), alias)
                    }
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            (Some(unnest.as_ref()), Some(a.alias.name.clone()))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
                    // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
                    if let Expression::Function(ref f) = unnest.this {
                        if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
                            let start_expr = f.args[0].clone();
                            let end_expr = f.args[1].clone();
                            // Optional third arg is the step interval.
                            let step = f.args.get(2).cloned();

                            // Extract unit from step interval
                            let unit = if let Some(Expression::Interval(ref iv)) = step {
                                if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
                                    // NOTE(review): relies on the Debug repr of the unit
                                    // enum matching the SQL keyword — confirm if units
                                    // with multi-word Debug output are ever possible.
                                    Some(format!("{:?}", unit).to_ascii_uppercase())
                                } else if let Some(ref this) = iv.this {
                                    // The interval may be stored as a string like "1 MONTH"
                                    if let Expression::Literal(lit) = this {
                                        if let Literal::String(ref s) = lit.as_ref() {
                                            let parts: Vec<&str> = s.split_whitespace().collect();
                                            if parts.len() == 2 {
                                                // "1 MONTH" -> take the unit word.
                                                Some(parts[1].to_ascii_uppercase())
                                            } else if parts.len() == 1 {
                                                // Single word like "MONTH" or just "1"
                                                let upper = parts[0].to_ascii_uppercase();
                                                if matches!(
                                                    upper.as_str(),
                                                    "YEAR"
                                                        | "QUARTER"
                                                        | "MONTH"
                                                        | "WEEK"
                                                        | "DAY"
                                                        | "HOUR"
                                                        | "MINUTE"
                                                        | "SECOND"
                                                ) {
                                                    Some(upper)
                                                } else {
                                                    None
                                                }
                                            } else {
                                                None
                                            }
                                        } else { None }
                                    } else {
                                        None
                                    }
                                } else {
                                    None
                                }
                            } else {
                                None
                            };

                            // Without a recognizable unit we leave the join untouched.
                            if let Some(unit_str) = unit {
                                gda_info = Some((alias, start_expr, end_expr, unit_str));
                                gda_join_idx = Some(idx);
                            }
                        }
                    }
                }
                // Only the first matching join is rewritten.
                if gda_info.is_some() {
                    break;
                }
            }

            let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
                // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
                // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
                let result = Self::try_transform_from_gda_snowflake(sel);
                return result;
            };
            // Safe: gda_join_idx is set whenever gda_info is set (same branch above).
            let join_idx = gda_join_idx.unwrap();

            // Build ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)
            // ARRAY_GENERATE_RANGE uses exclusive end, and we need DATEDIFF + 1 values
            // (inclusive date range), so the exclusive end is DATEDIFF + 1.
            // The unit keyword is modeled as a bare Column so it renders unquoted.
            let datediff = Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![
                    Expression::boxed_column(Column {
                        name: Identifier::new(&unit_str),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    start_expr.clone(),
                    end_expr.clone(),
                ],
            )));
            let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
                left: datediff,
                right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
                left_comments: vec![],
                operator_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }));

            let array_gen_range = Expression::Function(Box::new(Function::new(
                "ARRAY_GENERATE_RANGE".to_string(),
                vec![
                    Expression::Literal(Box::new(Literal::Number("0".to_string()))),
                    datediff_plus_one,
                ],
            )));

            // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
            let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
                name: Identifier::new("INPUT"),
                value: array_gen_range,
                separator: crate::expressions::NamedArgSeparator::DArrow,
            }));
            let flatten = Expression::Function(Box::new(Function::new(
                "FLATTEN".to_string(),
                vec![flatten_input],
            )));

            // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
            // The column-alias list mirrors FLATTEN's fixed output columns, with the
            // original UNNEST alias substituted in the VALUE position.
            let alias_table = Alias {
                this: flatten,
                alias: Identifier::new("_t0"),
                column_aliases: vec![
                    Identifier::new("seq"),
                    Identifier::new("key"),
                    Identifier::new("path"),
                    Identifier::new("index"),
                    Identifier::new(&alias_name),
                    Identifier::new("this"),
                ],
                pre_alias_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            };
            let lateral_expr = Expression::Lateral(Box::new(Lateral {
                this: Box::new(Expression::Alias(Box::new(alias_table))),
                view: None,
                outer: None,
                alias: None,
                alias_quoted: false,
                cross_apply: None,
                ordinality: None,
                column_aliases: vec![],
            }));

            // Remove the original join and add to FROM expressions
            // (CROSS JOIN UNNEST(...) becomes a comma-separated LATERAL item).
            sel.joins.remove(join_idx);
            if let Some(ref mut from) = sel.from {
                from.expressions.push(lateral_expr);
            }

            // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
            // `alias` now holds the integer offset produced by FLATTEN.
            let dateadd_expr = Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::boxed_column(Column {
                        name: Identifier::new(&unit_str),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::boxed_column(Column {
                            name: Identifier::new(&alias_name),
                            table: None,
                            join_mark: false,
                            trailing_comments: vec![],
                            span: None,
                            inferred_type: None,
                        }),
                        to: DataType::Int {
                            length: None,
                            integer_spelling: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })),
                    Expression::Cast(Box::new(Cast {
                        this: start_expr.clone(),
                        to: DataType::Date,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })),
                ],
            )));

            // Replace references to the alias in the SELECT list
            let new_exprs: Vec<Expression> = sel
                .expressions
                .iter()
                .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
                .collect();
            sel.expressions = new_exprs;

            Ok(Expression::Select(sel))
        })
    }
3263
3264 /// Helper: replace column references to `alias_name` with dateadd expression
3265 fn replace_column_ref_with_dateadd(
3266 expr: &Expression,
3267 alias_name: &str,
3268 dateadd: &Expression,
3269 ) -> Expression {
3270 use crate::expressions::*;
3271 match expr {
3272 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
3273 // Plain column reference -> DATEADD(...) AS alias_name
3274 Expression::Alias(Box::new(Alias {
3275 this: dateadd.clone(),
3276 alias: Identifier::new(alias_name),
3277 column_aliases: vec![],
3278 pre_alias_comments: vec![],
3279 trailing_comments: vec![],
3280 inferred_type: None,
3281 }))
3282 }
3283 Expression::Alias(a) => {
3284 // Check if the inner expression references the alias
3285 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
3286 Expression::Alias(Box::new(Alias {
3287 this: new_this,
3288 alias: a.alias.clone(),
3289 column_aliases: a.column_aliases.clone(),
3290 pre_alias_comments: a.pre_alias_comments.clone(),
3291 trailing_comments: a.trailing_comments.clone(),
3292 inferred_type: None,
3293 }))
3294 }
3295 _ => expr.clone(),
3296 }
3297 }
3298
3299 /// Helper: replace column references in inner expression (not top-level)
3300 fn replace_column_ref_inner(
3301 expr: &Expression,
3302 alias_name: &str,
3303 dateadd: &Expression,
3304 ) -> Expression {
3305 use crate::expressions::*;
3306 match expr {
3307 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
3308 dateadd.clone()
3309 }
3310 Expression::Add(op) => {
3311 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
3312 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
3313 Expression::Add(Box::new(BinaryOp {
3314 left,
3315 right,
3316 left_comments: op.left_comments.clone(),
3317 operator_comments: op.operator_comments.clone(),
3318 trailing_comments: op.trailing_comments.clone(),
3319 inferred_type: None,
3320 }))
3321 }
3322 Expression::Sub(op) => {
3323 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
3324 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
3325 Expression::Sub(Box::new(BinaryOp {
3326 left,
3327 right,
3328 left_comments: op.left_comments.clone(),
3329 operator_comments: op.operator_comments.clone(),
3330 trailing_comments: op.trailing_comments.clone(),
3331 inferred_type: None,
3332 }))
3333 }
3334 Expression::Mul(op) => {
3335 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
3336 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
3337 Expression::Mul(Box::new(BinaryOp {
3338 left,
3339 right,
3340 left_comments: op.left_comments.clone(),
3341 operator_comments: op.operator_comments.clone(),
3342 trailing_comments: op.trailing_comments.clone(),
3343 inferred_type: None,
3344 }))
3345 }
3346 _ => expr.clone(),
3347 }
3348 }
3349
3350 /// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
3351 /// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
3352 fn try_transform_from_gda_snowflake(
3353 mut sel: Box<crate::expressions::Select>,
3354 ) -> Result<Expression> {
3355 use crate::expressions::*;
3356
3357 // Extract GDA info from FROM clause
3358 let mut gda_info: Option<(
3359 usize,
3360 String,
3361 Expression,
3362 Expression,
3363 String,
3364 Option<(String, Vec<Identifier>)>,
3365 )> = None; // (from_idx, col_name, start, end, unit, outer_alias)
3366
3367 if let Some(ref from) = sel.from {
3368 for (idx, table_expr) in from.expressions.iter().enumerate() {
3369 // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
3370 // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
3371 let (unnest_opt, outer_alias_info) = match table_expr {
3372 Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
3373 Expression::Alias(ref a) => {
3374 if let Expression::Unnest(ref unnest) = a.this {
3375 let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
3376 (Some(unnest.as_ref()), Some(alias_info))
3377 } else {
3378 (None, None)
3379 }
3380 }
3381 _ => (None, None),
3382 };
3383
3384 if let Some(unnest) = unnest_opt {
3385 // Check for GENERATE_DATE_ARRAY function
3386 let func_opt = match &unnest.this {
3387 Expression::Function(ref f)
3388 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
3389 && f.args.len() >= 2 =>
3390 {
3391 Some(f)
3392 }
3393 // Also check for GenerateSeries (from earlier normalization)
3394 _ => None,
3395 };
3396
3397 if let Some(f) = func_opt {
3398 let start_expr = f.args[0].clone();
3399 let end_expr = f.args[1].clone();
3400 let step = f.args.get(2).cloned();
3401
3402 // Extract unit and column name
3403 let unit = Self::extract_interval_unit_str(&step);
3404 let col_name = outer_alias_info
3405 .as_ref()
3406 .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
3407 .unwrap_or_else(|| "value".to_string());
3408
3409 if let Some(unit_str) = unit {
3410 gda_info = Some((
3411 idx,
3412 col_name,
3413 start_expr,
3414 end_expr,
3415 unit_str,
3416 outer_alias_info,
3417 ));
3418 break;
3419 }
3420 }
3421 }
3422 }
3423 }
3424
3425 let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
3426 else {
3427 return Ok(Expression::Select(sel));
3428 };
3429
3430 // Build the Snowflake subquery:
3431 // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
3432 // FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(seq, key, path, index, col_name, this))
3433
3434 // DATEDIFF(unit, start, end)
3435 let datediff = Expression::Function(Box::new(Function::new(
3436 "DATEDIFF".to_string(),
3437 vec![
3438 Expression::boxed_column(Column {
3439 name: Identifier::new(&unit_str),
3440 table: None,
3441 join_mark: false,
3442 trailing_comments: vec![],
3443 span: None,
3444 inferred_type: None,
3445 }),
3446 start_expr.clone(),
3447 end_expr.clone(),
3448 ],
3449 )));
3450 // DATEDIFF(...) + 1
3451 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
3452 left: datediff,
3453 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3454 left_comments: vec![],
3455 operator_comments: vec![],
3456 trailing_comments: vec![],
3457 inferred_type: None,
3458 }));
3459
3460 let array_gen_range = Expression::Function(Box::new(Function::new(
3461 "ARRAY_GENERATE_RANGE".to_string(),
3462 vec![
3463 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
3464 datediff_plus_one,
3465 ],
3466 )));
3467
3468 // TABLE(FLATTEN(INPUT => ...))
3469 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
3470 name: Identifier::new("INPUT"),
3471 value: array_gen_range,
3472 separator: crate::expressions::NamedArgSeparator::DArrow,
3473 }));
3474 let flatten = Expression::Function(Box::new(Function::new(
3475 "FLATTEN".to_string(),
3476 vec![flatten_input],
3477 )));
3478
3479 // Determine alias name for the table: use outer alias or _t0
3480 let table_alias_name = outer_alias_info
3481 .as_ref()
3482 .map(|(name, _)| name.clone())
3483 .unwrap_or_else(|| "_t0".to_string());
3484
3485 // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
3486 let table_func =
3487 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
3488 let flatten_aliased = Expression::Alias(Box::new(Alias {
3489 this: table_func,
3490 alias: Identifier::new(&table_alias_name),
3491 column_aliases: vec![
3492 Identifier::new("seq"),
3493 Identifier::new("key"),
3494 Identifier::new("path"),
3495 Identifier::new("index"),
3496 Identifier::new(&col_name),
3497 Identifier::new("this"),
3498 ],
3499 pre_alias_comments: vec![],
3500 trailing_comments: vec![],
3501 inferred_type: None,
3502 }));
3503
3504 // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
3505 let dateadd_expr = Expression::Function(Box::new(Function::new(
3506 "DATEADD".to_string(),
3507 vec![
3508 Expression::boxed_column(Column {
3509 name: Identifier::new(&unit_str),
3510 table: None,
3511 join_mark: false,
3512 trailing_comments: vec![],
3513 span: None,
3514 inferred_type: None,
3515 }),
3516 Expression::Cast(Box::new(Cast {
3517 this: Expression::boxed_column(Column {
3518 name: Identifier::new(&col_name),
3519 table: None,
3520 join_mark: false,
3521 trailing_comments: vec![],
3522 span: None,
3523 inferred_type: None,
3524 }),
3525 to: DataType::Int {
3526 length: None,
3527 integer_spelling: false,
3528 },
3529 trailing_comments: vec![],
3530 double_colon_syntax: false,
3531 format: None,
3532 default: None,
3533 inferred_type: None,
3534 })),
3535 // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
3536 start_expr.clone(),
3537 ],
3538 )));
3539 let dateadd_aliased = Expression::Alias(Box::new(Alias {
3540 this: dateadd_expr,
3541 alias: Identifier::new(&col_name),
3542 column_aliases: vec![],
3543 pre_alias_comments: vec![],
3544 trailing_comments: vec![],
3545 inferred_type: None,
3546 }));
3547
3548 // Build inner SELECT
3549 let mut inner_select = Select::new();
3550 inner_select.expressions = vec![dateadd_aliased];
3551 inner_select.from = Some(From {
3552 expressions: vec![flatten_aliased],
3553 });
3554
3555 let inner_select_expr = Expression::Select(Box::new(inner_select));
3556 let subquery = Expression::Subquery(Box::new(Subquery {
3557 this: inner_select_expr,
3558 alias: None,
3559 column_aliases: vec![],
3560 order_by: None,
3561 limit: None,
3562 offset: None,
3563 distribute_by: None,
3564 sort_by: None,
3565 cluster_by: None,
3566 lateral: false,
3567 modifiers_inside: false,
3568 trailing_comments: vec![],
3569 inferred_type: None,
3570 }));
3571
3572 // If there was an outer alias (e.g., AS _q(date_week)), wrap with alias
3573 let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
3574 Expression::Alias(Box::new(Alias {
3575 this: subquery,
3576 alias: Identifier::new(&alias_name),
3577 column_aliases: col_aliases,
3578 pre_alias_comments: vec![],
3579 trailing_comments: vec![],
3580 inferred_type: None,
3581 }))
3582 } else {
3583 subquery
3584 };
3585
3586 // Replace the FROM expression
3587 if let Some(ref mut from) = sel.from {
3588 from.expressions[from_idx] = replacement;
3589 }
3590
3591 Ok(Expression::Select(sel))
3592 }
3593
3594 /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
3595 /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
3596 /// FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(...))))
3597 fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
3598 use crate::expressions::*;
3599
3600 let start_expr = f.args[0].clone();
3601 let end_expr = f.args[1].clone();
3602 let step = f.args.get(2).cloned();
3603 let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
3604 let col_name = "value";
3605
3606 // Build the inner subquery: same as try_transform_from_gda_snowflake
3607 let datediff = Expression::Function(Box::new(Function::new(
3608 "DATEDIFF".to_string(),
3609 vec![
3610 Expression::boxed_column(Column {
3611 name: Identifier::new(&unit_str),
3612 table: None,
3613 join_mark: false,
3614 trailing_comments: vec![],
3615 span: None,
3616 inferred_type: None,
3617 }),
3618 start_expr.clone(),
3619 end_expr.clone(),
3620 ],
3621 )));
3622 // DATEDIFF(...) + 1
3623 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
3624 left: datediff,
3625 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3626 left_comments: vec![],
3627 operator_comments: vec![],
3628 trailing_comments: vec![],
3629 inferred_type: None,
3630 }));
3631
3632 let array_gen_range = Expression::Function(Box::new(Function::new(
3633 "ARRAY_GENERATE_RANGE".to_string(),
3634 vec![
3635 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
3636 datediff_plus_one,
3637 ],
3638 )));
3639
3640 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
3641 name: Identifier::new("INPUT"),
3642 value: array_gen_range,
3643 separator: crate::expressions::NamedArgSeparator::DArrow,
3644 }));
3645 let flatten = Expression::Function(Box::new(Function::new(
3646 "FLATTEN".to_string(),
3647 vec![flatten_input],
3648 )));
3649
3650 let table_func =
3651 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
3652 let flatten_aliased = Expression::Alias(Box::new(Alias {
3653 this: table_func,
3654 alias: Identifier::new("_t0"),
3655 column_aliases: vec![
3656 Identifier::new("seq"),
3657 Identifier::new("key"),
3658 Identifier::new("path"),
3659 Identifier::new("index"),
3660 Identifier::new(col_name),
3661 Identifier::new("this"),
3662 ],
3663 pre_alias_comments: vec![],
3664 trailing_comments: vec![],
3665 inferred_type: None,
3666 }));
3667
3668 let dateadd_expr = Expression::Function(Box::new(Function::new(
3669 "DATEADD".to_string(),
3670 vec![
3671 Expression::boxed_column(Column {
3672 name: Identifier::new(&unit_str),
3673 table: None,
3674 join_mark: false,
3675 trailing_comments: vec![],
3676 span: None,
3677 inferred_type: None,
3678 }),
3679 Expression::Cast(Box::new(Cast {
3680 this: Expression::boxed_column(Column {
3681 name: Identifier::new(col_name),
3682 table: None,
3683 join_mark: false,
3684 trailing_comments: vec![],
3685 span: None,
3686 inferred_type: None,
3687 }),
3688 to: DataType::Int {
3689 length: None,
3690 integer_spelling: false,
3691 },
3692 trailing_comments: vec![],
3693 double_colon_syntax: false,
3694 format: None,
3695 default: None,
3696 inferred_type: None,
3697 })),
3698 start_expr.clone(),
3699 ],
3700 )));
3701 let dateadd_aliased = Expression::Alias(Box::new(Alias {
3702 this: dateadd_expr,
3703 alias: Identifier::new(col_name),
3704 column_aliases: vec![],
3705 pre_alias_comments: vec![],
3706 trailing_comments: vec![],
3707 inferred_type: None,
3708 }));
3709
3710 // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
3711 let mut inner_select = Select::new();
3712 inner_select.expressions = vec![dateadd_aliased];
3713 inner_select.from = Some(From {
3714 expressions: vec![flatten_aliased],
3715 });
3716
3717 // Wrap in subquery for the inner part
3718 let inner_subquery = Expression::Subquery(Box::new(Subquery {
3719 this: Expression::Select(Box::new(inner_select)),
3720 alias: None,
3721 column_aliases: vec![],
3722 order_by: None,
3723 limit: None,
3724 offset: None,
3725 distribute_by: None,
3726 sort_by: None,
3727 cluster_by: None,
3728 lateral: false,
3729 modifiers_inside: false,
3730 trailing_comments: vec![],
3731 inferred_type: None,
3732 }));
3733
3734 // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery)
3735 let star = Expression::Star(Star {
3736 table: None,
3737 except: None,
3738 replace: None,
3739 rename: None,
3740 trailing_comments: vec![],
3741 span: None,
3742 });
3743 let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
3744 this: star,
3745 distinct: false,
3746 filter: None,
3747 order_by: vec![],
3748 name: Some("ARRAY_AGG".to_string()),
3749 ignore_nulls: None,
3750 having_max: None,
3751 limit: None,
3752 inferred_type: None,
3753 }));
3754
3755 let mut outer_select = Select::new();
3756 outer_select.expressions = vec![array_agg];
3757 outer_select.from = Some(From {
3758 expressions: vec![inner_subquery],
3759 });
3760
3761 // Wrap in a subquery
3762 let outer_subquery = Expression::Subquery(Box::new(Subquery {
3763 this: Expression::Select(Box::new(outer_select)),
3764 alias: None,
3765 column_aliases: vec![],
3766 order_by: None,
3767 limit: None,
3768 offset: None,
3769 distribute_by: None,
3770 sort_by: None,
3771 cluster_by: None,
3772 lateral: false,
3773 modifiers_inside: false,
3774 trailing_comments: vec![],
3775 inferred_type: None,
3776 }));
3777
3778 // ARRAY_SIZE(subquery)
3779 Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
3780 outer_subquery,
3781 ))))
3782 }
3783
3784 /// Extract interval unit string from an optional step expression.
3785 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
3786 use crate::expressions::*;
3787 if let Some(Expression::Interval(ref iv)) = step {
3788 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
3789 return Some(format!("{:?}", unit).to_ascii_uppercase());
3790 }
3791 if let Some(ref this) = iv.this {
3792 if let Expression::Literal(lit) = this {
3793 if let Literal::String(ref s) = lit.as_ref() {
3794 let parts: Vec<&str> = s.split_whitespace().collect();
3795 if parts.len() == 2 {
3796 return Some(parts[1].to_ascii_uppercase());
3797 } else if parts.len() == 1 {
3798 let upper = parts[0].to_ascii_uppercase();
3799 if matches!(
3800 upper.as_str(),
3801 "YEAR"
3802 | "QUARTER"
3803 | "MONTH"
3804 | "WEEK"
3805 | "DAY"
3806 | "HOUR"
3807 | "MINUTE"
3808 | "SECOND"
3809 ) {
3810 return Some(upper);
3811 }
3812 }
3813 }
3814 }
3815 }
3816 }
3817 // Default to DAY if no step or no interval
3818 if step.is_none() {
3819 return Some("DAY".to_string());
3820 }
3821 None
3822 }
3823
3824 fn normalize_snowflake_pretty(mut sql: String) -> String {
3825 if sql.contains("LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)")
3826 && sql.contains("ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)")
3827 {
3828 sql = sql.replace(
3829 "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
3830 "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
3831 );
3832
3833 sql = sql.replace(
3834 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
3835 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
3836 );
3837
3838 sql = sql.replace(
3839 "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
3840 "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
3841 );
3842 }
3843
3844 sql
3845 }
3846
3847 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
3848 /// This handles cases where the same syntax has different semantics across dialects.
3849 fn cross_dialect_normalize(
3850 expr: Expression,
3851 source: DialectType,
3852 target: DialectType,
3853 ) -> Result<Expression> {
3854 use crate::expressions::{
3855 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
3856 Function, Identifier, IsNull, Literal, Null, Paren,
3857 };
3858
3859 // Helper to tag which kind of transform to apply
3860 #[derive(Debug)]
3861 enum Action {
3862 None,
3863 GreatestLeastNull,
3864 ArrayGenerateRange,
3865 Div0TypedDivision,
3866 ArrayAggCollectList,
3867 ArrayAggWithinGroupFilter,
3868 ArrayAggFilter,
3869 CastTimestampToDatetime,
3870 DateTruncWrapCast,
3871 ToDateToCast,
3872 ConvertTimezoneToExpr,
3873 SetToVariable,
3874 RegexpReplaceSnowflakeToDuckDB,
3875 BigQueryFunctionNormalize,
3876 BigQuerySafeDivide,
3877 BigQueryCastType,
3878 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
3879 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
3880 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
3881 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
3882 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
3883 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
3884 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
3885 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
3886 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target (partial match)
3887 EpochConvert, // Expression::Epoch -> target-specific epoch function
3888 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
3889 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
3890 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
3891 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
3892 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
3893 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
3894 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
3895 TempTableHash, // TSQL #table -> temp table normalization
3896 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
3897 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
3898 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
3899 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
3900 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
3901 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
3902 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3903 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
3904 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
3905 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
3906 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
3907 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
3908 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
3909 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
3910 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
3911 ArrayAggToGroupConcat, // ARRAY_AGG(x) -> GROUP_CONCAT(x) for MySQL-like targets
3912 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
3913 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
3914 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
3915 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
3916 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
3917 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
3918 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
3919 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
3920 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
3921 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
3922 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
3923 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
3924 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
3925 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
3926 DollarParamConvert, // $foo -> @foo for BigQuery
3927 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
3928 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
3929 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
3930 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
3931 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
3932 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
3933 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
3934 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
3935 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
3936 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
3937 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
3938 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
3939 RespectNullsConvert, // RESPECT NULLS window function handling
3940 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
3941 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
3942 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
3943 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
3944 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
3945 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
3946 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
3947 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
3948 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
3949 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
3950 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
3951 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
3952 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
3953 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
3954 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
3955 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
3956 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
3957 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
3958 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
3959 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
3960 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
3961 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
3962 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
3963 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
3964 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
3965 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
3966 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
3967 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
3968 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
3969 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
3970 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
3971 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
3972 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
3973 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
3974 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
3975 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
3976 JsonExtractToTsql, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
3977 JsonExtractToClickHouse, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
3978 JsonExtractScalarConvert, // JSON_EXTRACT_SCALAR -> target-specific (PostgreSQL, Snowflake, SQLite)
3979 JsonPathNormalize, // Normalize JSON path format (brackets, wildcards, quotes) for various dialects
3980 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
3981 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
3982 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
3983 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
3984 Nvl2Expand, // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END
3985 IfnullToCoalesce, // IFNULL(a, b) -> COALESCE(a, b)
3986 IsAsciiConvert, // IS_ASCII(x) -> dialect-specific ASCII check
3987 StrPositionConvert, // STR_POSITION(haystack, needle[, pos]) -> dialect-specific
3988 DecodeSimplify, // DECODE with null-safe -> simple = comparison
3989 ArraySumConvert, // ARRAY_SUM -> target-specific
3990 ArraySizeConvert, // ARRAY_SIZE -> target-specific
3991 ArrayAnyConvert, // ARRAY_ANY -> target-specific
3992 CastTimestamptzToFunc, // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) for MySQL/StarRocks
3993 TsOrDsToDateConvert, // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific
3994 TsOrDsToDateStrConvert, // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
3995 DateStrToDateConvert, // DATE_STR_TO_DATE(x) -> CAST(x AS DATE)
3996 TimeStrToDateConvert, // TIME_STR_TO_DATE(x) -> CAST(x AS DATE)
3997 TimeStrToTimeConvert, // TIME_STR_TO_TIME(x) -> CAST(x AS TIMESTAMP)
3998 DateToDateStrConvert, // DATE_TO_DATE_STR(x) -> CAST(x AS TEXT/VARCHAR/STRING)
3999 DateToDiConvert, // DATE_TO_DI(x) -> dialect-specific (CAST date to YYYYMMDD integer)
4000 DiToDateConvert, // DI_TO_DATE(x) -> dialect-specific (integer YYYYMMDD to date)
4001 TsOrDiToDiConvert, // TS_OR_DI_TO_DI(x) -> dialect-specific
4002 UnixToStrConvert, // UNIX_TO_STR(x, fmt) -> dialect-specific
4003 UnixToTimeConvert, // UNIX_TO_TIME(x) -> dialect-specific
4004 UnixToTimeStrConvert, // UNIX_TO_TIME_STR(x) -> dialect-specific
4005 TimeToUnixConvert, // TIME_TO_UNIX(x) -> dialect-specific
4006 TimeToStrConvert, // TIME_TO_STR(x, fmt) -> dialect-specific
4007 StrToUnixConvert, // STR_TO_UNIX(x, fmt) -> dialect-specific
4008 DateTruncSwapArgs, // DATE_TRUNC('unit', x) -> DATE_TRUNC(x, unit) / TRUNC(x, unit)
4009 TimestampTruncConvert, // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> dialect-specific
4010 StrToDateConvert, // STR_TO_DATE(x, fmt) from Generic -> CAST(StrToTime(x,fmt) AS DATE)
4011 TsOrDsAddConvert, // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> DATE_ADD per dialect
4012 DateFromUnixDateConvert, // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
4013 TimeStrToUnixConvert, // TIME_STR_TO_UNIX(x) -> dialect-specific
4014 TimeToTimeStrConvert, // TIME_TO_TIME_STR(x) -> CAST(x AS type)
4015 CreateTableLikeToCtas, // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
4016 CreateTableLikeToSelectInto, // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
4017 CreateTableLikeToAs, // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
4018 ArrayRemoveConvert, // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
4019 ArrayReverseConvert, // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
4020 JsonKeysConvert, // JSON_KEYS -> JSON_OBJECT_KEYS/OBJECT_KEYS
4021 ParseJsonStrip, // PARSE_JSON(x) -> x (strip wrapper)
4022 ArraySizeDrill, // ARRAY_SIZE -> REPEATED_COUNT for Drill
4023 WeekOfYearToWeekIso, // WEEKOFYEAR -> WEEKISO for Snowflake cross-dialect
4024 RegexpSubstrSnowflakeToDuckDB, // REGEXP_SUBSTR(s, p, ...) -> REGEXP_EXTRACT variants for DuckDB
4025 RegexpSubstrSnowflakeIdentity, // REGEXP_SUBSTR/REGEXP_SUBSTR_ALL strip trailing group=0 for Snowflake identity
4026 RegexpSubstrAllSnowflakeToDuckDB, // REGEXP_SUBSTR_ALL(s, p, ...) -> REGEXP_EXTRACT_ALL variants for DuckDB
4027 RegexpCountSnowflakeToDuckDB, // REGEXP_COUNT(s, p, ...) -> LENGTH(REGEXP_EXTRACT_ALL(...)) for DuckDB
4028 RegexpInstrSnowflakeToDuckDB, // REGEXP_INSTR(s, p, ...) -> complex CASE expression for DuckDB
4029 RegexpReplacePositionSnowflakeToDuckDB, // REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
4030 RlikeSnowflakeToDuckDB, // RLIKE(a, b[, flags]) -> REGEXP_MATCHES(a, anchored_pattern) for DuckDB
4031 RegexpExtractAllToSnowflake, // BigQuery REGEXP_EXTRACT_ALL -> REGEXP_SUBSTR_ALL for Snowflake
4032 ArrayExceptConvert, // ARRAY_EXCEPT -> DuckDB complex CASE / Snowflake ARRAY_EXCEPT / Presto ARRAY_EXCEPT
4033 ArrayPositionSnowflakeSwap, // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
4034 RegexpLikeExasolAnchor, // RegexpLike -> Exasol REGEXP_LIKE with .*pattern.* anchoring
4035 ArrayDistinctConvert, // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
4036 ArrayDistinctClickHouse, // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
4037 ArrayContainsDuckDBConvert, // ARRAY_CONTAINS -> DuckDB CASE with NULL-aware check
4038 SnowflakeWindowFrameStrip, // Strip default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for Snowflake target
4039 SnowflakeWindowFrameAdd, // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for non-Snowflake target
4040 SnowflakeArrayPositionToDuckDB, // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB
4041 }
4042
4043 // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
4044 let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
4045 Self::transform_select_into(expr, source, target)
4046 } else {
4047 expr
4048 };
4049
4050 // Strip OFFSET ROWS for non-TSQL/Oracle targets
4051 let expr = if !matches!(
4052 target,
4053 DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
4054 ) {
4055 if let Expression::Select(mut select) = expr {
4056 if let Some(ref mut offset) = select.offset {
4057 offset.rows = None;
4058 }
4059 Expression::Select(select)
4060 } else {
4061 expr
4062 }
4063 } else {
4064 expr
4065 };
4066
4067 // Oracle: LIMIT -> FETCH FIRST, OFFSET -> OFFSET ROWS
4068 let expr = if matches!(target, DialectType::Oracle) {
4069 if let Expression::Select(mut select) = expr {
4070 if let Some(limit) = select.limit.take() {
4071 // Convert LIMIT to FETCH FIRST n ROWS ONLY
4072 select.fetch = Some(crate::expressions::Fetch {
4073 direction: "FIRST".to_string(),
4074 count: Some(limit.this),
4075 percent: false,
4076 rows: true,
4077 with_ties: false,
4078 });
4079 }
4080 // Add ROWS to OFFSET if present
4081 if let Some(ref mut offset) = select.offset {
4082 offset.rows = Some(true);
4083 }
4084 Expression::Select(select)
4085 } else {
4086 expr
4087 }
4088 } else {
4089 expr
4090 };
4091
4092 // Handle CreateTable WITH properties transformation before recursive transforms
4093 let expr = if let Expression::CreateTable(mut ct) = expr {
4094 Self::transform_create_table_properties(&mut ct, source, target);
4095
4096 // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
4097 // When the PARTITIONED BY clause contains column definitions, merge them into the
4098 // main column list and adjust the PARTITIONED BY clause for the target dialect.
4099 if matches!(
4100 source,
4101 DialectType::Hive | DialectType::Spark | DialectType::Databricks
4102 ) {
4103 let mut partition_col_names: Vec<String> = Vec::new();
4104 let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
4105 let mut has_col_def_partitions = false;
4106
4107 // Check if any PARTITIONED BY property contains ColumnDef expressions
4108 for prop in &ct.properties {
4109 if let Expression::PartitionedByProperty(ref pbp) = prop {
4110 if let Expression::Tuple(ref tuple) = *pbp.this {
4111 for expr in &tuple.expressions {
4112 if let Expression::ColumnDef(ref cd) = expr {
4113 has_col_def_partitions = true;
4114 partition_col_names.push(cd.name.name.clone());
4115 partition_col_defs.push(*cd.clone());
4116 }
4117 }
4118 }
4119 }
4120 }
4121
4122 if has_col_def_partitions && !matches!(target, DialectType::Hive) {
4123 // Merge partition columns into main column list
4124 for cd in partition_col_defs {
4125 ct.columns.push(cd);
4126 }
4127
4128 // Replace PARTITIONED BY property with column-name-only version
4129 ct.properties
4130 .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));
4131
4132 if matches!(
4133 target,
4134 DialectType::Presto | DialectType::Trino | DialectType::Athena
4135 ) {
4136 // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
4137 let array_elements: Vec<String> = partition_col_names
4138 .iter()
4139 .map(|n| format!("'{}'", n))
4140 .collect();
4141 let array_value = format!("ARRAY[{}]", array_elements.join(", "));
4142 ct.with_properties
4143 .push(("PARTITIONED_BY".to_string(), array_value));
4144 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
4145 // Spark: PARTITIONED BY (y, z) - just column names
4146 let name_exprs: Vec<Expression> = partition_col_names
4147 .iter()
4148 .map(|n| {
4149 Expression::Column(Box::new(crate::expressions::Column {
4150 name: crate::expressions::Identifier::new(n.clone()),
4151 table: None,
4152 join_mark: false,
4153 trailing_comments: Vec::new(),
4154 span: None,
4155 inferred_type: None,
4156 }))
4157 })
4158 .collect();
4159 ct.properties.insert(
4160 0,
4161 Expression::PartitionedByProperty(Box::new(
4162 crate::expressions::PartitionedByProperty {
4163 this: Box::new(Expression::Tuple(Box::new(
4164 crate::expressions::Tuple {
4165 expressions: name_exprs,
4166 },
4167 ))),
4168 },
4169 )),
4170 );
4171 }
4172 // For DuckDB and other targets, just drop the PARTITIONED BY (already retained above)
4173 }
4174
4175 // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
4176 // are handled by transform_create_table_properties which runs first
4177 }
4178
4179 // Strip LOCATION property for Presto/Trino (not supported)
4180 if matches!(
4181 target,
4182 DialectType::Presto | DialectType::Trino | DialectType::Athena
4183 ) {
4184 ct.properties
4185 .retain(|p| !matches!(p, Expression::LocationProperty(_)));
4186 }
4187
4188 // Strip table-level constraints for Spark/Hive/Databricks
4189 // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
4190 if matches!(
4191 target,
4192 DialectType::Spark | DialectType::Databricks | DialectType::Hive
4193 ) {
4194 ct.constraints.retain(|c| {
4195 matches!(
4196 c,
4197 crate::expressions::TableConstraint::PrimaryKey { .. }
4198 | crate::expressions::TableConstraint::Like { .. }
4199 )
4200 });
4201 for constraint in &mut ct.constraints {
4202 if let crate::expressions::TableConstraint::PrimaryKey {
4203 columns,
4204 modifiers,
4205 ..
4206 } = constraint
4207 {
4208 // Strip ASC/DESC from column names
4209 for col in columns.iter_mut() {
4210 if col.name.ends_with(" ASC") {
4211 col.name = col.name[..col.name.len() - 4].to_string();
4212 } else if col.name.ends_with(" DESC") {
4213 col.name = col.name[..col.name.len() - 5].to_string();
4214 }
4215 }
4216 // Strip TSQL-specific modifiers
4217 modifiers.clustered = None;
4218 modifiers.with_options.clear();
4219 modifiers.on_filegroup = None;
4220 }
4221 }
4222 }
4223
4224 // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
4225 if matches!(target, DialectType::Databricks) {
4226 for col in &mut ct.columns {
4227 if col.auto_increment {
4228 if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
4229 col.data_type = crate::expressions::DataType::BigInt { length: None };
4230 }
4231 }
4232 }
4233 }
4234
4235 // Spark/Databricks: INTEGER -> INT in column definitions
4236 // Python sqlglot always outputs INT for Spark/Databricks
4237 if matches!(target, DialectType::Spark | DialectType::Databricks) {
4238 for col in &mut ct.columns {
4239 if let crate::expressions::DataType::Int {
4240 integer_spelling, ..
4241 } = &mut col.data_type
4242 {
4243 *integer_spelling = false;
4244 }
4245 }
4246 }
4247
4248 // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
4249 if matches!(target, DialectType::Hive | DialectType::Spark) {
4250 for col in &mut ct.columns {
4251 // If nullable is explicitly true (NULL), change to None (omit it)
4252 if col.nullable == Some(true) {
4253 col.nullable = None;
4254 }
4255 // Also remove from constraints if stored there
4256 col.constraints
4257 .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
4258 }
4259 }
4260
4261 // Strip TSQL ON filegroup for non-TSQL/Fabric targets
4262 if ct.on_property.is_some()
4263 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4264 {
4265 ct.on_property = None;
4266 }
4267
4268 // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
4269 // Snowflake doesn't support typed arrays in DDL
4270 if matches!(target, DialectType::Snowflake) {
4271 fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
4272 if let crate::expressions::DataType::Array { .. } = dt {
4273 *dt = crate::expressions::DataType::Custom {
4274 name: "ARRAY".to_string(),
4275 };
4276 }
4277 }
4278 for col in &mut ct.columns {
4279 strip_array_type_params(&mut col.data_type);
4280 }
4281 }
4282
4283 // PostgreSQL target: ensure IDENTITY columns have NOT NULL
4284 // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
4285 // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
4286 if matches!(target, DialectType::PostgreSQL) {
4287 for col in &mut ct.columns {
4288 if col.auto_increment && !col.constraint_order.is_empty() {
4289 use crate::expressions::ConstraintType;
4290 let has_explicit_not_null = col
4291 .constraint_order
4292 .iter()
4293 .any(|ct| *ct == ConstraintType::NotNull);
4294
4295 if has_explicit_not_null {
4296 // Source had explicit NOT NULL - preserve original order
4297 // Just ensure nullable is set
4298 if col.nullable != Some(false) {
4299 col.nullable = Some(false);
4300 }
4301 } else {
4302 // Source didn't have explicit NOT NULL - build order with
4303 // AutoIncrement + NotNull first, then remaining constraints
4304 let mut new_order = Vec::new();
4305 // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
4306 new_order.push(ConstraintType::AutoIncrement);
4307 new_order.push(ConstraintType::NotNull);
4308 // Add remaining constraints in original order (except AutoIncrement)
4309 for ct_type in &col.constraint_order {
4310 if *ct_type != ConstraintType::AutoIncrement {
4311 new_order.push(ct_type.clone());
4312 }
4313 }
4314 col.constraint_order = new_order;
4315 col.nullable = Some(false);
4316 }
4317 }
4318 }
4319 }
4320
4321 Expression::CreateTable(ct)
4322 } else {
4323 expr
4324 };
4325
4326 // Handle CreateView column stripping for Presto/Trino target
4327 let expr = if let Expression::CreateView(mut cv) = expr {
4328 // Presto/Trino: drop column list when view has a SELECT body
4329 if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
4330 {
4331 if !matches!(&cv.query, Expression::Null(_)) {
4332 cv.columns.clear();
4333 }
4334 }
4335 Expression::CreateView(cv)
4336 } else {
4337 expr
4338 };
4339
4340 // Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for generic/non-Presto targets
4341 let expr = if !matches!(
4342 target,
4343 DialectType::Presto | DialectType::Trino | DialectType::Athena
4344 ) {
4345 if let Expression::Select(mut select) = expr {
4346 if let Some(ref mut with) = select.with {
4347 for cte in &mut with.ctes {
4348 if let Expression::Values(ref vals) = cte.this {
4349 // Build: SELECT * FROM (VALUES ...) AS _values
4350 let values_subquery =
4351 Expression::Subquery(Box::new(crate::expressions::Subquery {
4352 this: Expression::Values(vals.clone()),
4353 alias: Some(Identifier::new("_values".to_string())),
4354 column_aliases: Vec::new(),
4355 order_by: None,
4356 limit: None,
4357 offset: None,
4358 distribute_by: None,
4359 sort_by: None,
4360 cluster_by: None,
4361 lateral: false,
4362 modifiers_inside: false,
4363 trailing_comments: Vec::new(),
4364 inferred_type: None,
4365 }));
4366 let mut new_select = crate::expressions::Select::new();
4367 new_select.expressions =
4368 vec![Expression::Star(crate::expressions::Star {
4369 table: None,
4370 except: None,
4371 replace: None,
4372 rename: None,
4373 trailing_comments: Vec::new(),
4374 span: None,
4375 })];
4376 new_select.from = Some(crate::expressions::From {
4377 expressions: vec![values_subquery],
4378 });
4379 cte.this = Expression::Select(Box::new(new_select));
4380 }
4381 }
4382 }
4383 Expression::Select(select)
4384 } else {
4385 expr
4386 }
4387 } else {
4388 expr
4389 };
4390
4391 // PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have nulls ordering
4392 let expr = if matches!(target, DialectType::PostgreSQL) {
4393 if let Expression::CreateIndex(mut ci) = expr {
4394 for col in &mut ci.columns {
4395 if col.nulls_first.is_none() {
4396 col.nulls_first = Some(true);
4397 }
4398 }
4399 Expression::CreateIndex(ci)
4400 } else {
4401 expr
4402 }
4403 } else {
4404 expr
4405 };
4406
4407 transform_recursive(expr, &|e| {
4408 // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
4409 // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
4410 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4411 if let Expression::Cast(ref c) = e {
4412 // Check if this is a CAST of an array to a struct array type
4413 let is_struct_array_cast =
4414 matches!(&c.to, crate::expressions::DataType::Array { .. });
4415 if is_struct_array_cast {
4416 let has_auto_named_structs = match &c.this {
4417 Expression::Array(arr) => arr.expressions.iter().any(|elem| {
4418 if let Expression::Struct(s) = elem {
4419 s.fields.iter().all(|(name, _)| {
4420 name.as_ref().map_or(true, |n| {
4421 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
4422 })
4423 })
4424 } else {
4425 false
4426 }
4427 }),
4428 Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
4429 if let Expression::Struct(s) = elem {
4430 s.fields.iter().all(|(name, _)| {
4431 name.as_ref().map_or(true, |n| {
4432 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
4433 })
4434 })
4435 } else {
4436 false
4437 }
4438 }),
4439 _ => false,
4440 };
4441 if has_auto_named_structs {
4442 let convert_struct_to_row = |elem: Expression| -> Expression {
4443 if let Expression::Struct(s) = elem {
4444 let row_args: Vec<Expression> =
4445 s.fields.into_iter().map(|(_, v)| v).collect();
4446 Expression::Function(Box::new(Function::new(
4447 "ROW".to_string(),
4448 row_args,
4449 )))
4450 } else {
4451 elem
4452 }
4453 };
4454 let mut c_clone = c.as_ref().clone();
4455 match &mut c_clone.this {
4456 Expression::Array(arr) => {
4457 arr.expressions = arr
4458 .expressions
4459 .drain(..)
4460 .map(convert_struct_to_row)
4461 .collect();
4462 }
4463 Expression::ArrayFunc(arr) => {
4464 arr.expressions = arr
4465 .expressions
4466 .drain(..)
4467 .map(convert_struct_to_row)
4468 .collect();
4469 }
4470 _ => {}
4471 }
4472 return Ok(Expression::Cast(Box::new(c_clone)));
4473 }
4474 }
4475 }
4476 }
4477
4478 // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
4479 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4480 if let Expression::Select(ref sel) = e {
4481 if sel.kind.as_deref() == Some("STRUCT") {
4482 let mut fields = Vec::new();
4483 for expr in &sel.expressions {
4484 match expr {
4485 Expression::Alias(a) => {
4486 fields.push((Some(a.alias.name.clone()), a.this.clone()));
4487 }
4488 Expression::Column(c) => {
4489 fields.push((Some(c.name.name.clone()), expr.clone()));
4490 }
4491 _ => {
4492 fields.push((None, expr.clone()));
4493 }
4494 }
4495 }
4496 let struct_lit =
4497 Expression::Struct(Box::new(crate::expressions::Struct { fields }));
4498 let mut new_select = sel.as_ref().clone();
4499 new_select.kind = None;
4500 new_select.expressions = vec![struct_lit];
4501 return Ok(Expression::Select(Box::new(new_select)));
4502 }
4503 }
4504 }
4505
4506 // Convert @variable -> ${variable} for Spark/Hive/Databricks
4507 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4508 && matches!(
4509 target,
4510 DialectType::Spark | DialectType::Databricks | DialectType::Hive
4511 )
4512 {
4513 if let Expression::Parameter(ref p) = e {
4514 if p.style == crate::expressions::ParameterStyle::At {
4515 if let Some(ref name) = p.name {
4516 return Ok(Expression::Parameter(Box::new(
4517 crate::expressions::Parameter {
4518 name: Some(name.clone()),
4519 index: p.index,
4520 style: crate::expressions::ParameterStyle::DollarBrace,
4521 quoted: p.quoted,
4522 string_quoted: p.string_quoted,
4523 expression: None,
4524 },
4525 )));
4526 }
4527 }
4528 }
4529 // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
4530 if let Expression::Column(ref col) = e {
4531 if col.name.name.starts_with('@') && col.table.is_none() {
4532 let var_name = col.name.name.trim_start_matches('@').to_string();
4533 return Ok(Expression::Parameter(Box::new(
4534 crate::expressions::Parameter {
4535 name: Some(var_name),
4536 index: None,
4537 style: crate::expressions::ParameterStyle::DollarBrace,
4538 quoted: false,
4539 string_quoted: false,
4540 expression: None,
4541 },
4542 )));
4543 }
4544 }
4545 }
4546
4547 // Convert @variable -> variable in SET statements for Spark/Databricks
4548 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4549 && matches!(target, DialectType::Spark | DialectType::Databricks)
4550 {
4551 if let Expression::SetStatement(ref s) = e {
4552 let mut new_items = s.items.clone();
4553 let mut changed = false;
4554 for item in &mut new_items {
4555 // Strip @ from the SET name (Parameter style)
4556 if let Expression::Parameter(ref p) = item.name {
4557 if p.style == crate::expressions::ParameterStyle::At {
4558 if let Some(ref name) = p.name {
4559 item.name = Expression::Identifier(Identifier::new(name));
4560 changed = true;
4561 }
4562 }
4563 }
4564 // Strip @ from the SET name (Identifier style - SET parser)
4565 if let Expression::Identifier(ref id) = item.name {
4566 if id.name.starts_with('@') {
4567 let var_name = id.name.trim_start_matches('@').to_string();
4568 item.name = Expression::Identifier(Identifier::new(&var_name));
4569 changed = true;
4570 }
4571 }
4572 // Strip @ from the SET name (Column style - alternative parsing)
4573 if let Expression::Column(ref col) = item.name {
4574 if col.name.name.starts_with('@') && col.table.is_none() {
4575 let var_name = col.name.name.trim_start_matches('@').to_string();
4576 item.name = Expression::Identifier(Identifier::new(&var_name));
4577 changed = true;
4578 }
4579 }
4580 }
4581 if changed {
4582 let mut new_set = (**s).clone();
4583 new_set.items = new_items;
4584 return Ok(Expression::SetStatement(Box::new(new_set)));
4585 }
4586 }
4587 }
4588
4589 // Strip NOLOCK hint for non-TSQL targets
4590 if matches!(source, DialectType::TSQL | DialectType::Fabric)
4591 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4592 {
4593 if let Expression::Table(ref tr) = e {
4594 if !tr.hints.is_empty() {
4595 let mut new_tr = tr.clone();
4596 new_tr.hints.clear();
4597 return Ok(Expression::Table(new_tr));
4598 }
4599 }
4600 }
4601
4602 // Snowflake: TRUE IS TRUE -> TRUE, FALSE IS FALSE -> FALSE
4603 // Snowflake simplifies IS TRUE/IS FALSE on boolean literals
4604 if matches!(target, DialectType::Snowflake) {
4605 if let Expression::IsTrue(ref itf) = e {
4606 if let Expression::Boolean(ref b) = itf.this {
4607 if !itf.not {
4608 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4609 value: b.value,
4610 }));
4611 } else {
4612 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4613 value: !b.value,
4614 }));
4615 }
4616 }
4617 }
4618 if let Expression::IsFalse(ref itf) = e {
4619 if let Expression::Boolean(ref b) = itf.this {
4620 if !itf.not {
4621 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4622 value: !b.value,
4623 }));
4624 } else {
4625 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
4626 value: b.value,
4627 }));
4628 }
4629 }
4630 }
4631 }
4632
4633 // BigQuery: split dotted backtick identifiers in table names
4634 // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
4635 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
4636 if let Expression::CreateTable(ref ct) = e {
4637 let mut changed = false;
4638 let mut new_ct = ct.clone();
4639 // Split the table name
4640 if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
4641 let parts: Vec<&str> = ct.name.name.name.split('.').collect();
4642 // Use quoted identifiers when the original was quoted (backtick in BigQuery)
4643 let was_quoted = ct.name.name.quoted;
4644 let mk_id = |s: &str| {
4645 if was_quoted {
4646 Identifier::quoted(s)
4647 } else {
4648 Identifier::new(s)
4649 }
4650 };
4651 if parts.len() == 3 {
4652 new_ct.name.catalog = Some(mk_id(parts[0]));
4653 new_ct.name.schema = Some(mk_id(parts[1]));
4654 new_ct.name.name = mk_id(parts[2]);
4655 changed = true;
4656 } else if parts.len() == 2 {
4657 new_ct.name.schema = Some(mk_id(parts[0]));
4658 new_ct.name.name = mk_id(parts[1]);
4659 changed = true;
4660 }
4661 }
4662 // Split the clone source name
4663 if let Some(ref clone_src) = ct.clone_source {
4664 if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
4665 let parts: Vec<&str> = clone_src.name.name.split('.').collect();
4666 let was_quoted = clone_src.name.quoted;
4667 let mk_id = |s: &str| {
4668 if was_quoted {
4669 Identifier::quoted(s)
4670 } else {
4671 Identifier::new(s)
4672 }
4673 };
4674 let mut new_src = clone_src.clone();
4675 if parts.len() == 3 {
4676 new_src.catalog = Some(mk_id(parts[0]));
4677 new_src.schema = Some(mk_id(parts[1]));
4678 new_src.name = mk_id(parts[2]);
4679 new_ct.clone_source = Some(new_src);
4680 changed = true;
4681 } else if parts.len() == 2 {
4682 new_src.schema = Some(mk_id(parts[0]));
4683 new_src.name = mk_id(parts[1]);
4684 new_ct.clone_source = Some(new_src);
4685 changed = true;
4686 }
4687 }
4688 }
4689 if changed {
4690 return Ok(Expression::CreateTable(new_ct));
4691 }
4692 }
4693 }
4694
4695 // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
4696 // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
4697 if matches!(source, DialectType::BigQuery)
4698 && matches!(
4699 target,
4700 DialectType::DuckDB
4701 | DialectType::Presto
4702 | DialectType::Trino
4703 | DialectType::Athena
4704 )
4705 {
4706 if let Expression::Subscript(ref sub) = e {
4707 let (new_index, is_safe) = match &sub.index {
4708 // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
4709 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
4710 let Literal::Number(n) = lit.as_ref() else { unreachable!() };
4711 if let Ok(val) = n.parse::<i64>() {
4712 (
4713 Some(Expression::Literal(Box::new(Literal::Number(
4714 (val + 1).to_string(),
4715 )))),
4716 false,
4717 )
4718 } else {
4719 (None, false)
4720 }
4721 }
4722 // OFFSET(n) -> n+1 (0-based)
4723 Expression::Function(ref f)
4724 if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
4725 {
4726 if let Expression::Literal(lit) = &f.args[0] {
4727 if let Literal::Number(n) = lit.as_ref() {
4728 if let Ok(val) = n.parse::<i64>() {
4729 (
4730 Some(Expression::Literal(Box::new(Literal::Number(
4731 (val + 1).to_string(),
4732 )))),
4733 false,
4734 )
4735 } else {
4736 (
4737 Some(Expression::Add(Box::new(
4738 crate::expressions::BinaryOp::new(
4739 f.args[0].clone(),
4740 Expression::number(1),
4741 ),
4742 ))),
4743 false,
4744 )
4745 }
4746 } else { (None, false) }
4747 } else {
4748 (
4749 Some(Expression::Add(Box::new(
4750 crate::expressions::BinaryOp::new(
4751 f.args[0].clone(),
4752 Expression::number(1),
4753 ),
4754 ))),
4755 false,
4756 )
4757 }
4758 }
4759 // ORDINAL(n) -> n (already 1-based)
4760 Expression::Function(ref f)
4761 if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
4762 {
4763 (Some(f.args[0].clone()), false)
4764 }
4765 // SAFE_OFFSET(n) -> n+1 (0-based, safe)
4766 Expression::Function(ref f)
4767 if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
4768 {
4769 if let Expression::Literal(lit) = &f.args[0] {
4770 if let Literal::Number(n) = lit.as_ref() {
4771 if let Ok(val) = n.parse::<i64>() {
4772 (
4773 Some(Expression::Literal(Box::new(Literal::Number(
4774 (val + 1).to_string(),
4775 )))),
4776 true,
4777 )
4778 } else {
4779 (
4780 Some(Expression::Add(Box::new(
4781 crate::expressions::BinaryOp::new(
4782 f.args[0].clone(),
4783 Expression::number(1),
4784 ),
4785 ))),
4786 true,
4787 )
4788 }
4789 } else { (None, false) }
4790 } else {
4791 (
4792 Some(Expression::Add(Box::new(
4793 crate::expressions::BinaryOp::new(
4794 f.args[0].clone(),
4795 Expression::number(1),
4796 ),
4797 ))),
4798 true,
4799 )
4800 }
4801 }
4802 // SAFE_ORDINAL(n) -> n (already 1-based, safe)
4803 Expression::Function(ref f)
4804 if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
4805 {
4806 (Some(f.args[0].clone()), true)
4807 }
4808 _ => (None, false),
4809 };
4810 if let Some(idx) = new_index {
4811 if is_safe
4812 && matches!(
4813 target,
4814 DialectType::Presto | DialectType::Trino | DialectType::Athena
4815 )
4816 {
4817 // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
4818 return Ok(Expression::Function(Box::new(Function::new(
4819 "ELEMENT_AT".to_string(),
4820 vec![sub.this.clone(), idx],
4821 ))));
4822 } else {
4823 // DuckDB or non-safe: just use subscript with converted index
4824 return Ok(Expression::Subscript(Box::new(
4825 crate::expressions::Subscript {
4826 this: sub.this.clone(),
4827 index: idx,
4828 },
4829 )));
4830 }
4831 }
4832 }
4833 }
4834
4835 // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
4836 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
4837 if let Expression::Length(ref uf) = e {
4838 let arg = uf.this.clone();
4839 let typeof_func = Expression::Function(Box::new(Function::new(
4840 "TYPEOF".to_string(),
4841 vec![arg.clone()],
4842 )));
4843 let blob_cast = Expression::Cast(Box::new(Cast {
4844 this: arg.clone(),
4845 to: DataType::VarBinary { length: None },
4846 trailing_comments: vec![],
4847 double_colon_syntax: false,
4848 format: None,
4849 default: None,
4850 inferred_type: None,
4851 }));
4852 let octet_length = Expression::Function(Box::new(Function::new(
4853 "OCTET_LENGTH".to_string(),
4854 vec![blob_cast],
4855 )));
4856 let text_cast = Expression::Cast(Box::new(Cast {
4857 this: arg,
4858 to: DataType::Text,
4859 trailing_comments: vec![],
4860 double_colon_syntax: false,
4861 format: None,
4862 default: None,
4863 inferred_type: None,
4864 }));
4865 let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
4866 this: text_cast,
4867 original_name: None,
4868 inferred_type: None,
4869 }));
4870 return Ok(Expression::Case(Box::new(Case {
4871 operand: Some(typeof_func),
4872 whens: vec![(
4873 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
4874 octet_length,
4875 )],
4876 else_: Some(length_text),
4877 comments: Vec::new(),
4878 inferred_type: None,
4879 })));
4880 }
4881 }
4882
4883 // BigQuery UNNEST alias handling (only for non-BigQuery sources):
4884 // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
4885 // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
4886 if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
4887 if let Expression::Alias(ref a) = e {
4888 if matches!(&a.this, Expression::Unnest(_)) {
4889 if a.column_aliases.is_empty() {
4890 // Drop the entire alias, return just the UNNEST expression
4891 return Ok(a.this.clone());
4892 } else {
4893 // Use first column alias as the main alias
4894 let mut new_alias = a.as_ref().clone();
4895 new_alias.alias = a.column_aliases[0].clone();
4896 new_alias.column_aliases.clear();
4897 return Ok(Expression::Alias(Box::new(new_alias)));
4898 }
4899 }
4900 }
4901 }
4902
4903 // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
4904 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
4905 if let Expression::In(ref in_expr) = e {
4906 if let Some(ref unnest_inner) = in_expr.unnest {
4907 // Build the function call for the target dialect
4908 let func_expr = if matches!(
4909 target,
4910 DialectType::Hive | DialectType::Spark | DialectType::Databricks
4911 ) {
4912 // Use EXPLODE for Hive/Spark
4913 Expression::Function(Box::new(Function::new(
4914 "EXPLODE".to_string(),
4915 vec![*unnest_inner.clone()],
4916 )))
4917 } else {
4918 // Use UNNEST for Presto/Trino/DuckDB/etc.
4919 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
4920 this: *unnest_inner.clone(),
4921 expressions: Vec::new(),
4922 with_ordinality: false,
4923 alias: None,
4924 offset_alias: None,
4925 }))
4926 };
4927
4928 // Wrap in SELECT
4929 let mut inner_select = crate::expressions::Select::new();
4930 inner_select.expressions = vec![func_expr];
4931
4932 let subquery_expr = Expression::Select(Box::new(inner_select));
4933
4934 return Ok(Expression::In(Box::new(crate::expressions::In {
4935 this: in_expr.this.clone(),
4936 expressions: Vec::new(),
4937 query: Some(subquery_expr),
4938 not: in_expr.not,
4939 global: in_expr.global,
4940 unnest: None,
4941 is_field: false,
4942 })));
4943 }
4944 }
4945 }
4946
4947 // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
4948 // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
4949 if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
4950 if let Expression::Alias(ref a) = e {
4951 if let Expression::Function(ref f) = a.this {
4952 if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
4953 && !a.column_aliases.is_empty()
4954 {
4955 // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
4956 let col_alias = a.column_aliases[0].clone();
4957 let mut inner_select = crate::expressions::Select::new();
4958 inner_select.expressions =
4959 vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
4960 Expression::Identifier(Identifier::new("value".to_string())),
4961 col_alias,
4962 )))];
4963 inner_select.from = Some(crate::expressions::From {
4964 expressions: vec![a.this.clone()],
4965 });
4966 let subquery =
4967 Expression::Subquery(Box::new(crate::expressions::Subquery {
4968 this: Expression::Select(Box::new(inner_select)),
4969 alias: Some(a.alias.clone()),
4970 column_aliases: Vec::new(),
4971 order_by: None,
4972 limit: None,
4973 offset: None,
4974 lateral: false,
4975 modifiers_inside: false,
4976 trailing_comments: Vec::new(),
4977 distribute_by: None,
4978 sort_by: None,
4979 cluster_by: None,
4980 inferred_type: None,
4981 }));
4982 return Ok(subquery);
4983 }
4984 }
4985 }
4986 }
4987
4988 // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
4989 // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
4990 // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
4991 if matches!(source, DialectType::BigQuery) {
4992 if let Expression::Select(ref s) = e {
4993 if let Some(ref from) = s.from {
4994 if from.expressions.len() >= 2 {
4995 // Collect table names from first expression
4996 let first_tables: Vec<String> = from
4997 .expressions
4998 .iter()
4999 .take(1)
5000 .filter_map(|expr| {
5001 if let Expression::Table(t) = expr {
5002 Some(t.name.name.to_ascii_lowercase())
5003 } else {
5004 None
5005 }
5006 })
5007 .collect();
5008
5009 // Check if any subsequent FROM expressions are schema-qualified with a matching table name
5010 // or have a dotted name matching a table
5011 let mut needs_rewrite = false;
5012 for expr in from.expressions.iter().skip(1) {
5013 if let Expression::Table(t) = expr {
5014 if let Some(ref schema) = t.schema {
5015 if first_tables.contains(&schema.name.to_ascii_lowercase()) {
5016 needs_rewrite = true;
5017 break;
5018 }
5019 }
5020 // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
5021 if t.schema.is_none() && t.name.name.contains('.') {
5022 let parts: Vec<&str> = t.name.name.split('.').collect();
5023 if parts.len() >= 2
5024 && first_tables.contains(&parts[0].to_ascii_lowercase())
5025 {
5026 needs_rewrite = true;
5027 break;
5028 }
5029 }
5030 }
5031 }
5032
5033 if needs_rewrite {
5034 let mut new_select = s.clone();
5035 let mut new_from_exprs = vec![from.expressions[0].clone()];
5036 let mut new_joins = s.joins.clone();
5037
5038 for expr in from.expressions.iter().skip(1) {
5039 if let Expression::Table(ref t) = expr {
5040 if let Some(ref schema) = t.schema {
5041 if first_tables.contains(&schema.name.to_ascii_lowercase()) {
5042 // This is an array path reference, convert to CROSS JOIN UNNEST
5043 let col_expr = Expression::Column(
5044 Box::new(crate::expressions::Column {
5045 name: t.name.clone(),
5046 table: Some(schema.clone()),
5047 join_mark: false,
5048 trailing_comments: vec![],
5049 span: None,
5050 inferred_type: None,
5051 }),
5052 );
5053 let unnest_expr = Expression::Unnest(Box::new(
5054 crate::expressions::UnnestFunc {
5055 this: col_expr,
5056 expressions: Vec::new(),
5057 with_ordinality: false,
5058 alias: None,
5059 offset_alias: None,
5060 },
5061 ));
5062 let join_this = if let Some(ref alias) = t.alias {
5063 if matches!(
5064 target,
5065 DialectType::Presto
5066 | DialectType::Trino
5067 | DialectType::Athena
5068 ) {
5069 // Presto: UNNEST(x) AS _t0(results)
5070 Expression::Alias(Box::new(
5071 crate::expressions::Alias {
5072 this: unnest_expr,
5073 alias: Identifier::new("_t0"),
5074 column_aliases: vec![alias.clone()],
5075 pre_alias_comments: vec![],
5076 trailing_comments: vec![],
5077 inferred_type: None,
5078 },
5079 ))
5080 } else {
5081 // BigQuery: UNNEST(x) AS results
5082 Expression::Alias(Box::new(
5083 crate::expressions::Alias {
5084 this: unnest_expr,
5085 alias: alias.clone(),
5086 column_aliases: vec![],
5087 pre_alias_comments: vec![],
5088 trailing_comments: vec![],
5089 inferred_type: None,
5090 },
5091 ))
5092 }
5093 } else {
5094 unnest_expr
5095 };
5096 new_joins.push(crate::expressions::Join {
5097 kind: crate::expressions::JoinKind::Cross,
5098 this: join_this,
5099 on: None,
5100 using: Vec::new(),
5101 use_inner_keyword: false,
5102 use_outer_keyword: false,
5103 deferred_condition: false,
5104 join_hint: None,
5105 match_condition: None,
5106 pivots: Vec::new(),
5107 comments: Vec::new(),
5108 nesting_group: 0,
5109 directed: false,
5110 });
5111 } else {
5112 new_from_exprs.push(expr.clone());
5113 }
5114 } else if t.schema.is_none() && t.name.name.contains('.') {
5115 // Dotted name in quoted identifier: `Coordinates.position`
5116 let parts: Vec<&str> = t.name.name.split('.').collect();
5117 if parts.len() >= 2
5118 && first_tables.contains(&parts[0].to_ascii_lowercase())
5119 {
5120 let join_this =
5121 if matches!(target, DialectType::BigQuery) {
5122 // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
5123 Expression::Table(t.clone())
5124 } else {
5125 // Other targets: split into "schema"."name"
5126 let mut new_t = t.clone();
5127 new_t.schema =
5128 Some(Identifier::quoted(parts[0]));
5129 new_t.name = Identifier::quoted(parts[1]);
5130 Expression::Table(new_t)
5131 };
5132 new_joins.push(crate::expressions::Join {
5133 kind: crate::expressions::JoinKind::Cross,
5134 this: join_this,
5135 on: None,
5136 using: Vec::new(),
5137 use_inner_keyword: false,
5138 use_outer_keyword: false,
5139 deferred_condition: false,
5140 join_hint: None,
5141 match_condition: None,
5142 pivots: Vec::new(),
5143 comments: Vec::new(),
5144 nesting_group: 0,
5145 directed: false,
5146 });
5147 } else {
5148 new_from_exprs.push(expr.clone());
5149 }
5150 } else {
5151 new_from_exprs.push(expr.clone());
5152 }
5153 } else {
5154 new_from_exprs.push(expr.clone());
5155 }
5156 }
5157
5158 new_select.from = Some(crate::expressions::From {
5159 expressions: new_from_exprs,
5160 ..from.clone()
5161 });
5162 new_select.joins = new_joins;
5163 return Ok(Expression::Select(new_select));
5164 }
5165 }
5166 }
5167 }
5168 }
5169
5170 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
5171 if matches!(
5172 target,
5173 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5174 ) {
5175 if let Expression::Select(ref s) = e {
5176 // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
5177 let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
5178 matches!(expr, Expression::Unnest(_))
5179 || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
5180 };
5181 let has_unnest_join = s.joins.iter().any(|j| {
5182 j.kind == crate::expressions::JoinKind::Cross && (
5183 matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
5184 || is_unnest_or_explode_expr(&j.this)
5185 )
5186 });
5187 if has_unnest_join {
5188 let mut select = s.clone();
5189 let mut new_joins = Vec::new();
5190 for join in select.joins.drain(..) {
5191 if join.kind == crate::expressions::JoinKind::Cross {
5192 // Extract the UNNEST/EXPLODE from the join
5193 let (func_expr, table_alias, col_aliases) = match &join.this {
5194 Expression::Alias(a) => {
5195 let ta = if a.alias.is_empty() {
5196 None
5197 } else {
5198 Some(a.alias.clone())
5199 };
5200 let cas = a.column_aliases.clone();
5201 match &a.this {
5202 Expression::Unnest(u) => {
5203 // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
5204 if !u.expressions.is_empty() {
5205 let mut all_args = vec![u.this.clone()];
5206 all_args.extend(u.expressions.clone());
5207 let arrays_zip =
5208 Expression::Function(Box::new(
5209 crate::expressions::Function::new(
5210 "ARRAYS_ZIP".to_string(),
5211 all_args,
5212 ),
5213 ));
5214 let inline = Expression::Function(Box::new(
5215 crate::expressions::Function::new(
5216 "INLINE".to_string(),
5217 vec![arrays_zip],
5218 ),
5219 ));
5220 (Some(inline), ta, a.column_aliases.clone())
5221 } else {
5222 // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
5223 let func_name = if u.with_ordinality {
5224 "POSEXPLODE"
5225 } else {
5226 "EXPLODE"
5227 };
5228 let explode = Expression::Function(Box::new(
5229 crate::expressions::Function::new(
5230 func_name.to_string(),
5231 vec![u.this.clone()],
5232 ),
5233 ));
5234 // For POSEXPLODE, add 'pos' to column aliases
5235 let cas = if u.with_ordinality {
5236 let mut pos_aliases =
5237 vec![Identifier::new(
5238 "pos".to_string(),
5239 )];
5240 pos_aliases
5241 .extend(a.column_aliases.clone());
5242 pos_aliases
5243 } else {
5244 a.column_aliases.clone()
5245 };
5246 (Some(explode), ta, cas)
5247 }
5248 }
5249 Expression::Function(f)
5250 if f.name.eq_ignore_ascii_case("EXPLODE") =>
5251 {
5252 (Some(Expression::Function(f.clone())), ta, cas)
5253 }
5254 _ => (None, None, Vec::new()),
5255 }
5256 }
5257 Expression::Unnest(u) => {
5258 let func_name = if u.with_ordinality {
5259 "POSEXPLODE"
5260 } else {
5261 "EXPLODE"
5262 };
5263 let explode = Expression::Function(Box::new(
5264 crate::expressions::Function::new(
5265 func_name.to_string(),
5266 vec![u.this.clone()],
5267 ),
5268 ));
5269 let ta = u.alias.clone();
5270 let col_aliases = if u.with_ordinality {
5271 vec![Identifier::new("pos".to_string())]
5272 } else {
5273 Vec::new()
5274 };
5275 (Some(explode), ta, col_aliases)
5276 }
5277 _ => (None, None, Vec::new()),
5278 };
5279 if let Some(func) = func_expr {
5280 select.lateral_views.push(crate::expressions::LateralView {
5281 this: func,
5282 table_alias,
5283 column_aliases: col_aliases,
5284 outer: false,
5285 });
5286 } else {
5287 new_joins.push(join);
5288 }
5289 } else {
5290 new_joins.push(join);
5291 }
5292 }
5293 select.joins = new_joins;
5294 return Ok(Expression::Select(select));
5295 }
5296 }
5297 }
5298
5299 // UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
5300 // for BigQuery, Presto/Trino, Snowflake
5301 if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
5302 && matches!(
5303 target,
5304 DialectType::BigQuery
5305 | DialectType::Presto
5306 | DialectType::Trino
5307 | DialectType::Snowflake
5308 )
5309 {
5310 if let Expression::Select(ref s) = e {
5311 // Check if any SELECT expressions contain UNNEST
5312 // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
5313 let has_unnest_in_select = s.expressions.iter().any(|expr| {
5314 fn contains_unnest(e: &Expression) -> bool {
5315 match e {
5316 Expression::Unnest(_) => true,
5317 Expression::Function(f)
5318 if f.name.eq_ignore_ascii_case("UNNEST") =>
5319 {
5320 true
5321 }
5322 Expression::Alias(a) => contains_unnest(&a.this),
5323 Expression::Add(op)
5324 | Expression::Sub(op)
5325 | Expression::Mul(op)
5326 | Expression::Div(op) => {
5327 contains_unnest(&op.left) || contains_unnest(&op.right)
5328 }
5329 _ => false,
5330 }
5331 }
5332 contains_unnest(expr)
5333 });
5334
5335 if has_unnest_in_select {
5336 let rewritten = Self::rewrite_unnest_expansion(s, target);
5337 if let Some(new_select) = rewritten {
5338 return Ok(Expression::Select(Box::new(new_select)));
5339 }
5340 }
5341 }
5342 }
5343
5344 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
5345 // BigQuery '\n' -> PostgreSQL literal newline in string
5346 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
5347 {
5348 if let Expression::Literal(ref lit) = e {
5349 if let Literal::String(ref s) = lit.as_ref() {
5350 if s.contains("\\n")
5351 || s.contains("\\t")
5352 || s.contains("\\r")
5353 || s.contains("\\\\")
5354 {
5355 let converted = s
5356 .replace("\\n", "\n")
5357 .replace("\\t", "\t")
5358 .replace("\\r", "\r")
5359 .replace("\\\\", "\\");
5360 return Ok(Expression::Literal(Box::new(Literal::String(converted))));
5361 }
5362 }
5363 }
5364 }
5365
5366 // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
5367 // when source != target (identity tests keep the Literal::Timestamp for native handling)
5368 if source != target {
5369 if let Expression::Literal(ref lit) = e {
5370 if let Literal::Timestamp(ref s) = lit.as_ref() {
5371 let s = s.clone();
5372 // MySQL: TIMESTAMP handling depends on source dialect
5373 // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
5374 // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
5375 if matches!(target, DialectType::MySQL) {
5376 if matches!(source, DialectType::BigQuery) {
5377 // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
5378 return Ok(Expression::Function(Box::new(Function::new(
5379 "TIMESTAMP".to_string(),
5380 vec![Expression::Literal(Box::new(Literal::String(s)))],
5381 ))));
5382 } else {
5383 // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
5384 return Ok(Expression::Cast(Box::new(Cast {
5385 this: Expression::Literal(Box::new(Literal::String(s))),
5386 to: DataType::Custom {
5387 name: "DATETIME".to_string(),
5388 },
5389 trailing_comments: Vec::new(),
5390 double_colon_syntax: false,
5391 format: None,
5392 default: None,
5393 inferred_type: None,
5394 })));
5395 }
5396 }
5397 let dt = match target {
5398 DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
5399 name: "DATETIME".to_string(),
5400 },
5401 DialectType::Snowflake => {
5402 // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
5403 if matches!(source, DialectType::BigQuery) {
5404 DataType::Custom {
5405 name: "TIMESTAMPTZ".to_string(),
5406 }
5407 } else if matches!(
5408 source,
5409 DialectType::PostgreSQL
5410 | DialectType::Redshift
5411 | DialectType::Snowflake
5412 ) {
5413 DataType::Timestamp {
5414 precision: None,
5415 timezone: false,
5416 }
5417 } else {
5418 DataType::Custom {
5419 name: "TIMESTAMPNTZ".to_string(),
5420 }
5421 }
5422 }
5423 DialectType::Spark | DialectType::Databricks => {
5424 // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
5425 if matches!(source, DialectType::BigQuery) {
5426 DataType::Timestamp {
5427 precision: None,
5428 timezone: false,
5429 }
5430 } else {
5431 DataType::Custom {
5432 name: "TIMESTAMP_NTZ".to_string(),
5433 }
5434 }
5435 }
5436 DialectType::ClickHouse => DataType::Nullable {
5437 inner: Box::new(DataType::Custom {
5438 name: "DateTime".to_string(),
5439 }),
5440 },
5441 DialectType::TSQL | DialectType::Fabric => DataType::Custom {
5442 name: "DATETIME2".to_string(),
5443 },
5444 DialectType::DuckDB => {
5445 // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
5446 // or when the timestamp string explicitly has timezone info
5447 if matches!(source, DialectType::BigQuery)
5448 || Self::timestamp_string_has_timezone(&s)
5449 {
5450 DataType::Custom {
5451 name: "TIMESTAMPTZ".to_string(),
5452 }
5453 } else {
5454 DataType::Timestamp {
5455 precision: None,
5456 timezone: false,
5457 }
5458 }
5459 }
5460 _ => DataType::Timestamp {
5461 precision: None,
5462 timezone: false,
5463 },
5464 };
5465 return Ok(Expression::Cast(Box::new(Cast {
5466 this: Expression::Literal(Box::new(Literal::String(s))),
5467 to: dt,
5468 trailing_comments: vec![],
5469 double_colon_syntax: false,
5470 format: None,
5471 default: None,
5472 inferred_type: None,
5473 })));
5474 }
5475 }
5476 }
5477
5478 // PostgreSQL DELETE requires explicit AS for table aliases
5479 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
5480 if let Expression::Delete(ref del) = e {
5481 if del.alias.is_some() && !del.alias_explicit_as {
5482 let mut new_del = del.clone();
5483 new_del.alias_explicit_as = true;
5484 return Ok(Expression::Delete(new_del));
5485 }
5486 }
5487 }
5488
5489 // UNION/INTERSECT/EXCEPT DISTINCT handling:
5490 // Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
5491 // while others don't support it (Presto, Spark, DuckDB, etc.)
5492 {
5493 let needs_distinct =
5494 matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
5495 let drop_distinct = matches!(
5496 target,
5497 DialectType::Presto
5498 | DialectType::Trino
5499 | DialectType::Athena
5500 | DialectType::Spark
5501 | DialectType::Databricks
5502 | DialectType::DuckDB
5503 | DialectType::Hive
5504 | DialectType::MySQL
5505 | DialectType::PostgreSQL
5506 | DialectType::SQLite
5507 | DialectType::TSQL
5508 | DialectType::Redshift
5509 | DialectType::Snowflake
5510 | DialectType::Oracle
5511 | DialectType::Teradata
5512 | DialectType::Drill
5513 | DialectType::Doris
5514 | DialectType::StarRocks
5515 );
5516 match &e {
5517 Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
5518 let mut new_u = (**u).clone();
5519 new_u.distinct = true;
5520 return Ok(Expression::Union(Box::new(new_u)));
5521 }
5522 Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
5523 let mut new_i = (**i).clone();
5524 new_i.distinct = true;
5525 return Ok(Expression::Intersect(Box::new(new_i)));
5526 }
5527 Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
5528 let mut new_ex = (**ex).clone();
5529 new_ex.distinct = true;
5530 return Ok(Expression::Except(Box::new(new_ex)));
5531 }
5532 Expression::Union(u) if u.distinct && drop_distinct => {
5533 let mut new_u = (**u).clone();
5534 new_u.distinct = false;
5535 return Ok(Expression::Union(Box::new(new_u)));
5536 }
5537 Expression::Intersect(i) if i.distinct && drop_distinct => {
5538 let mut new_i = (**i).clone();
5539 new_i.distinct = false;
5540 return Ok(Expression::Intersect(Box::new(new_i)));
5541 }
5542 Expression::Except(ex) if ex.distinct && drop_distinct => {
5543 let mut new_ex = (**ex).clone();
5544 new_ex.distinct = false;
5545 return Ok(Expression::Except(Box::new(new_ex)));
5546 }
5547 _ => {}
5548 }
5549 }
5550
5551 // ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name)
5552 if matches!(target, DialectType::ClickHouse) {
5553 if let Expression::Function(ref f) = e {
5554 if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
5555 let mut new_f = f.as_ref().clone();
5556 new_f.name = "map".to_string();
5557 return Ok(Expression::Function(Box::new(new_f)));
5558 }
5559 }
5560 }
5561
5562 // ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
5563 if matches!(target, DialectType::ClickHouse) {
5564 if let Expression::Intersect(ref i) = e {
5565 if i.all {
5566 let mut new_i = (**i).clone();
5567 new_i.all = false;
5568 return Ok(Expression::Intersect(Box::new(new_i)));
5569 }
5570 }
5571 }
5572
5573 // Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
5574 // Only from Generic source, to prevent double-wrapping
5575 if matches!(source, DialectType::Generic) {
5576 if let Expression::Div(ref op) = e {
5577 let cast_type = match target {
5578 DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
5579 precision: None,
5580 scale: None,
5581 real_spelling: false,
5582 }),
5583 DialectType::Drill
5584 | DialectType::Trino
5585 | DialectType::Athena
5586 | DialectType::Presto => Some(DataType::Double {
5587 precision: None,
5588 scale: None,
5589 }),
5590 DialectType::PostgreSQL
5591 | DialectType::Redshift
5592 | DialectType::Materialize
5593 | DialectType::Teradata
5594 | DialectType::RisingWave => Some(DataType::Double {
5595 precision: None,
5596 scale: None,
5597 }),
5598 _ => None,
5599 };
5600 if let Some(dt) = cast_type {
5601 let cast_left = Expression::Cast(Box::new(Cast {
5602 this: op.left.clone(),
5603 to: dt,
5604 double_colon_syntax: false,
5605 trailing_comments: Vec::new(),
5606 format: None,
5607 default: None,
5608 inferred_type: None,
5609 }));
5610 let new_op = crate::expressions::BinaryOp {
5611 left: cast_left,
5612 right: op.right.clone(),
5613 left_comments: op.left_comments.clone(),
5614 operator_comments: op.operator_comments.clone(),
5615 trailing_comments: op.trailing_comments.clone(),
5616 inferred_type: None,
5617 };
5618 return Ok(Expression::Div(Box::new(new_op)));
5619 }
5620 }
5621 }
5622
5623 // CREATE DATABASE -> CREATE SCHEMA for DuckDB target
5624 if matches!(target, DialectType::DuckDB) {
5625 if let Expression::CreateDatabase(db) = e {
5626 let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
5627 schema.if_not_exists = db.if_not_exists;
5628 return Ok(Expression::CreateSchema(Box::new(schema)));
5629 }
5630 if let Expression::DropDatabase(db) = e {
5631 let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
5632 schema.if_exists = db.if_exists;
5633 return Ok(Expression::DropSchema(Box::new(schema)));
5634 }
5635 }
5636
5637 // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
5638 if matches!(source, DialectType::ClickHouse)
5639 && !matches!(target, DialectType::ClickHouse)
5640 {
5641 if let Expression::Cast(ref c) = e {
5642 if let DataType::Custom { ref name } = c.to {
5643 if name.len() >= 9 && name[..9].eq_ignore_ascii_case("NULLABLE(") && name.ends_with(")") {
5644 let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
5645 let inner_upper = inner.to_ascii_uppercase();
5646 let new_dt = match inner_upper.as_str() {
5647 "DATETIME" | "DATETIME64" => DataType::Timestamp {
5648 precision: None,
5649 timezone: false,
5650 },
5651 "DATE" => DataType::Date,
5652 "INT64" | "BIGINT" => DataType::BigInt { length: None },
5653 "INT32" | "INT" | "INTEGER" => DataType::Int {
5654 length: None,
5655 integer_spelling: false,
5656 },
5657 "FLOAT64" | "DOUBLE" => DataType::Double {
5658 precision: None,
5659 scale: None,
5660 },
5661 "STRING" => DataType::Text,
5662 _ => DataType::Custom {
5663 name: inner.to_string(),
5664 },
5665 };
5666 let mut new_cast = c.clone();
5667 new_cast.to = new_dt;
5668 return Ok(Expression::Cast(new_cast));
5669 }
5670 }
5671 }
5672 }
5673
5674 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
5675 if matches!(target, DialectType::Snowflake) {
5676 if let Expression::ArrayConcatAgg(ref agg) = e {
5677 let mut agg_clone = agg.as_ref().clone();
5678 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
5679 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
5680 let flatten = Expression::Function(Box::new(Function::new(
5681 "ARRAY_FLATTEN".to_string(),
5682 vec![array_agg],
5683 )));
5684 return Ok(flatten);
5685 }
5686 }
5687
5688 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
5689 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
5690 if let Expression::ArrayConcatAgg(agg) = e {
5691 let arg = agg.this;
5692 return Ok(Expression::Function(Box::new(Function::new(
5693 "ARRAY_CONCAT_AGG".to_string(),
5694 vec![arg],
5695 ))));
5696 }
5697 }
5698
5699 // Determine what action to take by inspecting e immutably
5700 let action = {
5701 let source_propagates_nulls =
5702 matches!(source, DialectType::Snowflake | DialectType::BigQuery);
5703 let target_ignores_nulls =
5704 matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
5705
5706 match &e {
5707 Expression::Function(f) => {
5708 let name = f.name.to_ascii_uppercase();
5709 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
5710 if (name == "DATE_PART" || name == "DATEPART")
5711 && f.args.len() == 2
5712 && matches!(target, DialectType::Snowflake)
5713 && !matches!(source, DialectType::Snowflake)
5714 && matches!(
5715 &f.args[0],
5716 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
5717 )
5718 {
5719 Action::DatePartUnquote
5720 } else if source_propagates_nulls
5721 && target_ignores_nulls
5722 && (name == "GREATEST" || name == "LEAST")
5723 && f.args.len() >= 2
5724 {
5725 Action::GreatestLeastNull
5726 } else if matches!(source, DialectType::Snowflake)
5727 && name == "ARRAY_GENERATE_RANGE"
5728 && f.args.len() >= 2
5729 {
5730 Action::ArrayGenerateRange
5731 } else if matches!(source, DialectType::Snowflake)
5732 && matches!(target, DialectType::DuckDB)
5733 && name == "DATE_TRUNC"
5734 && f.args.len() == 2
5735 {
5736 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
5737 // Logic based on Python sqlglot's input_type_preserved flag:
5738 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
5739 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
5740 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
5741 let unit_str = match &f.args[0] {
5742 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
5743 let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
5744 Some(s.to_ascii_uppercase())
5745 }
5746 _ => None,
5747 };
5748 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
5749 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
5750 });
5751 match &f.args[1] {
5752 Expression::Cast(c) => match &c.to {
5753 DataType::Time { .. } => Action::DateTruncWrapCast,
5754 DataType::Custom { name }
5755 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
5756 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
5757 {
5758 Action::DateTruncWrapCast
5759 }
5760 DataType::Timestamp { timezone: true, .. } => {
5761 Action::DateTruncWrapCast
5762 }
5763 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
5764 DataType::Timestamp {
5765 timezone: false, ..
5766 } if is_date_unit => Action::DateTruncWrapCast,
5767 _ => Action::None,
5768 },
5769 _ => Action::None,
5770 }
5771 } else if matches!(source, DialectType::Snowflake)
5772 && matches!(target, DialectType::DuckDB)
5773 && name == "TO_DATE"
5774 && f.args.len() == 1
5775 && !matches!(
5776 &f.args[0],
5777 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
5778 )
5779 {
5780 Action::ToDateToCast
5781 } else if !matches!(source, DialectType::Redshift)
5782 && matches!(target, DialectType::Redshift)
5783 && name == "CONVERT_TIMEZONE"
5784 && (f.args.len() == 2 || f.args.len() == 3)
5785 {
5786 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
5787 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
5788 // The Redshift parser adds 'UTC' as default source_tz, but when
5789 // transpiling from other dialects, we should preserve the original form.
5790 Action::ConvertTimezoneToExpr
5791 } else if matches!(source, DialectType::Snowflake)
5792 && matches!(target, DialectType::DuckDB)
5793 && name == "REGEXP_REPLACE"
5794 && f.args.len() == 4
5795 && !matches!(
5796 &f.args[3],
5797 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
5798 )
5799 {
5800 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
5801 Action::RegexpReplaceSnowflakeToDuckDB
5802 } else if matches!(source, DialectType::Snowflake)
5803 && matches!(target, DialectType::DuckDB)
5804 && name == "REGEXP_REPLACE"
5805 && f.args.len() == 5
5806 {
5807 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB
5808 Action::RegexpReplacePositionSnowflakeToDuckDB
5809 } else if matches!(source, DialectType::Snowflake)
5810 && matches!(target, DialectType::DuckDB)
5811 && name == "REGEXP_SUBSTR"
5812 {
5813 // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
5814 Action::RegexpSubstrSnowflakeToDuckDB
5815 } else if matches!(source, DialectType::Snowflake)
5816 && matches!(target, DialectType::Snowflake)
5817 && (name == "REGEXP_SUBSTR" || name == "REGEXP_SUBSTR_ALL")
5818 && f.args.len() == 6
5819 {
5820 // Snowflake identity: strip trailing group=0
5821 Action::RegexpSubstrSnowflakeIdentity
5822 } else if matches!(source, DialectType::Snowflake)
5823 && matches!(target, DialectType::DuckDB)
5824 && name == "REGEXP_SUBSTR_ALL"
5825 {
5826 // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
5827 Action::RegexpSubstrAllSnowflakeToDuckDB
5828 } else if matches!(source, DialectType::Snowflake)
5829 && matches!(target, DialectType::DuckDB)
5830 && name == "REGEXP_COUNT"
5831 {
5832 // Snowflake REGEXP_COUNT -> DuckDB LENGTH(REGEXP_EXTRACT_ALL(...))
5833 Action::RegexpCountSnowflakeToDuckDB
5834 } else if matches!(source, DialectType::Snowflake)
5835 && matches!(target, DialectType::DuckDB)
5836 && name == "REGEXP_INSTR"
5837 {
5838 // Snowflake REGEXP_INSTR -> DuckDB complex CASE expression
5839 Action::RegexpInstrSnowflakeToDuckDB
5840 } else if matches!(source, DialectType::BigQuery)
5841 && matches!(target, DialectType::Snowflake)
5842 && name == "REGEXP_EXTRACT_ALL"
5843 {
5844 // BigQuery REGEXP_EXTRACT_ALL -> Snowflake REGEXP_SUBSTR_ALL
5845 Action::RegexpExtractAllToSnowflake
5846 } else if name == "_BQ_TO_HEX" {
5847 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
5848 Action::BigQueryToHexBare
5849 } else if matches!(source, DialectType::BigQuery)
5850 && !matches!(target, DialectType::BigQuery)
5851 {
5852 // BigQuery-specific functions that need to be converted to standard forms
5853 match name.as_str() {
5854 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
5855 | "DATE_DIFF"
5856 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
5857 | "DATETIME_ADD" | "DATETIME_SUB"
5858 | "TIME_ADD" | "TIME_SUB"
5859 | "DATE_ADD" | "DATE_SUB"
5860 | "SAFE_DIVIDE"
5861 | "GENERATE_UUID"
5862 | "COUNTIF"
5863 | "EDIT_DISTANCE"
5864 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
5865 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
5866 | "TO_HEX"
5867 | "TO_JSON_STRING"
5868 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
5869 | "DIV"
5870 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
5871 | "LAST_DAY"
5872 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
5873 | "REGEXP_CONTAINS"
5874 | "CONTAINS_SUBSTR"
5875 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
5876 | "SAFE_CAST"
5877 | "GENERATE_DATE_ARRAY"
5878 | "PARSE_DATE" | "PARSE_TIMESTAMP"
5879 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
5880 | "ARRAY_CONCAT"
5881 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
5882 | "INSTR"
5883 | "MD5" | "SHA1" | "SHA256" | "SHA512"
5884 | "GENERATE_UUID()" // just in case
5885 | "REGEXP_EXTRACT_ALL"
5886 | "REGEXP_EXTRACT"
5887 | "INT64"
5888 | "ARRAY_CONCAT_AGG"
5889 | "DATE_DIFF(" // just in case
5890 | "TO_HEX_MD5" // internal
5891 | "MOD"
5892 | "CONCAT"
5893 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
5894 | "STRUCT"
5895 | "ROUND"
5896 | "MAKE_INTERVAL"
5897 | "ARRAY_TO_STRING"
5898 | "PERCENTILE_CONT"
5899 => Action::BigQueryFunctionNormalize,
5900 "ARRAY" if matches!(target, DialectType::Snowflake)
5901 && f.args.len() == 1
5902 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
5903 => Action::BigQueryArraySelectAsStructToSnowflake,
5904 _ => Action::None,
5905 }
5906 } else if matches!(source, DialectType::BigQuery)
5907 && matches!(target, DialectType::BigQuery)
5908 {
5909 // BigQuery -> BigQuery normalizations
5910 match name.as_str() {
5911 "TIMESTAMP_DIFF"
5912 | "DATETIME_DIFF"
5913 | "TIME_DIFF"
5914 | "DATE_DIFF"
5915 | "DATE_ADD"
5916 | "TO_HEX"
5917 | "CURRENT_TIMESTAMP"
5918 | "CURRENT_DATE"
5919 | "CURRENT_TIME"
5920 | "CURRENT_DATETIME"
5921 | "GENERATE_DATE_ARRAY"
5922 | "INSTR"
5923 | "FORMAT_DATETIME"
5924 | "DATETIME"
5925 | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
5926 _ => Action::None,
5927 }
5928 } else {
5929 // Generic function normalization for non-BigQuery sources
5930 match name.as_str() {
5931 "ARBITRARY" | "AGGREGATE"
5932 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
5933 | "STRUCT_EXTRACT"
5934 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
5935 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
5936 | "SUBSTRINGINDEX"
5937 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
5938 | "UNICODE"
5939 | "XOR"
5940 | "ARRAY_REVERSE_SORT"
5941 | "ENCODE" | "DECODE"
5942 | "QUANTILE"
5943 | "EPOCH" | "EPOCH_MS"
5944 | "HASHBYTES"
5945 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
5946 | "APPROX_DISTINCT"
5947 | "DATE_PARSE" | "FORMAT_DATETIME"
5948 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
5949 | "RLIKE"
5950 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
5951 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
5952 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
5953 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
5954 | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
5955 | "MAP" | "MAP_FROM_ENTRIES"
5956 | "COLLECT_LIST" | "COLLECT_SET"
5957 | "ISNAN" | "IS_NAN"
5958 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
5959 | "FORMAT_NUMBER"
5960 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
5961 | "ELEMENT_AT"
5962 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
5963 | "SPLIT_PART"
5964 // GENERATE_SERIES: handled separately below
5965 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
5966 | "JSON_QUERY" | "JSON_VALUE"
5967 | "JSON_SEARCH"
5968 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
5969 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
5970 | "CURDATE" | "CURTIME"
5971 | "ARRAY_TO_STRING"
5972 | "ARRAY_SORT" | "SORT_ARRAY"
5973 | "LEFT" | "RIGHT"
5974 | "MAP_FROM_ARRAYS"
5975 | "LIKE" | "ILIKE"
5976 | "ARRAY_CONCAT" | "LIST_CONCAT"
5977 | "QUANTILE_CONT" | "QUANTILE_DISC"
5978 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
5979 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
5980 | "LOCATE" | "STRPOS" | "INSTR"
5981 | "CHAR"
5982 // CONCAT: handled separately for COALESCE wrapping
5983 | "ARRAY_JOIN"
5984 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
5985 | "ISNULL"
5986 | "MONTHNAME"
5987 | "TO_TIMESTAMP"
5988 | "TO_DATE"
5989 | "TO_JSON"
5990 | "REGEXP_SPLIT"
5991 | "SPLIT"
5992 | "FORMATDATETIME"
5993 | "ARRAYJOIN"
5994 | "SPLITBYSTRING" | "SPLITBYREGEXP"
5995 | "NVL"
5996 | "TO_CHAR"
5997 | "DBMS_RANDOM.VALUE"
5998 | "REGEXP_LIKE"
5999 | "REPLICATE"
6000 | "LEN"
6001 | "COUNT_BIG"
6002 | "DATEFROMPARTS"
6003 | "DATETIMEFROMPARTS"
6004 | "CONVERT" | "TRY_CONVERT"
6005 | "STRFTIME" | "STRPTIME"
6006 | "DATE_FORMAT" | "FORMAT_DATE"
6007 | "PARSE_TIMESTAMP" | "PARSE_DATE"
6008 | "FROM_BASE64" | "TO_BASE64"
6009 | "GETDATE"
6010 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
6011 | "TO_UTF8" | "FROM_UTF8"
6012 | "STARTS_WITH" | "STARTSWITH"
6013 | "APPROX_COUNT_DISTINCT"
6014 | "JSON_FORMAT"
6015 | "SYSDATE"
6016 | "LOGICAL_OR" | "LOGICAL_AND"
6017 | "MONTHS_ADD"
6018 | "SCHEMA_NAME"
6019 | "STRTOL"
6020 | "EDITDIST3"
6021 | "FORMAT"
6022 | "LIST_CONTAINS" | "LIST_HAS"
6023 | "VARIANCE" | "STDDEV"
6024 | "ISINF"
6025 | "TO_UNIXTIME"
6026 | "FROM_UNIXTIME"
6027 | "DATEPART" | "DATE_PART"
6028 | "DATENAME"
6029 | "STRING_AGG"
6030 | "JSON_ARRAYAGG"
6031 | "APPROX_QUANTILE"
6032 | "MAKE_DATE"
6033 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
6034 | "RANGE"
6035 | "TRY_ELEMENT_AT"
6036 | "STR_TO_MAP"
6037 | "STRING"
6038 | "STR_TO_TIME"
6039 | "CURRENT_SCHEMA"
6040 | "LTRIM" | "RTRIM"
6041 | "UUID"
6042 | "FARM_FINGERPRINT"
6043 | "JSON_KEYS"
6044 | "WEEKOFYEAR"
6045 | "CONCAT_WS"
6046 | "ARRAY_SLICE"
6047 | "ARRAY_PREPEND"
6048 | "ARRAY_REMOVE"
6049 | "GENERATE_DATE_ARRAY"
6050 | "PARSE_JSON"
6051 | "JSON_REMOVE"
6052 | "JSON_SET"
6053 | "LEVENSHTEIN"
6054 | "CURRENT_VERSION"
6055 | "ARRAY_MAX"
6056 | "ARRAY_MIN"
6057 | "JAROWINKLER_SIMILARITY"
6058 | "CURRENT_SCHEMAS"
6059 | "TO_VARIANT"
6060 | "JSON_GROUP_ARRAY" | "JSON_GROUP_OBJECT"
6061 | "ARRAYS_OVERLAP" | "ARRAY_INTERSECTION"
6062 => Action::GenericFunctionNormalize,
6063 // Canonical date functions -> dialect-specific
6064 "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
6065 "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
6066 "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
6067 "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
6068 "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
6069 "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
6070 "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
6071 "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
6072 "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
6073 "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
6074 "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
6075 "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
6076 "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
6077 "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
6078 "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
6079 "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
6080 "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
6081 // STR_TO_DATE(x, fmt) -> dialect-specific
6082 "STR_TO_DATE" if f.args.len() == 2
6083 && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
6084 "STR_TO_DATE" => Action::GenericFunctionNormalize,
6085 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
6086 "TS_OR_DS_ADD" if f.args.len() == 3
6087 && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
6088 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
6089 "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
6090 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
6091 "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
6092 // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
6093 "IFNULL" if f.args.len() == 2 => Action::IfnullToCoalesce,
6094 // IS_ASCII(x) -> dialect-specific
6095 "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
6096 // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
6097 "STR_POSITION" => Action::StrPositionConvert,
6098 // ARRAY_SUM -> dialect-specific
6099 "ARRAY_SUM" => Action::ArraySumConvert,
6100 // ARRAY_SIZE -> dialect-specific (Drill only)
6101 "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
6102 // ARRAY_ANY -> dialect-specific
6103 "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
6104 // Functions needing specific cross-dialect transforms
6105 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
6106 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
6107 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
6108 "ARRAY" if matches!(source, DialectType::BigQuery)
6109 && matches!(target, DialectType::Snowflake)
6110 && f.args.len() == 1
6111 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
6112 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::Snowflake | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
6113 "TRUNC" if f.args.len() == 2 && matches!(&f.args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))) && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
6114 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 && !f.args.get(1).map_or(false, |a| matches!(a, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) => Action::GenericFunctionNormalize,
6115 // DATE_TRUNC('unit', x) from Generic source -> arg swap for BigQuery/Doris/Spark/MySQL
6116 "DATE_TRUNC" if f.args.len() == 2
6117 && matches!(source, DialectType::Generic)
6118 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
6119 | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
6120 // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
6121 "TIMESTAMP_TRUNC" if f.args.len() >= 2
6122 && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
6123 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
6124 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
6125 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
6126 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
6127 // GENERATE_SERIES with interval normalization for PG target
6128 "GENERATE_SERIES" if f.args.len() >= 3
6129 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
6130 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
6131 "GENERATE_SERIES" => Action::None, // passthrough for other cases
6132 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
6133 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
6134 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
6135 "CONCAT" => Action::GenericFunctionNormalize,
6136 // DIV(a, b) -> target-specific integer division
6137 "DIV" if f.args.len() == 2
6138 && matches!(source, DialectType::PostgreSQL)
6139 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
6140 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
6141 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
6142 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
6143 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
6144 "JSONB_EXISTS" if f.args.len() == 2
6145 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
6146 // DATE_BIN -> TIME_BUCKET for DuckDB
6147 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
6148 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
6149 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
6150 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
6151 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
6152 // ClickHouse any -> ANY_VALUE for other dialects
6153 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
6154 _ => Action::None,
6155 }
6156 }
6157 }
6158 Expression::AggregateFunction(af) => {
6159 let name = af.name.to_ascii_uppercase();
6160 match name.as_str() {
6161 "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
6162 "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
6163 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
6164 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
6165 if matches!(target, DialectType::DuckDB) =>
6166 {
6167 Action::JsonObjectAggConvert
6168 }
6169 "ARRAY_AGG"
6170 if matches!(
6171 target,
6172 DialectType::Hive
6173 | DialectType::Spark
6174 | DialectType::Databricks
6175 ) =>
6176 {
6177 Action::ArrayAggToCollectList
6178 }
6179 "MAX_BY" | "MIN_BY"
6180 if matches!(
6181 target,
6182 DialectType::ClickHouse
6183 | DialectType::Spark
6184 | DialectType::Databricks
6185 | DialectType::DuckDB
6186 ) =>
6187 {
6188 Action::MaxByMinByConvert
6189 }
6190 "COLLECT_LIST"
6191 if matches!(
6192 target,
6193 DialectType::Presto | DialectType::Trino | DialectType::DuckDB
6194 ) =>
6195 {
6196 Action::CollectListToArrayAgg
6197 }
6198 "COLLECT_SET"
6199 if matches!(
6200 target,
6201 DialectType::Presto
6202 | DialectType::Trino
6203 | DialectType::Snowflake
6204 | DialectType::DuckDB
6205 ) =>
6206 {
6207 Action::CollectSetConvert
6208 }
6209 "PERCENTILE"
6210 if matches!(
6211 target,
6212 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
6213 ) =>
6214 {
6215 Action::PercentileConvert
6216 }
6217 // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
6218 "CORR"
6219 if matches!(target, DialectType::DuckDB)
6220 && matches!(source, DialectType::Snowflake) =>
6221 {
6222 Action::CorrIsnanWrap
6223 }
6224 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
6225 "APPROX_QUANTILES"
6226 if matches!(source, DialectType::BigQuery)
6227 && matches!(target, DialectType::DuckDB) =>
6228 {
6229 Action::BigQueryApproxQuantiles
6230 }
6231 // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
6232 "PERCENTILE_CONT"
6233 if matches!(source, DialectType::BigQuery)
6234 && matches!(target, DialectType::DuckDB)
6235 && af.args.len() >= 2 =>
6236 {
6237 Action::BigQueryPercentileContToDuckDB
6238 }
6239 _ => Action::None,
6240 }
6241 }
6242 Expression::JSONArrayAgg(_) => match target {
6243 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
6244 _ => Action::None,
6245 },
6246 Expression::ToNumber(tn) => {
6247 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
6248 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
6249 match target {
6250 DialectType::Oracle
6251 | DialectType::Snowflake
6252 | DialectType::Teradata => Action::None,
6253 _ => Action::GenericFunctionNormalize,
6254 }
6255 } else {
6256 Action::None
6257 }
6258 }
6259 Expression::Nvl2(_) => {
6260 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
6261 // Keep as NVL2 for dialects that support it natively
6262 match target {
6263 DialectType::Oracle
6264 | DialectType::Snowflake
6265 | DialectType::Teradata
6266 | DialectType::Spark
6267 | DialectType::Databricks
6268 | DialectType::Redshift => Action::None,
6269 _ => Action::Nvl2Expand,
6270 }
6271 }
6272 Expression::Decode(_) | Expression::DecodeCase(_) => {
6273 // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
6274 // Keep as DECODE for Oracle/Snowflake
6275 match target {
6276 DialectType::Oracle | DialectType::Snowflake => Action::None,
6277 _ => Action::DecodeSimplify,
6278 }
6279 }
6280 Expression::Coalesce(ref cf) => {
6281 // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect
6282 // BigQuery keeps IFNULL natively when source is also BigQuery
6283 if cf.original_name.as_deref() == Some("IFNULL")
6284 && !(matches!(source, DialectType::BigQuery)
6285 && matches!(target, DialectType::BigQuery))
6286 {
6287 Action::IfnullToCoalesce
6288 } else {
6289 Action::None
6290 }
6291 }
6292 Expression::IfFunc(if_func) => {
6293 if matches!(source, DialectType::Snowflake)
6294 && matches!(
6295 target,
6296 DialectType::Presto | DialectType::Trino | DialectType::SQLite
6297 )
6298 && matches!(if_func.false_value, Some(Expression::Div(_)))
6299 {
6300 Action::Div0TypedDivision
6301 } else {
6302 Action::None
6303 }
6304 }
6305 Expression::ToJson(_) => match target {
6306 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
6307 DialectType::BigQuery => Action::ToJsonConvert,
6308 DialectType::DuckDB => Action::ToJsonConvert,
6309 _ => Action::None,
6310 },
6311 Expression::ArrayAgg(ref agg) => {
6312 if matches!(target, DialectType::MySQL | DialectType::SingleStore) {
6313 Action::ArrayAggToGroupConcat
6314 } else if matches!(
6315 target,
6316 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6317 ) {
6318 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
6319 Action::ArrayAggToCollectList
6320 } else if matches!(
6321 source,
6322 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6323 ) && matches!(target, DialectType::DuckDB)
6324 && agg.filter.is_some()
6325 {
6326 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
6327 // Need to add NOT x IS NULL to existing filter
6328 Action::ArrayAggNullFilter
6329 } else if matches!(target, DialectType::DuckDB)
6330 && agg.ignore_nulls == Some(true)
6331 && !agg.order_by.is_empty()
6332 {
6333 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
6334 Action::ArrayAggIgnoreNullsDuckDB
6335 } else if !matches!(source, DialectType::Snowflake) {
6336 Action::None
6337 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
6338 let is_array_agg = agg.name.as_deref().map_or(false, |n| n.eq_ignore_ascii_case("ARRAY_AGG"))
6339 || agg.name.is_none();
6340 if is_array_agg {
6341 Action::ArrayAggCollectList
6342 } else {
6343 Action::None
6344 }
6345 } else if matches!(
6346 target,
6347 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
6348 ) && agg.filter.is_none()
6349 {
6350 Action::ArrayAggFilter
6351 } else {
6352 Action::None
6353 }
6354 }
6355 Expression::WithinGroup(wg) => {
6356 if matches!(source, DialectType::Snowflake)
6357 && matches!(
6358 target,
6359 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
6360 )
6361 && matches!(wg.this, Expression::ArrayAgg(_))
6362 {
6363 Action::ArrayAggWithinGroupFilter
6364 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
6365 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
6366 || matches!(&wg.this, Expression::StringAgg(_))
6367 {
6368 Action::StringAggConvert
6369 } else if matches!(
6370 target,
6371 DialectType::Presto
6372 | DialectType::Trino
6373 | DialectType::Athena
6374 | DialectType::Spark
6375 | DialectType::Databricks
6376 ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
6377 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
6378 || matches!(&wg.this, Expression::PercentileCont(_)))
6379 {
6380 Action::PercentileContConvert
6381 } else {
6382 Action::None
6383 }
6384 }
6385 // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
6386 // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
6387 // DATETIME is the timezone-unaware type
6388 Expression::Cast(ref c) => {
6389 if c.format.is_some()
6390 && (matches!(source, DialectType::BigQuery)
6391 || matches!(source, DialectType::Teradata))
6392 {
6393 Action::BigQueryCastFormat
6394 } else if matches!(target, DialectType::BigQuery)
6395 && !matches!(source, DialectType::BigQuery)
6396 && matches!(
6397 c.to,
6398 DataType::Timestamp {
6399 timezone: false,
6400 ..
6401 }
6402 )
6403 {
6404 Action::CastTimestampToDatetime
6405 } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
6406 && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
6407 && matches!(
6408 c.to,
6409 DataType::Timestamp {
6410 timezone: false,
6411 ..
6412 }
6413 )
6414 {
6415 // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
6416 // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
6417 Action::CastTimestampToDatetime
6418 } else if matches!(
6419 source,
6420 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6421 ) && matches!(
6422 target,
6423 DialectType::Presto
6424 | DialectType::Trino
6425 | DialectType::Athena
6426 | DialectType::DuckDB
6427 | DialectType::Snowflake
6428 | DialectType::BigQuery
6429 | DialectType::Databricks
6430 | DialectType::TSQL
6431 ) {
6432 Action::HiveCastToTryCast
6433 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
6434 && matches!(target, DialectType::MySQL | DialectType::StarRocks)
6435 {
6436 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
6437 Action::CastTimestamptzToFunc
6438 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
6439 && matches!(
6440 target,
6441 DialectType::Hive
6442 | DialectType::Spark
6443 | DialectType::Databricks
6444 | DialectType::BigQuery
6445 )
6446 {
6447 // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
6448 Action::CastTimestampStripTz
6449 } else if matches!(&c.to, DataType::Json)
6450 && matches!(&c.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
6451 && matches!(
6452 target,
6453 DialectType::Presto
6454 | DialectType::Trino
6455 | DialectType::Athena
6456 | DialectType::Snowflake
6457 )
6458 {
6459 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
6460 // Only when the input is a string literal (JSON 'value' syntax)
6461 Action::JsonLiteralToJsonParse
6462 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
6463 && matches!(target, DialectType::Spark | DialectType::Databricks)
6464 {
6465 // CAST(x AS JSON) -> TO_JSON(x) for Spark
6466 Action::CastToJsonForSpark
6467 } else if (matches!(
6468 &c.to,
6469 DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
6470 )) && matches!(
6471 target,
6472 DialectType::Spark | DialectType::Databricks
6473 ) && (matches!(&c.this, Expression::ParseJson(_))
6474 || matches!(
6475 &c.this,
6476 Expression::Function(f)
6477 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
6478 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
6479 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
6480 ))
6481 {
6482 // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
6483 // -> FROM_JSON(..., type_string) for Spark
6484 Action::CastJsonToFromJson
6485 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6486 && matches!(
6487 c.to,
6488 DataType::Timestamp {
6489 timezone: false,
6490 ..
6491 }
6492 )
6493 && matches!(source, DialectType::DuckDB)
6494 {
6495 Action::StrftimeCastTimestamp
6496 } else if matches!(source, DialectType::DuckDB)
6497 && matches!(
6498 c.to,
6499 DataType::Decimal {
6500 precision: None,
6501 ..
6502 }
6503 )
6504 {
6505 Action::DecimalDefaultPrecision
6506 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
6507 && matches!(c.to, DataType::Char { length: None })
6508 && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
6509 {
6510 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
6511 Action::MysqlCastCharToText
6512 } else if matches!(
6513 source,
6514 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6515 ) && matches!(
6516 target,
6517 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6518 ) && Self::has_varchar_char_type(&c.to)
6519 {
6520 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
6521 Action::SparkCastVarcharToString
6522 } else {
6523 Action::None
6524 }
6525 }
6526 Expression::SafeCast(ref c) => {
6527 if c.format.is_some()
6528 && matches!(source, DialectType::BigQuery)
6529 && !matches!(target, DialectType::BigQuery)
6530 {
6531 Action::BigQueryCastFormat
6532 } else {
6533 Action::None
6534 }
6535 }
6536 // For DuckDB: DATE_TRUNC should preserve the input type
6537 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
6538 if matches!(source, DialectType::Snowflake)
6539 && matches!(target, DialectType::DuckDB)
6540 {
6541 Action::DateTruncWrapCast
6542 } else {
6543 Action::None
6544 }
6545 }
6546 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
6547 Expression::SetStatement(s) => {
6548 if matches!(target, DialectType::DuckDB)
6549 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
6550 && s.items.iter().any(|item| item.kind.is_none())
6551 {
6552 Action::SetToVariable
6553 } else {
6554 Action::None
6555 }
6556 }
6557 // Cross-dialect NULL ordering normalization.
6558 // When nulls_first is not specified, fill in the source dialect's implied
6559 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
6560 Expression::Ordered(o) => {
6561 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
6562 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
6563 Action::MysqlNullsOrdering
6564 } else {
6565 // Skip targets that don't support NULLS FIRST/LAST syntax
6566 let target_supports_nulls = !matches!(
6567 target,
6568 DialectType::MySQL
6569 | DialectType::TSQL
6570 | DialectType::StarRocks
6571 | DialectType::Doris
6572 );
6573 if o.nulls_first.is_none() && source != target && target_supports_nulls
6574 {
6575 Action::NullsOrdering
6576 } else {
6577 Action::None
6578 }
6579 }
6580 }
6581 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
6582 Expression::DataType(dt) => {
6583 if matches!(source, DialectType::BigQuery)
6584 && !matches!(target, DialectType::BigQuery)
6585 {
6586 match dt {
6587 DataType::Custom { ref name }
6588 if name.eq_ignore_ascii_case("INT64")
6589 || name.eq_ignore_ascii_case("FLOAT64")
6590 || name.eq_ignore_ascii_case("BOOL")
6591 || name.eq_ignore_ascii_case("BYTES")
6592 || name.eq_ignore_ascii_case("NUMERIC")
6593 || name.eq_ignore_ascii_case("STRING")
6594 || name.eq_ignore_ascii_case("DATETIME") =>
6595 {
6596 Action::BigQueryCastType
6597 }
6598 _ => Action::None,
6599 }
6600 } else if matches!(source, DialectType::TSQL) {
6601 // For TSQL source -> any target (including TSQL itself for REAL)
6602 match dt {
6603 // REAL -> FLOAT even for TSQL->TSQL
6604 DataType::Custom { ref name }
6605 if name.eq_ignore_ascii_case("REAL") =>
6606 {
6607 Action::TSQLTypeNormalize
6608 }
6609 DataType::Float {
6610 real_spelling: true,
6611 ..
6612 } => Action::TSQLTypeNormalize,
6613 // Other TSQL type normalizations only for non-TSQL targets
6614 DataType::Custom { ref name }
6615 if !matches!(target, DialectType::TSQL)
6616 && (name.eq_ignore_ascii_case("MONEY")
6617 || name.eq_ignore_ascii_case("SMALLMONEY")
6618 || name.eq_ignore_ascii_case("DATETIME2")
6619 || name.eq_ignore_ascii_case("IMAGE")
6620 || name.eq_ignore_ascii_case("BIT")
6621 || name.eq_ignore_ascii_case("ROWVERSION")
6622 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
6623 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
6624 || (name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC"))
6625 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("DATETIME2("))
6626 || (name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME("))) =>
6627 {
6628 Action::TSQLTypeNormalize
6629 }
6630 DataType::Float {
6631 precision: Some(_), ..
6632 } if !matches!(target, DialectType::TSQL) => {
6633 Action::TSQLTypeNormalize
6634 }
6635 DataType::TinyInt { .. }
6636 if !matches!(target, DialectType::TSQL) =>
6637 {
6638 Action::TSQLTypeNormalize
6639 }
6640 // INTEGER -> INT for Databricks/Spark targets
6641 DataType::Int {
6642 integer_spelling: true,
6643 ..
6644 } if matches!(
6645 target,
6646 DialectType::Databricks | DialectType::Spark
6647 ) =>
6648 {
6649 Action::TSQLTypeNormalize
6650 }
6651 _ => Action::None,
6652 }
6653 } else if (matches!(source, DialectType::Oracle)
6654 || matches!(source, DialectType::Generic))
6655 && !matches!(target, DialectType::Oracle)
6656 {
6657 match dt {
6658 DataType::Custom { ref name }
6659 if (name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2("))
6660 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2("))
6661 || name.eq_ignore_ascii_case("VARCHAR2")
6662 || name.eq_ignore_ascii_case("NVARCHAR2") =>
6663 {
6664 Action::OracleVarchar2ToVarchar
6665 }
6666 _ => Action::None,
6667 }
6668 } else if matches!(target, DialectType::Snowflake)
6669 && !matches!(source, DialectType::Snowflake)
6670 {
6671 // When target is Snowflake but source is NOT Snowflake,
6672 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
6673 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
6674 // should keep their FLOAT spelling.
6675 match dt {
6676 DataType::Float { .. } => Action::SnowflakeFloatProtect,
6677 _ => Action::None,
6678 }
6679 } else {
6680 Action::None
6681 }
6682 }
6683 // LOWER patterns from BigQuery TO_HEX conversions:
6684 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
6685 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
6686 Expression::Lower(uf) => {
6687 if matches!(source, DialectType::BigQuery) {
6688 match &uf.this {
6689 Expression::Lower(_) => Action::BigQueryToHexLower,
6690 Expression::Function(f)
6691 if f.name == "TO_HEX"
6692 && matches!(target, DialectType::BigQuery) =>
6693 {
6694 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
6695 Action::BigQueryToHexLower
6696 }
6697 _ => Action::None,
6698 }
6699 } else {
6700 Action::None
6701 }
6702 }
6703 // UPPER patterns from BigQuery TO_HEX conversions:
6704 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
6705 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
6706 Expression::Upper(uf) => {
6707 if matches!(source, DialectType::BigQuery) {
6708 match &uf.this {
6709 Expression::Lower(_) => Action::BigQueryToHexUpper,
6710 _ => Action::None,
6711 }
6712 } else {
6713 Action::None
6714 }
6715 }
6716 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
6717 // Snowflake supports LAST_DAY with unit, so keep it there
6718 Expression::LastDay(ld) => {
6719 if matches!(source, DialectType::BigQuery)
6720 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
6721 && ld.unit.is_some()
6722 {
6723 Action::BigQueryLastDayStripUnit
6724 } else {
6725 Action::None
6726 }
6727 }
6728 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
6729 Expression::SafeDivide(_) => {
6730 if matches!(source, DialectType::BigQuery)
6731 && !matches!(target, DialectType::BigQuery)
6732 {
6733 Action::BigQuerySafeDivide
6734 } else {
6735 Action::None
6736 }
6737 }
6738 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
6739 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
6740 Expression::AnyValue(ref agg) => {
6741 if matches!(source, DialectType::BigQuery)
6742 && matches!(target, DialectType::DuckDB)
6743 && agg.having_max.is_some()
6744 {
6745 Action::BigQueryAnyValueHaving
6746 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
6747 && !matches!(source, DialectType::Spark | DialectType::Databricks)
6748 && agg.ignore_nulls.is_none()
6749 {
6750 Action::AnyValueIgnoreNulls
6751 } else {
6752 Action::None
6753 }
6754 }
6755 Expression::Any(ref q) => {
6756 if matches!(source, DialectType::PostgreSQL)
6757 && matches!(
6758 target,
6759 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6760 )
6761 && q.op.is_some()
6762 && !matches!(
6763 q.subquery,
6764 Expression::Select(_) | Expression::Subquery(_)
6765 )
6766 {
6767 Action::AnyToExists
6768 } else {
6769 Action::None
6770 }
6771 }
            // Snowflake RLIKE does a full-string match, while DuckDB's
            // REGEXP_MATCHES is a partial match, so anchor the pattern with
            // ^(...)$ for Snowflake -> DuckDB
6775 Expression::RegexpLike(_)
6776 if matches!(source, DialectType::Snowflake)
6777 && matches!(target, DialectType::DuckDB) =>
6778 {
6779 Action::RlikeSnowflakeToDuckDB
6780 }
6781 // RegexpLike from non-DuckDB sources -> REGEXP_MATCHES for DuckDB target
6782 // DuckDB's ~ is a full match, but other dialects' REGEXP/RLIKE is a partial match
6783 Expression::RegexpLike(_)
6784 if !matches!(source, DialectType::DuckDB)
6785 && matches!(target, DialectType::DuckDB) =>
6786 {
6787 Action::RegexpLikeToDuckDB
6788 }
            // RegexpLike -> Exasol: wrap the pattern as `.*pattern.*`
            // (Exasol REGEXP_LIKE is full-match, so emulate partial matching —
            // TODO confirm against RegexpLikeExasolAnchor's implementation)
6790 Expression::RegexpLike(_)
6791 if matches!(target, DialectType::Exasol) =>
6792 {
6793 Action::RegexpLikeExasolAnchor
6794 }
6795 // Safe-division source -> non-safe target: NULLIF wrapping and/or CAST
6796 // Safe-division dialects: MySQL, DuckDB, SingleStore, TiDB, ClickHouse, Doris
6797 Expression::Div(ref op)
6798 if matches!(
6799 source,
6800 DialectType::MySQL
6801 | DialectType::DuckDB
6802 | DialectType::SingleStore
6803 | DialectType::TiDB
6804 | DialectType::ClickHouse
6805 | DialectType::Doris
6806 ) && matches!(
6807 target,
6808 DialectType::PostgreSQL
6809 | DialectType::Redshift
6810 | DialectType::Drill
6811 | DialectType::Trino
6812 | DialectType::Presto
6813 | DialectType::Athena
6814 | DialectType::TSQL
6815 | DialectType::Teradata
6816 | DialectType::SQLite
6817 | DialectType::BigQuery
6818 | DialectType::Snowflake
6819 | DialectType::Databricks
6820 | DialectType::Oracle
6821 | DialectType::Materialize
6822 | DialectType::RisingWave
6823 ) =>
6824 {
6825 // Only wrap if RHS is not already NULLIF
6826 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
6827 {
6828 Action::MySQLSafeDivide
6829 } else {
6830 Action::None
6831 }
6832 }
6833 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
6834 // For TSQL/Fabric, convert to sp_rename instead
6835 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
6836 if let Some(crate::expressions::AlterTableAction::RenameTable(
6837 ref new_tbl,
6838 )) = at.actions.first()
6839 {
6840 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
6841 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
6842 Action::AlterTableToSpRename
6843 } else if new_tbl.schema.is_some()
6844 && matches!(
6845 target,
6846 DialectType::BigQuery
6847 | DialectType::Doris
6848 | DialectType::StarRocks
6849 | DialectType::DuckDB
6850 | DialectType::PostgreSQL
6851 | DialectType::Redshift
6852 )
6853 {
6854 Action::AlterTableRenameStripSchema
6855 } else {
6856 Action::None
6857 }
6858 } else {
6859 Action::None
6860 }
6861 }
6862 // EPOCH(x) expression -> target-specific epoch conversion
6863 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
6864 Action::EpochConvert
6865 }
6866 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
6867 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
6868 Action::EpochMsConvert
6869 }
6870 // STRING_AGG -> GROUP_CONCAT for MySQL/SQLite
6871 Expression::StringAgg(_) => {
6872 if matches!(
6873 target,
6874 DialectType::MySQL
6875 | DialectType::SingleStore
6876 | DialectType::Doris
6877 | DialectType::StarRocks
6878 | DialectType::SQLite
6879 ) {
6880 Action::StringAggConvert
6881 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
6882 Action::StringAggConvert
6883 } else {
6884 Action::None
6885 }
6886 }
6887 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
6888 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
6889 Expression::GroupConcat(_) => Action::GroupConcatConvert,
6890 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
6891 // DuckDB CARDINALITY -> keep as CARDINALITY for DuckDB target (used for maps)
6892 Expression::Cardinality(_)
6893 if matches!(source, DialectType::DuckDB)
6894 && matches!(target, DialectType::DuckDB) =>
6895 {
6896 Action::None
6897 }
6898 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
6899 Action::ArrayLengthConvert
6900 }
6901 Expression::ArraySize(_) => {
6902 if matches!(target, DialectType::Drill) {
6903 Action::ArraySizeDrill
6904 } else {
6905 Action::ArrayLengthConvert
6906 }
6907 }
6908 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
6909 Expression::ArrayRemove(_) => match target {
6910 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
6911 Action::ArrayRemoveConvert
6912 }
6913 _ => Action::None,
6914 },
6915 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
6916 Expression::ArrayReverse(_) => match target {
6917 DialectType::ClickHouse => Action::ArrayReverseConvert,
6918 _ => Action::None,
6919 },
6920 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
6921 Expression::JsonKeys(_) => match target {
6922 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
6923 Action::JsonKeysConvert
6924 }
6925 _ => Action::None,
6926 },
6927 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
6928 Expression::ParseJson(_) => match target {
6929 DialectType::SQLite
6930 | DialectType::Doris
6931 | DialectType::MySQL
6932 | DialectType::StarRocks => Action::ParseJsonStrip,
6933 _ => Action::None,
6934 },
6935 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
6936 Expression::WeekOfYear(_)
6937 if matches!(target, DialectType::Snowflake)
6938 && !matches!(source, DialectType::Snowflake) =>
6939 {
6940 Action::WeekOfYearToWeekIso
6941 }
6942 // NVL: clear original_name so generator uses dialect-specific function names
6943 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
6944 // XOR: expand for dialects that don't support the XOR keyword
6945 Expression::Xor(_) => {
6946 let target_supports_xor = matches!(
6947 target,
6948 DialectType::MySQL
6949 | DialectType::SingleStore
6950 | DialectType::Doris
6951 | DialectType::StarRocks
6952 );
6953 if !target_supports_xor {
6954 Action::XorExpand
6955 } else {
6956 Action::None
6957 }
6958 }
6959 // TSQL #table -> temp table normalization (CREATE TABLE)
6960 Expression::CreateTable(ct)
6961 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6962 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6963 && ct.name.name.name.starts_with('#') =>
6964 {
6965 Action::TempTableHash
6966 }
6967 // TSQL #table -> strip # from table references in SELECT/etc.
6968 Expression::Table(tr)
6969 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6970 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6971 && tr.name.name.starts_with('#') =>
6972 {
6973 Action::TempTableHash
6974 }
6975 // TSQL #table -> strip # from DROP TABLE names
6976 Expression::DropTable(ref dt)
6977 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6978 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6979 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
6980 {
6981 Action::TempTableHash
6982 }
6983 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6984 Expression::JsonExtract(_)
6985 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6986 {
6987 Action::JsonExtractToTsql
6988 }
6989 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6990 Expression::JsonExtractScalar(_)
6991 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
6992 {
6993 Action::JsonExtractToTsql
6994 }
6995 // JSON_EXTRACT -> JSONExtractString for ClickHouse
6996 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
6997 Action::JsonExtractToClickHouse
6998 }
6999 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
7000 Expression::JsonExtractScalar(_)
7001 if matches!(target, DialectType::ClickHouse) =>
7002 {
7003 Action::JsonExtractToClickHouse
7004 }
7005 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
7006 Expression::JsonExtract(ref f)
7007 if !f.arrow_syntax
7008 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
7009 {
7010 Action::JsonExtractToArrow
7011 }
7012 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
7013 Expression::JsonExtract(ref f)
7014 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
7015 && !matches!(
7016 source,
7017 DialectType::PostgreSQL
7018 | DialectType::Redshift
7019 | DialectType::Materialize
7020 )
7021 && matches!(&f.path, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$'))) =>
7022 {
7023 Action::JsonExtractToGetJsonObject
7024 }
7025 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
7026 Expression::JsonExtract(_)
7027 if matches!(
7028 target,
7029 DialectType::Hive | DialectType::Spark | DialectType::Databricks
7030 ) =>
7031 {
7032 Action::JsonExtractToGetJsonObject
7033 }
7034 // JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Snowflake, SQLite
7035 // Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
7036 Expression::JsonExtractScalar(ref f)
7037 if !f.arrow_syntax
7038 && !f.hash_arrow_syntax
7039 && matches!(
7040 target,
7041 DialectType::PostgreSQL
7042 | DialectType::Redshift
7043 | DialectType::Snowflake
7044 | DialectType::SQLite
7045 | DialectType::DuckDB
7046 ) =>
7047 {
7048 Action::JsonExtractScalarConvert
7049 }
7050 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
7051 Expression::JsonExtractScalar(_)
7052 if matches!(
7053 target,
7054 DialectType::Hive | DialectType::Spark | DialectType::Databricks
7055 ) =>
7056 {
7057 Action::JsonExtractScalarToGetJsonObject
7058 }
7059 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
7060 Expression::JsonExtract(ref f)
7061 if !f.arrow_syntax
7062 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
7063 {
7064 Action::JsonPathNormalize
7065 }
7066 // JsonQuery (parsed JSON_QUERY) -> target-specific
7067 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
7068 // JsonValue (parsed JSON_VALUE) -> target-specific
7069 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
7070 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
7071 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
7072 Expression::AtTimeZone(_)
7073 if matches!(
7074 target,
7075 DialectType::Presto
7076 | DialectType::Trino
7077 | DialectType::Athena
7078 | DialectType::Spark
7079 | DialectType::Databricks
7080 | DialectType::BigQuery
7081 | DialectType::Snowflake
7082 ) =>
7083 {
7084 Action::AtTimeZoneConvert
7085 }
7086 // DAY_OF_WEEK -> dialect-specific
7087 Expression::DayOfWeek(_)
7088 if matches!(
7089 target,
7090 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
7091 ) =>
7092 {
7093 Action::DayOfWeekConvert
7094 }
7095 // CURRENT_USER -> CURRENT_USER() for Snowflake
7096 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
7097 Action::CurrentUserParens
7098 }
7099 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
7100 Expression::ElementAt(_)
7101 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
7102 {
7103 Action::ElementAtConvert
7104 }
7105 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
7106 Expression::ArrayFunc(ref arr)
7107 if !arr.bracket_notation
7108 && matches!(
7109 target,
7110 DialectType::Spark
7111 | DialectType::Databricks
7112 | DialectType::Hive
7113 | DialectType::BigQuery
7114 | DialectType::DuckDB
7115 | DialectType::Snowflake
7116 | DialectType::Presto
7117 | DialectType::Trino
7118 | DialectType::Athena
7119 | DialectType::ClickHouse
7120 | DialectType::StarRocks
7121 ) =>
7122 {
7123 Action::ArraySyntaxConvert
7124 }
7125 // VARIANCE expression -> varSamp for ClickHouse
7126 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
7127 Action::VarianceToClickHouse
7128 }
7129 // STDDEV expression -> stddevSamp for ClickHouse
7130 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
7131 Action::StddevToClickHouse
7132 }
7133 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
7134 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
7135 Action::ApproxQuantileConvert
7136 }
7137 // MonthsBetween -> target-specific
7138 Expression::MonthsBetween(_)
7139 if !matches!(
7140 target,
7141 DialectType::Spark | DialectType::Databricks | DialectType::Hive
7142 ) =>
7143 {
7144 Action::MonthsBetweenConvert
7145 }
7146 // AddMonths -> target-specific DATEADD/DATE_ADD
7147 Expression::AddMonths(_) => Action::AddMonthsConvert,
7148 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
7149 Expression::MapFromArrays(_)
7150 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
7151 {
7152 Action::MapFromArraysConvert
7153 }
7154 // CURRENT_USER -> CURRENT_USER() for Spark
7155 Expression::CurrentUser(_)
7156 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
7157 {
7158 Action::CurrentUserSparkParens
7159 }
7160 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
7161 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
7162 if matches!(
7163 source,
7164 DialectType::Spark | DialectType::Databricks | DialectType::Hive
7165 ) && matches!(&f.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
7166 && matches!(
7167 target,
7168 DialectType::DuckDB
7169 | DialectType::Presto
7170 | DialectType::Trino
7171 | DialectType::Athena
7172 | DialectType::PostgreSQL
7173 | DialectType::Redshift
7174 ) =>
7175 {
7176 Action::SparkDateFuncCast
7177 }
7178 // $parameter -> @parameter for BigQuery
7179 Expression::Parameter(ref p)
7180 if matches!(target, DialectType::BigQuery)
7181 && matches!(source, DialectType::DuckDB)
7182 && (p.style == crate::expressions::ParameterStyle::Dollar
7183 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
7184 {
7185 Action::DollarParamConvert
7186 }
7187 // EscapeString literal: normalize literal newlines to \n
7188 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::EscapeString(ref s) if s.contains('\n') || s.contains('\r') || s.contains('\t'))
7189 =>
7190 {
7191 Action::EscapeStringNormalize
7192 }
7193 // straight_join: keep lowercase for DuckDB, quote for MySQL
7194 Expression::Column(ref col)
7195 if col.name.name == "STRAIGHT_JOIN"
7196 && col.table.is_none()
7197 && matches!(source, DialectType::DuckDB)
7198 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
7199 {
7200 Action::StraightJoinCase
7201 }
7202 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
7203 // Snowflake INTERVAL format: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
7204 Expression::Interval(ref iv)
7205 if matches!(
7206 target,
7207 DialectType::Snowflake
7208 | DialectType::PostgreSQL
7209 | DialectType::Redshift
7210 ) && iv.unit.is_some()
7211 && iv.this.as_ref().map_or(false, |t| matches!(t, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) =>
7212 {
7213 Action::SnowflakeIntervalFormat
7214 }
7215 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
7216 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
7217 if let Some(ref sample) = ts.sample {
7218 if !sample.explicit_method {
7219 Action::TablesampleReservoir
7220 } else {
7221 Action::None
7222 }
7223 } else {
7224 Action::None
7225 }
7226 }
7227 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
7228 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
7229 Expression::TableSample(ref ts)
7230 if matches!(target, DialectType::Snowflake)
7231 && !matches!(source, DialectType::Snowflake)
7232 && ts.sample.is_some() =>
7233 {
7234 if let Some(ref sample) = ts.sample {
7235 if !sample.explicit_method {
7236 Action::TablesampleSnowflakeStrip
7237 } else {
7238 Action::None
7239 }
7240 } else {
7241 Action::None
7242 }
7243 }
7244 Expression::Table(ref t)
7245 if matches!(target, DialectType::Snowflake)
7246 && !matches!(source, DialectType::Snowflake)
7247 && t.table_sample.is_some() =>
7248 {
7249 if let Some(ref sample) = t.table_sample {
7250 if !sample.explicit_method {
7251 Action::TablesampleSnowflakeStrip
7252 } else {
7253 Action::None
7254 }
7255 } else {
7256 Action::None
7257 }
7258 }
7259 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
7260 Expression::AlterTable(ref at)
7261 if matches!(target, DialectType::TSQL | DialectType::Fabric)
7262 && !at.actions.is_empty()
7263 && matches!(
7264 at.actions.first(),
7265 Some(crate::expressions::AlterTableAction::RenameTable(_))
7266 ) =>
7267 {
7268 Action::AlterTableToSpRename
7269 }
7270 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
7271 Expression::Subscript(ref sub)
7272 if matches!(
7273 target,
7274 DialectType::BigQuery
7275 | DialectType::Hive
7276 | DialectType::Spark
7277 | DialectType::Databricks
7278 ) && matches!(
7279 source,
7280 DialectType::DuckDB
7281 | DialectType::PostgreSQL
7282 | DialectType::Presto
7283 | DialectType::Trino
7284 | DialectType::Redshift
7285 | DialectType::ClickHouse
7286 ) && matches!(&sub.index, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(ref n) if n.parse::<i64>().unwrap_or(0) > 0)) =>
7287 {
7288 Action::ArrayIndexConvert
7289 }
7290 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
7291 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
7292 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
7293 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
7294 Expression::WindowFunction(ref wf) => {
7295 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
7296 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
7297 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
7298 if matches!(target, DialectType::BigQuery)
7299 && !is_row_number
7300 && !wf.over.order_by.is_empty()
7301 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
7302 {
7303 Action::BigQueryNullsOrdering
7304 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
7305 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
7306 } else {
7307 let source_nulls_last = matches!(source, DialectType::DuckDB);
7308 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
7309 matches!(
7310 f.kind,
7311 crate::expressions::WindowFrameKind::Range
7312 | crate::expressions::WindowFrameKind::Groups
7313 )
7314 });
7315 if source_nulls_last
7316 && matches!(target, DialectType::MySQL)
7317 && !wf.over.order_by.is_empty()
7318 && wf.over.order_by.iter().any(|o| !o.desc)
7319 && !has_range_frame
7320 {
7321 Action::MysqlNullsLastRewrite
7322 } else {
7323 // Check for Snowflake window frame handling for FIRST_VALUE/LAST_VALUE/NTH_VALUE
7324 let is_ranking_window_func = matches!(
7325 &wf.this,
7326 Expression::FirstValue(_)
7327 | Expression::LastValue(_)
7328 | Expression::NthValue(_)
7329 );
7330 let has_full_unbounded_frame = wf.over.frame.as_ref().map_or(false, |f| {
7331 matches!(f.kind, crate::expressions::WindowFrameKind::Rows)
7332 && matches!(f.start, crate::expressions::WindowFrameBound::UnboundedPreceding)
7333 && matches!(f.end, Some(crate::expressions::WindowFrameBound::UnboundedFollowing))
7334 && f.exclude.is_none()
7335 });
7336 if is_ranking_window_func && matches!(source, DialectType::Snowflake) {
7337 if has_full_unbounded_frame && matches!(target, DialectType::Snowflake) {
7338 // Strip the default frame for Snowflake target
7339 Action::SnowflakeWindowFrameStrip
7340 } else if !has_full_unbounded_frame && wf.over.frame.is_none() && !matches!(target, DialectType::Snowflake) {
7341 // Add default frame for non-Snowflake target
7342 Action::SnowflakeWindowFrameAdd
7343 } else {
7344 match &wf.this {
7345 Expression::FirstValue(ref vf)
7346 | Expression::LastValue(ref vf)
7347 if vf.ignore_nulls == Some(false) =>
7348 {
7349 match target {
7350 DialectType::SQLite => Action::RespectNullsConvert,
7351 _ => Action::None,
7352 }
7353 }
7354 _ => Action::None,
7355 }
7356 }
7357 } else {
7358 match &wf.this {
7359 Expression::FirstValue(ref vf)
7360 | Expression::LastValue(ref vf)
7361 if vf.ignore_nulls == Some(false) =>
7362 {
7363 // RESPECT NULLS
7364 match target {
7365 DialectType::SQLite => Action::RespectNullsConvert,
7366 _ => Action::None,
7367 }
7368 }
7369 _ => Action::None,
7370 }
7371 }
7372 }
7373 }
7374 }
7375 // CREATE TABLE a LIKE b -> dialect-specific transformations
7376 Expression::CreateTable(ref ct)
7377 if ct.columns.is_empty()
7378 && ct.constraints.iter().any(|c| {
7379 matches!(c, crate::expressions::TableConstraint::Like { .. })
7380 })
7381 && matches!(
7382 target,
7383 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
7384 ) =>
7385 {
7386 Action::CreateTableLikeToCtas
7387 }
7388 Expression::CreateTable(ref ct)
7389 if ct.columns.is_empty()
7390 && ct.constraints.iter().any(|c| {
7391 matches!(c, crate::expressions::TableConstraint::Like { .. })
7392 })
7393 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
7394 {
7395 Action::CreateTableLikeToSelectInto
7396 }
7397 Expression::CreateTable(ref ct)
7398 if ct.columns.is_empty()
7399 && ct.constraints.iter().any(|c| {
7400 matches!(c, crate::expressions::TableConstraint::Like { .. })
7401 })
7402 && matches!(target, DialectType::ClickHouse) =>
7403 {
7404 Action::CreateTableLikeToAs
7405 }
7406 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
7407 Expression::CreateTable(ref ct)
7408 if matches!(target, DialectType::DuckDB)
7409 && matches!(
7410 source,
7411 DialectType::DuckDB
7412 | DialectType::Spark
7413 | DialectType::Databricks
7414 | DialectType::Hive
7415 ) =>
7416 {
7417 let has_comment = ct.columns.iter().any(|c| {
7418 c.comment.is_some()
7419 || c.constraints.iter().any(|con| {
7420 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
7421 })
7422 });
7423 let has_props = !ct.properties.is_empty();
7424 if has_comment || has_props {
7425 Action::CreateTableStripComment
7426 } else {
7427 Action::None
7428 }
7429 }
7430 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
7431 Expression::Array(_)
7432 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
7433 {
7434 Action::ArrayConcatBracketConvert
7435 }
7436 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
7437 Expression::ArrayFunc(ref arr)
7438 if arr.bracket_notation
7439 && matches!(source, DialectType::BigQuery)
7440 && matches!(target, DialectType::Redshift) =>
7441 {
7442 Action::ArrayConcatBracketConvert
7443 }
7444 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
7445 Expression::BitwiseOrAgg(ref f)
7446 | Expression::BitwiseAndAgg(ref f)
7447 | Expression::BitwiseXorAgg(ref f) => {
7448 if matches!(target, DialectType::DuckDB) {
7449 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
7450 if let Expression::Cast(ref c) = f.this {
7451 match &c.to {
7452 DataType::Float { .. }
7453 | DataType::Double { .. }
7454 | DataType::Decimal { .. } => Action::BitAggFloatCast,
7455 DataType::Custom { ref name }
7456 if name.eq_ignore_ascii_case("REAL") =>
7457 {
7458 Action::BitAggFloatCast
7459 }
7460 _ => Action::None,
7461 }
7462 } else {
7463 Action::None
7464 }
7465 } else if matches!(target, DialectType::Snowflake) {
7466 Action::BitAggSnowflakeRename
7467 } else {
7468 Action::None
7469 }
7470 }
7471 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
7472 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
7473 Action::FilterToIff
7474 }
7475 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
7476 Expression::Avg(ref f)
7477 | Expression::Sum(ref f)
7478 | Expression::Min(ref f)
7479 | Expression::Max(ref f)
7480 | Expression::CountIf(ref f)
7481 | Expression::Stddev(ref f)
7482 | Expression::StddevPop(ref f)
7483 | Expression::StddevSamp(ref f)
7484 | Expression::Variance(ref f)
7485 | Expression::VarPop(ref f)
7486 | Expression::VarSamp(ref f)
7487 | Expression::Median(ref f)
7488 | Expression::Mode(ref f)
7489 | Expression::First(ref f)
7490 | Expression::Last(ref f)
7491 | Expression::ApproxDistinct(ref f)
7492 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
7493 {
7494 Action::AggFilterToIff
7495 }
7496 Expression::Count(ref c)
7497 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
7498 {
7499 Action::AggFilterToIff
7500 }
7501 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
7502 Expression::Count(ref c)
7503 if c.distinct
7504 && matches!(&c.this, Some(Expression::Tuple(_)))
7505 && matches!(
7506 target,
7507 DialectType::Presto
7508 | DialectType::Trino
7509 | DialectType::DuckDB
7510 | DialectType::PostgreSQL
7511 ) =>
7512 {
7513 Action::CountDistinctMultiArg
7514 }
7515 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
7516 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
7517 Action::JsonToGetPath
7518 }
7519 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
7520 Expression::Struct(_)
7521 if matches!(
7522 target,
7523 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7524 ) && matches!(source, DialectType::DuckDB) =>
7525 {
7526 Action::StructToRow
7527 }
7528 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
7529 Expression::MapFunc(ref m)
7530 if m.curly_brace_syntax
7531 && matches!(
7532 target,
7533 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
7534 )
7535 && matches!(source, DialectType::DuckDB) =>
7536 {
7537 Action::StructToRow
7538 }
7539 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
7540 Expression::ApproxCountDistinct(_)
7541 if matches!(
7542 target,
7543 DialectType::Presto | DialectType::Trino | DialectType::Athena
7544 ) =>
7545 {
7546 Action::ApproxCountDistinctToApproxDistinct
7547 }
7548 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
7549 Expression::ArrayContains(_)
7550 if matches!(
7551 target,
7552 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
7553 ) && !(matches!(source, DialectType::Snowflake) && matches!(target, DialectType::Snowflake)) =>
7554 {
7555 Action::ArrayContainsConvert
7556 }
7557 // ARRAY_CONTAINS -> DuckDB NULL-aware CASE (from Snowflake source with check_null semantics)
7558 Expression::ArrayContains(_)
7559 if matches!(target, DialectType::DuckDB)
7560 && matches!(source, DialectType::Snowflake) =>
7561 {
7562 Action::ArrayContainsDuckDBConvert
7563 }
7564 // ARRAY_EXCEPT -> target-specific conversion
7565 Expression::ArrayExcept(_)
7566 if matches!(
7567 target,
7568 DialectType::DuckDB | DialectType::Snowflake | DialectType::Presto | DialectType::Trino | DialectType::Athena
7569 ) =>
7570 {
7571 Action::ArrayExceptConvert
7572 }
7573 // ARRAY_POSITION -> swap args for Snowflake target (only when source is not Snowflake)
7574 Expression::ArrayPosition(_)
7575 if matches!(target, DialectType::Snowflake)
7576 && !matches!(source, DialectType::Snowflake) =>
7577 {
7578 Action::ArrayPositionSnowflakeSwap
7579 }
7580 // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB from Snowflake source
7581 Expression::ArrayPosition(_)
7582 if matches!(target, DialectType::DuckDB)
7583 && matches!(source, DialectType::Snowflake) =>
7584 {
7585 Action::SnowflakeArrayPositionToDuckDB
7586 }
7587 // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
7588 Expression::ArrayDistinct(_)
7589 if matches!(target, DialectType::ClickHouse) =>
7590 {
7591 Action::ArrayDistinctClickHouse
7592 }
7593 // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
7594 Expression::ArrayDistinct(_)
7595 if matches!(target, DialectType::DuckDB)
7596 && matches!(source, DialectType::Snowflake) =>
7597 {
7598 Action::ArrayDistinctConvert
7599 }
7600 // StrPosition with position -> complex expansion for Presto/DuckDB
7601 // STRPOS doesn't support a position arg in these dialects
7602 Expression::StrPosition(ref sp)
7603 if sp.position.is_some()
7604 && matches!(
7605 target,
7606 DialectType::Presto
7607 | DialectType::Trino
7608 | DialectType::Athena
7609 | DialectType::DuckDB
7610 ) =>
7611 {
7612 Action::StrPositionExpand
7613 }
7614 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
7615 Expression::First(ref f)
7616 if f.ignore_nulls == Some(true)
7617 && matches!(target, DialectType::DuckDB) =>
7618 {
7619 Action::FirstToAnyValue
7620 }
7621 // BEGIN -> START TRANSACTION for Presto/Trino
7622 Expression::Command(ref cmd)
7623 if cmd.this.eq_ignore_ascii_case("BEGIN")
7624 && matches!(
7625 target,
7626 DialectType::Presto | DialectType::Trino | DialectType::Athena
7627 ) =>
7628 {
7629 // Handled inline below
7630 Action::None // We'll handle it directly
7631 }
7632 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
7633 // PostgreSQL # is parsed as BitwiseXor (which is correct).
7634 // a || b (Concat operator) -> CONCAT function for Presto/Trino
7635 Expression::Concat(ref _op)
7636 if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7637 && matches!(target, DialectType::Presto | DialectType::Trino) =>
7638 {
7639 Action::PipeConcatToConcat
7640 }
7641 _ => Action::None,
7642 }
7643 };
7644
7645 match action {
7646 Action::None => {
7647 // Handle inline transforms that don't need a dedicated action
7648
7649 // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
7650 if let Expression::Between(ref b) = e {
7651 if let Some(sym) = b.symmetric {
7652 let keeps_symmetric =
7653 matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
7654 if !keeps_symmetric {
7655 if sym {
7656 // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
7657 let b = if let Expression::Between(b) = e {
7658 *b
7659 } else {
7660 unreachable!()
7661 };
7662 let between1 = Expression::Between(Box::new(
7663 crate::expressions::Between {
7664 this: b.this.clone(),
7665 low: b.low.clone(),
7666 high: b.high.clone(),
7667 not: b.not,
7668 symmetric: None,
7669 },
7670 ));
7671 let between2 = Expression::Between(Box::new(
7672 crate::expressions::Between {
7673 this: b.this,
7674 low: b.high,
7675 high: b.low,
7676 not: b.not,
7677 symmetric: None,
7678 },
7679 ));
7680 return Ok(Expression::Paren(Box::new(
7681 crate::expressions::Paren {
7682 this: Expression::Or(Box::new(
7683 crate::expressions::BinaryOp::new(
7684 between1, between2,
7685 ),
7686 )),
7687 trailing_comments: vec![],
7688 },
7689 )));
7690 } else {
7691 // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
7692 let b = if let Expression::Between(b) = e {
7693 *b
7694 } else {
7695 unreachable!()
7696 };
7697 return Ok(Expression::Between(Box::new(
7698 crate::expressions::Between {
7699 this: b.this,
7700 low: b.low,
7701 high: b.high,
7702 not: b.not,
7703 symmetric: None,
7704 },
7705 )));
7706 }
7707 }
7708 }
7709 }
7710
7711 // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
7712 if let Expression::ILike(ref _like) = e {
7713 if matches!(target, DialectType::StarRocks | DialectType::Doris) {
7714 let like = if let Expression::ILike(l) = e {
7715 *l
7716 } else {
7717 unreachable!()
7718 };
7719 let lower_left = Expression::Function(Box::new(Function::new(
7720 "LOWER".to_string(),
7721 vec![like.left],
7722 )));
7723 let lower_right = Expression::Function(Box::new(Function::new(
7724 "LOWER".to_string(),
7725 vec![like.right],
7726 )));
7727 return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
7728 left: lower_left,
7729 right: lower_right,
7730 escape: like.escape,
7731 quantifier: like.quantifier,
7732 inferred_type: None,
7733 })));
7734 }
7735 }
7736
7737 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
7738 if let Expression::MethodCall(ref mc) = e {
7739 if matches!(source, DialectType::Oracle)
7740 && mc.method.name.eq_ignore_ascii_case("VALUE")
7741 && mc.args.is_empty()
7742 {
7743 let is_dbms_random = match &mc.this {
7744 Expression::Identifier(id) => {
7745 id.name.eq_ignore_ascii_case("DBMS_RANDOM")
7746 }
7747 Expression::Column(col) => {
7748 col.table.is_none()
7749 && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
7750 }
7751 _ => false,
7752 };
7753 if is_dbms_random {
7754 let func_name = match target {
7755 DialectType::PostgreSQL
7756 | DialectType::Redshift
7757 | DialectType::DuckDB
7758 | DialectType::SQLite => "RANDOM",
7759 DialectType::Oracle => "DBMS_RANDOM.VALUE",
7760 _ => "RAND",
7761 };
7762 return Ok(Expression::Function(Box::new(Function::new(
7763 func_name.to_string(),
7764 vec![],
7765 ))));
7766 }
7767 }
7768 }
7769 // TRIM without explicit position -> add BOTH for ClickHouse
7770 if let Expression::Trim(ref trim) = e {
7771 if matches!(target, DialectType::ClickHouse)
7772 && trim.sql_standard_syntax
7773 && trim.characters.is_some()
7774 && !trim.position_explicit
7775 {
7776 let mut new_trim = (**trim).clone();
7777 new_trim.position_explicit = true;
7778 return Ok(Expression::Trim(Box::new(new_trim)));
7779 }
7780 }
7781 // BEGIN -> START TRANSACTION for Presto/Trino
7782 if let Expression::Transaction(ref txn) = e {
7783 if matches!(
7784 target,
7785 DialectType::Presto | DialectType::Trino | DialectType::Athena
7786 ) {
7787 // Convert BEGIN to START TRANSACTION by setting mark to "START"
7788 let mut txn = txn.clone();
7789 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
7790 "START".to_string(),
7791 ))));
7792 return Ok(Expression::Transaction(Box::new(*txn)));
7793 }
7794 }
7795 // IS TRUE/FALSE -> simplified forms for Presto/Trino
7796 if matches!(
7797 target,
7798 DialectType::Presto | DialectType::Trino | DialectType::Athena
7799 ) {
7800 match &e {
7801 Expression::IsTrue(itf) if !itf.not => {
7802 // x IS TRUE -> x
7803 return Ok(itf.this.clone());
7804 }
7805 Expression::IsTrue(itf) if itf.not => {
7806 // x IS NOT TRUE -> NOT x
7807 return Ok(Expression::Not(Box::new(
7808 crate::expressions::UnaryOp {
7809 this: itf.this.clone(),
7810 inferred_type: None,
7811 },
7812 )));
7813 }
7814 Expression::IsFalse(itf) if !itf.not => {
7815 // x IS FALSE -> NOT x
7816 return Ok(Expression::Not(Box::new(
7817 crate::expressions::UnaryOp {
7818 this: itf.this.clone(),
7819 inferred_type: None,
7820 },
7821 )));
7822 }
7823 Expression::IsFalse(itf) if itf.not => {
7824 // x IS NOT FALSE -> NOT NOT x
7825 let not_x =
7826 Expression::Not(Box::new(crate::expressions::UnaryOp {
7827 this: itf.this.clone(),
7828 inferred_type: None,
7829 }));
7830 return Ok(Expression::Not(Box::new(
7831 crate::expressions::UnaryOp {
7832 this: not_x,
7833 inferred_type: None,
7834 },
7835 )));
7836 }
7837 _ => {}
7838 }
7839 }
7840 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
7841 if matches!(target, DialectType::Redshift) {
7842 if let Expression::IsFalse(ref itf) = e {
7843 if itf.not {
7844 return Ok(Expression::Not(Box::new(
7845 crate::expressions::UnaryOp {
7846 this: Expression::IsFalse(Box::new(
7847 crate::expressions::IsTrueFalse {
7848 this: itf.this.clone(),
7849 not: false,
7850 },
7851 )),
7852 inferred_type: None,
7853 },
7854 )));
7855 }
7856 }
7857 }
7858 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
7859 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
7860 if let Expression::Function(ref f) = e {
7861 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
7862 && matches!(source, DialectType::Snowflake)
7863 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
7864 {
7865 if f.args.len() == 3 {
7866 let mut args = f.args.clone();
7867 args.push(Expression::string("g"));
7868 return Ok(Expression::Function(Box::new(Function::new(
7869 "REGEXP_REPLACE".to_string(),
7870 args,
7871 ))));
7872 } else if f.args.len() == 4 {
7873 // 4th arg might be position, add 'g' as 5th
7874 let mut args = f.args.clone();
7875 args.push(Expression::string("g"));
7876 return Ok(Expression::Function(Box::new(Function::new(
7877 "REGEXP_REPLACE".to_string(),
7878 args,
7879 ))));
7880 }
7881 }
7882 }
7883 Ok(e)
7884 }
7885
7886 Action::GreatestLeastNull => {
7887 let f = if let Expression::Function(f) = e {
7888 *f
7889 } else {
7890 unreachable!("action only triggered for Function expressions")
7891 };
7892 let mut null_checks: Vec<Expression> = f
7893 .args
7894 .iter()
7895 .map(|a| {
7896 Expression::IsNull(Box::new(IsNull {
7897 this: a.clone(),
7898 not: false,
7899 postfix_form: false,
7900 }))
7901 })
7902 .collect();
7903 let condition = if null_checks.len() == 1 {
7904 null_checks.remove(0)
7905 } else {
7906 let first = null_checks.remove(0);
7907 null_checks.into_iter().fold(first, |acc, check| {
7908 Expression::Or(Box::new(BinaryOp::new(acc, check)))
7909 })
7910 };
7911 Ok(Expression::Case(Box::new(Case {
7912 operand: None,
7913 whens: vec![(condition, Expression::Null(Null))],
7914 else_: Some(Expression::Function(Box::new(Function::new(
7915 f.name, f.args,
7916 )))),
7917 comments: Vec::new(),
7918 inferred_type: None,
7919 })))
7920 }
7921
7922 Action::ArrayGenerateRange => {
7923 let f = if let Expression::Function(f) = e {
7924 *f
7925 } else {
7926 unreachable!("action only triggered for Function expressions")
7927 };
7928 let start = f.args[0].clone();
7929 let end = f.args[1].clone();
7930 let step = f.args.get(2).cloned();
7931
7932 // Helper: compute end - 1 for converting exclusive→inclusive end.
7933 // When end is a literal number, simplify to a computed literal.
7934 fn exclusive_to_inclusive_end(end: &Expression) -> Expression {
7935 // Try to simplify literal numbers
7936 match end {
7937 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
7938 let Literal::Number(n) = lit.as_ref() else { unreachable!() };
7939 if let Ok(val) = n.parse::<i64>() {
7940 return Expression::number(val - 1);
7941 }
7942 }
7943 Expression::Neg(u) => {
7944 if let Expression::Literal(lit) = &u.this {
7945 if let Literal::Number(n) = lit.as_ref() {
7946 if let Ok(val) = n.parse::<i64>() {
7947 return Expression::number(-val - 1);
7948 }
7949 }
7950 }
7951 }
7952 _ => {}
7953 }
7954 // Non-literal: produce end - 1 expression
7955 Expression::Sub(Box::new(BinaryOp::new(
7956 end.clone(),
7957 Expression::number(1),
7958 )))
7959 }
7960
7961 match target {
7962 // Snowflake ARRAY_GENERATE_RANGE and DuckDB RANGE both use exclusive end,
7963 // so no adjustment needed — just rename the function.
7964 DialectType::Snowflake => {
7965 let mut args = vec![start, end];
7966 if let Some(s) = step {
7967 args.push(s);
7968 }
7969 Ok(Expression::Function(Box::new(Function::new(
7970 "ARRAY_GENERATE_RANGE".to_string(),
7971 args,
7972 ))))
7973 }
7974 DialectType::DuckDB => {
7975 let mut args = vec![start, end];
7976 if let Some(s) = step {
7977 args.push(s);
7978 }
7979 Ok(Expression::Function(Box::new(Function::new(
7980 "RANGE".to_string(),
7981 args,
7982 ))))
7983 }
7984 // These dialects use inclusive end, so convert exclusive→inclusive.
7985 // Presto/Trino: simplify literal numbers (3 → 2).
7986 DialectType::Presto | DialectType::Trino => {
7987 let end_inclusive = exclusive_to_inclusive_end(&end);
7988 let mut args = vec![start, end_inclusive];
7989 if let Some(s) = step {
7990 args.push(s);
7991 }
7992 Ok(Expression::Function(Box::new(Function::new(
7993 "SEQUENCE".to_string(),
7994 args,
7995 ))))
7996 }
7997 // PostgreSQL, Redshift, BigQuery: keep as end - 1 expression form.
7998 DialectType::PostgreSQL | DialectType::Redshift => {
7999 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
8000 end.clone(),
8001 Expression::number(1),
8002 )));
8003 let mut args = vec![start, end_minus_1];
8004 if let Some(s) = step {
8005 args.push(s);
8006 }
8007 Ok(Expression::Function(Box::new(Function::new(
8008 "GENERATE_SERIES".to_string(),
8009 args,
8010 ))))
8011 }
8012 DialectType::BigQuery => {
8013 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
8014 end.clone(),
8015 Expression::number(1),
8016 )));
8017 let mut args = vec![start, end_minus_1];
8018 if let Some(s) = step {
8019 args.push(s);
8020 }
8021 Ok(Expression::Function(Box::new(Function::new(
8022 "GENERATE_ARRAY".to_string(),
8023 args,
8024 ))))
8025 }
8026 _ => Ok(Expression::Function(Box::new(Function::new(
8027 f.name, f.args,
8028 )))),
8029 }
8030 }
8031
8032 Action::Div0TypedDivision => {
8033 let if_func = if let Expression::IfFunc(f) = e {
8034 *f
8035 } else {
8036 unreachable!("action only triggered for IfFunc expressions")
8037 };
8038 if let Some(Expression::Div(div)) = if_func.false_value {
8039 let cast_type = if matches!(target, DialectType::SQLite) {
8040 DataType::Float {
8041 precision: None,
8042 scale: None,
8043 real_spelling: true,
8044 }
8045 } else {
8046 DataType::Double {
8047 precision: None,
8048 scale: None,
8049 }
8050 };
8051 let casted_left = Expression::Cast(Box::new(Cast {
8052 this: div.left,
8053 to: cast_type,
8054 trailing_comments: vec![],
8055 double_colon_syntax: false,
8056 format: None,
8057 default: None,
8058 inferred_type: None,
8059 }));
8060 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
8061 condition: if_func.condition,
8062 true_value: if_func.true_value,
8063 false_value: Some(Expression::Div(Box::new(BinaryOp::new(
8064 casted_left,
8065 div.right,
8066 )))),
8067 original_name: if_func.original_name,
8068 inferred_type: None,
8069 })))
8070 } else {
8071 // Not actually a Div, reconstruct
8072 Ok(Expression::IfFunc(Box::new(if_func)))
8073 }
8074 }
8075
8076 Action::ArrayAggCollectList => {
8077 let agg = if let Expression::ArrayAgg(a) = e {
8078 *a
8079 } else {
8080 unreachable!("action only triggered for ArrayAgg expressions")
8081 };
8082 Ok(Expression::ArrayAgg(Box::new(AggFunc {
8083 name: Some("COLLECT_LIST".to_string()),
8084 ..agg
8085 })))
8086 }
8087
8088 Action::ArrayAggToGroupConcat => {
8089 let agg = if let Expression::ArrayAgg(a) = e {
8090 *a
8091 } else {
8092 unreachable!("action only triggered for ArrayAgg expressions")
8093 };
8094 Ok(Expression::ArrayAgg(Box::new(AggFunc {
8095 name: Some("GROUP_CONCAT".to_string()),
8096 ..agg
8097 })))
8098 }
8099
Action::ArrayAggWithinGroupFilter => {
    // Flatten `ARRAY_AGG(x) WITHIN GROUP (ORDER BY ...)` into a plain
    // ARRAY_AGG carrying the ORDER BY inline, and add a
    // `FILTER (WHERE x IS NOT NULL)` so the target matches source dialects
    // that silently drop NULLs from the aggregate.
    let wg = if let Expression::WithinGroup(w) = e {
        *w
    } else {
        unreachable!("action only triggered for WithinGroup expressions")
    };
    if let Expression::ArrayAgg(inner_agg) = wg.this {
        // Filter on the aggregated expression itself: x IS NOT NULL.
        let col = inner_agg.this.clone();
        let filter = Expression::IsNull(Box::new(IsNull {
            this: col,
            not: true,
            postfix_form: false,
        }));
        // For DuckDB, add explicit NULLS FIRST for DESC ordering
        // (DuckDB's default NULL placement differs from the source dialect's).
        let order_by = if matches!(target, DialectType::DuckDB) {
            wg.order_by
                .into_iter()
                .map(|mut o| {
                    if o.desc && o.nulls_first.is_none() {
                        o.nulls_first = Some(true);
                    }
                    o
                })
                .collect()
        } else {
            wg.order_by
        };
        // Rebuild the aggregate field-by-field: the ORDER BY migrates from the
        // WITHIN GROUP wrapper onto the aggregate, and the filter is attached.
        Ok(Expression::ArrayAgg(Box::new(AggFunc {
            this: inner_agg.this,
            distinct: inner_agg.distinct,
            filter: Some(filter),
            order_by,
            name: inner_agg.name,
            ignore_nulls: inner_agg.ignore_nulls,
            having_max: inner_agg.having_max,
            limit: inner_agg.limit,
            inferred_type: None,
        })))
    } else {
        // WITHIN GROUP over something other than ARRAY_AGG: leave untouched.
        Ok(Expression::WithinGroup(Box::new(wg)))
    }
}
8142
Action::ArrayAggFilter => {
    // Attach `FILTER (WHERE x IS NOT NULL)` to a bare ARRAY_AGG(x) so
    // targets that keep NULLs in the aggregate match sources that drop them.
    // NOTE(review): this overwrites any pre-existing `agg.filter` instead of
    // AND-ing with it (contrast ArrayAggNullFilter below) — presumably this
    // action is only selected when no filter is present; confirm upstream.
    let agg = if let Expression::ArrayAgg(a) = e {
        *a
    } else {
        unreachable!("action only triggered for ArrayAgg expressions")
    };
    let col = agg.this.clone();
    let filter = Expression::IsNull(Box::new(IsNull {
        this: col,
        not: true,
        postfix_form: false,
    }));
    Ok(Expression::ArrayAgg(Box::new(AggFunc {
        filter: Some(filter),
        ..agg
    })))
}
8160
Action::ArrayAggNullFilter => {
    // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
    // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
    let agg = if let Expression::ArrayAgg(a) = e {
        *a
    } else {
        unreachable!("action only triggered for ArrayAgg expressions")
    };
    // Build the NULL guard on the aggregated expression itself.
    let col = agg.this.clone();
    let not_null = Expression::IsNull(Box::new(IsNull {
        this: col,
        not: true,
        postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
    }));
    // Preserve any pre-existing filter by AND-ing the guard onto it;
    // otherwise the guard becomes the whole filter.
    let new_filter = if let Some(existing_filter) = agg.filter {
        // AND the NOT IS NULL with existing filter
        Expression::And(Box::new(crate::expressions::BinaryOp::new(
            existing_filter,
            not_null,
        )))
    } else {
        not_null
    };
    Ok(Expression::ArrayAgg(Box::new(AggFunc {
        filter: Some(new_filter),
        ..agg
    })))
}
8189
Action::BigQueryArraySelectAsStructToSnowflake => {
    // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
    // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
    if let Expression::Function(mut f) = e {
        // Only fire on ARRAY(...) whose single argument is a
        // `SELECT AS STRUCT` subquery.
        let is_match = f.args.len() == 1
            && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
        if is_match {
            let inner_select = match f.args.remove(0) {
                Expression::Select(s) => *s,
                _ => unreachable!(
                    "argument already verified to be a Select expression"
                ),
            };
            // Build OBJECT_CONSTRUCT args from SELECT expressions
            // (alternating 'key', value pairs, as OBJECT_CONSTRUCT expects).
            let mut oc_args = Vec::new();
            for expr in &inner_select.expressions {
                match expr {
                    // `x AS name` -> key 'name', value x
                    Expression::Alias(a) => {
                        let key = Expression::Literal(Box::new(Literal::String(
                            a.alias.name.clone(),
                        )));
                        let value = a.this.clone();
                        oc_args.push(key);
                        oc_args.push(value);
                    }
                    // bare column `c` -> key 'c', value c
                    Expression::Column(c) => {
                        let key = Expression::Literal(Box::new(Literal::String(
                            c.name.name.clone(),
                        )));
                        oc_args.push(key);
                        oc_args.push(expr.clone());
                    }
                    // Anything else (computed expression without alias):
                    // pushed as-is, without a key.
                    // NOTE(review): this yields an odd-length OBJECT_CONSTRUCT
                    // arg list; presumably such inputs are rejected or aliased
                    // earlier in the pipeline — confirm.
                    _ => {
                        oc_args.push(expr.clone());
                    }
                }
            }
            let object_construct = Expression::Function(Box::new(Function::new(
                "OBJECT_CONSTRUCT".to_string(),
                oc_args,
            )));
            let array_agg = Expression::Function(Box::new(Function::new(
                "ARRAY_AGG".to_string(),
                vec![object_construct],
            )));
            // Rebuild the SELECT with ARRAY_AGG(...) as its only projection,
            // carrying over the clauses that affect which rows are aggregated.
            let mut new_select = crate::expressions::Select::new();
            new_select.expressions = vec![array_agg];
            new_select.from = inner_select.from.clone();
            new_select.where_clause = inner_select.where_clause.clone();
            new_select.group_by = inner_select.group_by.clone();
            new_select.having = inner_select.having.clone();
            new_select.joins = inner_select.joins.clone();
            // Wrap in a parenthesized scalar subquery: (SELECT ARRAY_AGG(...) FROM t)
            Ok(Expression::Subquery(Box::new(
                crate::expressions::Subquery {
                    this: Expression::Select(Box::new(new_select)),
                    alias: None,
                    column_aliases: Vec::new(),
                    order_by: None,
                    limit: None,
                    offset: None,
                    distribute_by: None,
                    sort_by: None,
                    cluster_by: None,
                    lateral: false,
                    modifiers_inside: false,
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                },
            )))
        } else {
            // Not the ARRAY(SELECT AS STRUCT ...) shape: pass through unchanged.
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
8266
Action::BigQueryPercentileContToDuckDB => {
    // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
    if let Expression::AggregateFunction(mut af) = e {
        af.name = "QUANTILE_CONT".to_string();
        af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
        // Keep only first 2 args — DuckDB's QUANTILE_CONT takes (expr, fraction).
        if af.args.len() > 2 {
            af.args.truncate(2);
        }
        Ok(Expression::AggregateFunction(af))
    } else {
        // Not an aggregate function node: pass through unchanged.
        Ok(e)
    }
}
8281
Action::ArrayAggIgnoreNullsDuckDB => {
    // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
    // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
    let mut agg = if let Expression::ArrayAgg(a) = e {
        *a
    } else {
        unreachable!("action only triggered for ArrayAgg expressions")
    };
    agg.ignore_nulls = None; // Strip IGNORE NULLS
    // Only the first ORDER BY key gets an explicit NULLS FIRST;
    // keys that already specify NULL placement are left alone by virtue of
    // overwriting only index 0.
    if !agg.order_by.is_empty() {
        agg.order_by[0].nulls_first = Some(true);
    }
    Ok(Expression::ArrayAgg(Box::new(agg)))
}
8296
Action::CountDistinctMultiArg => {
    // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
    // Emulates multi-argument COUNT(DISTINCT ...) on targets that only accept
    // a single argument: the CASE yields NULL (uncounted) whenever any
    // component is NULL, matching multi-arg COUNT DISTINCT semantics.
    if let Expression::Count(c) = e {
        if let Some(Expression::Tuple(t)) = c.this {
            let args = t.expressions;
            // Build CASE expression:
            // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
            let mut whens = Vec::new();
            for arg in &args {
                whens.push((
                    Expression::IsNull(Box::new(IsNull {
                        this: arg.clone(),
                        not: false,
                        postfix_form: false,
                    })),
                    Expression::Null(crate::expressions::Null),
                ));
            }
            // Build the tuple for ELSE
            let tuple_expr =
                Expression::Tuple(Box::new(crate::expressions::Tuple {
                    expressions: args,
                }));
            let case_expr = Expression::Case(Box::new(crate::expressions::Case {
                operand: None,
                whens,
                else_: Some(tuple_expr),
                comments: Vec::new(),
                inferred_type: None,
            }));
            // DISTINCT is forced on: this rewrite only applies to COUNT(DISTINCT ...).
            Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
                this: Some(case_expr),
                star: false,
                distinct: true,
                filter: c.filter,
                ignore_nulls: c.ignore_nulls,
                original_name: c.original_name,
                inferred_type: None,
            })))
        } else {
            // Single-argument COUNT: nothing to rewrite.
            Ok(Expression::Count(c))
        }
    } else {
        Ok(e)
    }
}
8343
Action::CastTimestampToDatetime => {
    // Rewrite the cast's target type to the dialect-specific DATETIME spelling,
    // keeping the operand and all cast options (format, default, ...) intact.
    // DataType::Custom is used so the generator emits the name verbatim.
    let c = if let Expression::Cast(c) = e {
        *c
    } else {
        unreachable!("action only triggered for Cast expressions")
    };
    Ok(Expression::Cast(Box::new(Cast {
        to: DataType::Custom {
            name: "DATETIME".to_string(),
        },
        ..c
    })))
}
8357
Action::CastTimestampStripTz => {
    // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
    // Note: the precision is dropped along with the time zone.
    let c = if let Expression::Cast(c) = e {
        *c
    } else {
        unreachable!("action only triggered for Cast expressions")
    };
    Ok(Expression::Cast(Box::new(Cast {
        to: DataType::Timestamp {
            precision: None,
            timezone: false,
        },
        ..c
    })))
}
8373
Action::CastTimestamptzToFunc => {
    // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
    // The cast node is discarded entirely; only its operand survives as the
    // function argument (format/default cast options do not carry over).
    let c = if let Expression::Cast(c) = e {
        *c
    } else {
        unreachable!("action only triggered for Cast expressions")
    };
    Ok(Expression::Function(Box::new(Function::new(
        "TIMESTAMP".to_string(),
        vec![c.this],
    ))))
}
8386
Action::ToDateToCast => {
    // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
    if let Expression::Function(f) = e {
        // NOTE(review): `unwrap()` panics if TO_DATE was parsed with zero
        // arguments — presumably arity is validated before this action is
        // selected; confirm upstream.
        let arg = f.args.into_iter().next().unwrap();
        Ok(Expression::Cast(Box::new(Cast {
            this: arg,
            to: DataType::Date,
            double_colon_syntax: false,
            trailing_comments: vec![],
            format: None,
            default: None,
            inferred_type: None,
        })))
    } else {
        Ok(e)
    }
}
Action::DateTruncWrapCast => {
    // Handle both Expression::DateTrunc/TimestampTrunc and
    // Expression::Function("DATE_TRUNC", [unit, expr])
    //
    // When the truncation input is itself CAST to some type, re-wrap the
    // whole truncation in a CAST back to that type (DATE_TRUNC's result type
    // differs from the input type on some targets). TIME inputs get special
    // handling: DATE_TRUNC cannot operate on a bare TIME, so the time is
    // anchored to the epoch date ('1970-01-01' + t), truncated, and cast back.
    match e {
        Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
            // Capture the input's cast target, if any.
            let input_type = match &d.this {
                Expression::Cast(c) => Some(c.to.clone()),
                _ => None,
            };
            if let Some(cast_type) = input_type {
                let is_time = matches!(cast_type, DataType::Time { .. });
                if is_time {
                    // CAST('1970-01-01' AS DATE) — the epoch anchor for TIME math.
                    let date_expr = Expression::Cast(Box::new(Cast {
                        this: Expression::Literal(Box::new(crate::expressions::Literal::String(
                            "1970-01-01".to_string(),
                        ),)),
                        to: DataType::Date,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    // DATE + TIME -> timestamp on which DATE_TRUNC is valid.
                    let add_expr =
                        Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
                    let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
                        this: add_expr,
                        unit: d.unit,
                    }));
                    // Cast the truncated timestamp back to TIME.
                    Ok(Expression::Cast(Box::new(Cast {
                        this: inner,
                        to: cast_type,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Non-TIME input: just wrap the truncation in a CAST back
                    // to the input's declared type.
                    // NOTE(review): a TimestampTrunc input is re-emitted here
                    // as DateTrunc — confirm that is intentional for the
                    // targets this action fires on.
                    let inner = Expression::DateTrunc(Box::new(*d));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: inner,
                        to: cast_type,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
            } else {
                // Input not a CAST: no type to restore, leave unchanged.
                Ok(Expression::DateTrunc(d))
            }
        }
        Expression::Function(f) if f.args.len() == 2 => {
            // Function-based DATE_TRUNC(unit, expr)
            let input_type = match &f.args[1] {
                Expression::Cast(c) => Some(c.to.clone()),
                _ => None,
            };
            if let Some(cast_type) = input_type {
                let is_time = matches!(cast_type, DataType::Time { .. });
                if is_time {
                    // Same epoch-anchoring trick as the node-based branch above.
                    let date_expr = Expression::Cast(Box::new(Cast {
                        this: Expression::Literal(Box::new(crate::expressions::Literal::String(
                            "1970-01-01".to_string(),
                        ),)),
                        to: DataType::Date,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    let mut args = f.args;
                    let unit_arg = args.remove(0);
                    let time_expr = args.remove(0);
                    let add_expr = Expression::Add(Box::new(BinaryOp::new(
                        date_expr, time_expr,
                    )));
                    let inner = Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![unit_arg, add_expr],
                    )));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: inner,
                        to: cast_type,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Wrap the function in CAST
                    Ok(Expression::Cast(Box::new(Cast {
                        this: Expression::Function(f),
                        to: cast_type,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
            } else {
                Ok(Expression::Function(f))
            }
        }
        // Any other expression shape: pass through unchanged.
        other => Ok(other),
    }
}
8516
Action::RegexpReplaceSnowflakeToDuckDB => {
    // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
    // Snowflake replaces all occurrences by default; DuckDB replaces only the
    // first unless the 'g' flag is given, so the flag is injected here.
    // NOTE(review): the fourth (position) argument, if present, is discarded —
    // presumably this action only fires for position = 1; confirm. Also the
    // three `remove(0)` calls panic on fewer than 3 args — confirm arity is
    // validated before the action is selected.
    if let Expression::Function(f) = e {
        let mut args = f.args;
        let subject = args.remove(0);
        let pattern = args.remove(0);
        let replacement = args.remove(0);
        Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_REPLACE".to_string(),
            vec![
                subject,
                pattern,
                replacement,
                Expression::Literal(Box::new(crate::expressions::Literal::String(
                    "g".to_string(),
                ))),
            ],
        ))))
    } else {
        Ok(e)
    }
}
8539
Action::RegexpReplacePositionSnowflakeToDuckDB => {
    // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
    // pos=1, occ=1 -> REGEXP_REPLACE(s, p, r) (single replace, no 'g')
    // pos>1, occ=0 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r, 'g')
    // pos>1, occ=1 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r)
    // pos=1, occ=0 -> REGEXP_REPLACE(s, p, r, 'g') (replace all)
    if let Expression::Function(f) = e {
        let mut args = f.args;
        let subject = args.remove(0);
        let pattern = args.remove(0);
        let replacement = args.remove(0);
        let position = args.remove(0);
        let occurrence = args.remove(0);

        // Classify pos/occ when they are simple numeric literals;
        // anything non-literal falls into the "pos>1" general branch.
        let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
        let is_occ_0 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
        let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));

        if is_pos_1 && is_occ_1 {
            // REGEXP_REPLACE(s, p, r) - single replace, no flags
            Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_REPLACE".to_string(),
                vec![subject, pattern, replacement],
            ))))
        } else if is_pos_1 && is_occ_0 {
            // REGEXP_REPLACE(s, p, r, 'g') - global replace
            Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_REPLACE".to_string(),
                vec![
                    subject,
                    pattern,
                    replacement,
                    Expression::Literal(Box::new(Literal::String("g".to_string()))),
                ],
            ))))
        } else {
            // pos>1: SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r[, 'g'])
            // Pre-compute pos-1 when position is a numeric literal
            // (constant-folds the prefix length instead of emitting `pos - 1`).
            let pos_minus_1 = if let Expression::Literal(ref lit) = position {
                if let Literal::Number(ref n) = lit.as_ref() {
                    if let Ok(val) = n.parse::<i64>() {
                        Expression::number(val - 1)
                    } else {
                        // Non-integer numeric literal: fall back to runtime subtraction.
                        Expression::Sub(Box::new(BinaryOp::new(
                            position.clone(),
                            Expression::number(1),
                        )))
                    }
                    // NOTE(review): a non-Number literal (e.g. a string) falls
                    // through to `position.clone()` below, i.e. pos rather than
                    // pos-1 — looks like it should also be the Sub form; confirm.
                } else { position.clone() }
            } else {
                // Non-literal position: emit `pos - 1` at runtime.
                Expression::Sub(Box::new(BinaryOp::new(
                    position.clone(),
                    Expression::number(1),
                )))
            };
            // Untouched prefix: characters before the start position.
            let prefix = Expression::Function(Box::new(Function::new(
                "SUBSTRING".to_string(),
                vec![subject.clone(), Expression::number(1), pos_minus_1],
            )));
            // Suffix starting at `pos`, where the replacement actually happens.
            let suffix_subject = Expression::Function(Box::new(Function::new(
                "SUBSTRING".to_string(),
                vec![subject, position],
            )));
            let mut replace_args = vec![suffix_subject, pattern, replacement];
            if is_occ_0 {
                // occ=0 means "replace all" -> DuckDB 'g' flag.
                replace_args.push(Expression::Literal(Box::new(Literal::String(
                    "g".to_string(),
                ))));
            }
            let replace_expr = Expression::Function(Box::new(Function::new(
                "REGEXP_REPLACE".to_string(),
                replace_args,
            )));
            // Stitch prefix and replaced suffix back together with ||.
            Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
                this: Box::new(prefix),
                expression: Box::new(replace_expr),
                safe: None,
            })))
        }
    } else {
        Ok(e)
    }
}
8623
Action::RegexpSubstrSnowflakeToDuckDB => {
    // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
    // Dispatch on the argument count; each arity maps to a different DuckDB
    // shape. The NULLIF(SUBSTRING(...), '') wrapper reproduces Snowflake's
    // NULL result when the position is past the end of the subject
    // (REGEXP_EXTRACT on '' would otherwise return '').
    if let Expression::Function(f) = e {
        let mut args = f.args;
        let arg_count = args.len();
        match arg_count {
            // REGEXP_SUBSTR(s, p) -> REGEXP_EXTRACT(s, p)
            0..=2 => {
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT".to_string(),
                    args,
                ))))
            }
            // REGEXP_SUBSTR(s, p, pos) -> REGEXP_EXTRACT(NULLIF(SUBSTRING(s, pos), ''), p)
            3 => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                let position = args.remove(0);
                let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                if is_pos_1 {
                    // pos=1 is the default: no offset needed.
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        vec![subject, pattern],
                    ))))
                } else {
                    let substring_expr = Expression::Function(Box::new(
                        Function::new(
                            "SUBSTRING".to_string(),
                            vec![subject, position],
                        ),
                    ));
                    let nullif_expr = Expression::Function(Box::new(
                        Function::new(
                            "NULLIF".to_string(),
                            vec![
                                substring_expr,
                                Expression::Literal(Box::new(Literal::String(
                                    String::new(),
                                ))),
                            ],
                        ),
                    ));
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        vec![nullif_expr, pattern],
                    ))))
                }
            }
            // REGEXP_SUBSTR(s, p, pos, occ) -> depends on pos and occ
            4 => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                let position = args.remove(0);
                let occurrence = args.remove(0);
                let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));

                // Apply the position offset (with the NULLIF guard) only when
                // pos is not the literal 1.
                let effective_subject = if is_pos_1 {
                    subject
                } else {
                    let substring_expr = Expression::Function(Box::new(
                        Function::new(
                            "SUBSTRING".to_string(),
                            vec![subject, position],
                        ),
                    ));
                    Expression::Function(Box::new(Function::new(
                        "NULLIF".to_string(),
                        vec![
                            substring_expr,
                            Expression::Literal(Box::new(Literal::String(String::new()))),
                        ],
                    )))
                };

                if is_occ_1 {
                    // First occurrence: plain REGEXP_EXTRACT.
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        vec![effective_subject, pattern],
                    ))))
                } else {
                    // ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, p), occ)
                    // (both Snowflake occurrence and DuckDB ARRAY_EXTRACT are 1-based)
                    let extract_all = Expression::Function(Box::new(
                        Function::new(
                            "REGEXP_EXTRACT_ALL".to_string(),
                            vec![effective_subject, pattern],
                        ),
                    ));
                    Ok(Expression::Function(Box::new(Function::new(
                        "ARRAY_EXTRACT".to_string(),
                        vec![extract_all, occurrence],
                    ))))
                }
            }
            // REGEXP_SUBSTR(s, p, 1, 1, 'e') -> REGEXP_EXTRACT(s, p)
            5 => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                let _position = args.remove(0);
                let _occurrence = args.remove(0);
                let _flags = args.remove(0);
                // Strip 'e' flag, convert to REGEXP_EXTRACT
                // NOTE(review): position/occurrence/flags are dropped here —
                // presumably this arity only fires for the default (1, 1, 'e')
                // combination; confirm the action-selection logic.
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT".to_string(),
                    vec![subject, pattern],
                ))))
            }
            // REGEXP_SUBSTR(s, p, 1, 1, 'e', group) -> REGEXP_EXTRACT(s, p[, group])
            _ => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                let _position = args.remove(0);
                let _occurrence = args.remove(0);
                let _flags = args.remove(0);
                let group = args.remove(0);
                let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
                if is_group_0 {
                    // Strip group=0 (default)
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        vec![subject, pattern],
                    ))))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT".to_string(),
                        vec![subject, pattern, group],
                    ))))
                }
            }
        }
    } else {
        Ok(e)
    }
}
8758
Action::RegexpSubstrSnowflakeIdentity => {
    // Snowflake→Snowflake: REGEXP_SUBSTR/REGEXP_SUBSTR_ALL with 6 args
    // Strip trailing group=0
    // (group 0 is the default, so dropping it is a cosmetic normalization;
    // any other arity or group value passes through unchanged).
    if let Expression::Function(f) = e {
        let func_name = f.name.clone();
        let mut args = f.args;
        if args.len() == 6 {
            let is_group_0 = matches!(&args[5], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
            if is_group_0 {
                args.truncate(5);
            }
        }
        Ok(Expression::Function(Box::new(Function::new(
            func_name,
            args,
        ))))
    } else {
        Ok(e)
    }
}
8779
Action::RegexpSubstrAllSnowflakeToDuckDB => {
    // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
    // Arity-dispatched like RegexpSubstrSnowflakeToDuckDB above, but without
    // the NULLIF('') guard since the result here is a list, not a scalar.
    if let Expression::Function(f) = e {
        let mut args = f.args;
        let arg_count = args.len();
        match arg_count {
            // REGEXP_SUBSTR_ALL(s, p) -> REGEXP_EXTRACT_ALL(s, p)
            0..=2 => {
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    args,
                ))))
            }
            // REGEXP_SUBSTR_ALL(s, p, pos) -> REGEXP_EXTRACT_ALL(SUBSTRING(s, pos), p)
            3 => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                let position = args.remove(0);
                let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                if is_pos_1 {
                    // pos=1 is the default: no offset needed.
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![subject, pattern],
                    ))))
                } else {
                    let substring_expr = Expression::Function(Box::new(
                        Function::new(
                            "SUBSTRING".to_string(),
                            vec![subject, position],
                        ),
                    ));
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![substring_expr, pattern],
                    ))))
                }
            }
            // REGEXP_SUBSTR_ALL(s, p, 1, occ) -> REGEXP_EXTRACT_ALL(s, p)[occ:]
            4 => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                let position = args.remove(0);
                let occurrence = args.remove(0);
                let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));

                let effective_subject = if is_pos_1 {
                    subject
                } else {
                    Expression::Function(Box::new(Function::new(
                        "SUBSTRING".to_string(),
                        vec![subject, position],
                    )))
                };

                if is_occ_1 {
                    // occ=1 keeps all matches: no slice needed.
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![effective_subject, pattern],
                    ))))
                } else {
                    // REGEXP_EXTRACT_ALL(s, p)[occ:]
                    // Snowflake's occurrence means "start from the occ-th match",
                    // which DuckDB expresses as a 1-based open-ended list slice.
                    let extract_all = Expression::Function(Box::new(
                        Function::new(
                            "REGEXP_EXTRACT_ALL".to_string(),
                            vec![effective_subject, pattern],
                        ),
                    ));
                    Ok(Expression::ArraySlice(Box::new(
                        crate::expressions::ArraySlice {
                            this: extract_all,
                            start: Some(occurrence),
                            end: None,
                        },
                    )))
                }
            }
            // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e') -> REGEXP_EXTRACT_ALL(s, p)
            5 => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                let _position = args.remove(0);
                let _occurrence = args.remove(0);
                let _flags = args.remove(0);
                // NOTE(review): position/occurrence/flags are dropped here —
                // presumably this arity only fires for defaults; confirm.
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![subject, pattern],
                ))))
            }
            // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e', 0) -> REGEXP_EXTRACT_ALL(s, p)
            _ => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                let _position = args.remove(0);
                let _occurrence = args.remove(0);
                let _flags = args.remove(0);
                let group = args.remove(0);
                let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
                if is_group_0 {
                    // group=0 is the default: strip it.
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![subject, pattern],
                    ))))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![subject, pattern, group],
                    ))))
                }
            }
        }
    } else {
        Ok(e)
    }
}
8895
Action::RegexpCountSnowflakeToDuckDB => {
    // Snowflake REGEXP_COUNT(s, p[, pos[, flags]]) ->
    // DuckDB: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
    // The CASE guards the empty-pattern edge case, where
    // REGEXP_EXTRACT_ALL would otherwise report spurious matches.
    if let Expression::Function(f) = e {
        let mut args = f.args;
        let arg_count = args.len();
        let subject = args.remove(0);
        let pattern = args.remove(0);

        // Handle position arg: count only within SUBSTRING(s, pos).
        let effective_subject = if arg_count >= 3 {
            let position = args.remove(0);
            Expression::Function(Box::new(Function::new(
                "SUBSTRING".to_string(),
                vec![subject, position],
            )))
        } else {
            subject
        };

        // Handle flags arg -> embed as (?flags) prefix in pattern
        // Only literal, non-empty string flags are embedded; anything else
        // (non-literal, empty) leaves the pattern untouched.
        let effective_pattern = if arg_count >= 4 {
            let flags = args.remove(0);
            match &flags {
                Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(f_str) if !f_str.is_empty()) => {
                    let Literal::String(f_str) = lit.as_ref() else { unreachable!() };
                    // Always use concatenation: '(?flags)' || pattern
                    let prefix = Expression::Literal(Box::new(Literal::String(
                        format!("(?{})", f_str),
                    )));
                    Expression::DPipe(Box::new(crate::expressions::DPipe {
                        this: Box::new(prefix),
                        expression: Box::new(pattern.clone()),
                        safe: None,
                    }))
                }
                _ => pattern.clone(),
            }
        } else {
            pattern.clone()
        };

        // Build: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
        // NOTE(review): the empty-pattern check compares effective_pattern,
        // which is never '' once a flags prefix has been concatenated — so
        // REGEXP_COUNT(s, '', pos, flags) skips the zero short-circuit; confirm
        // whether the check should use the raw pattern instead.
        let extract_all = Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT_ALL".to_string(),
            vec![effective_subject, effective_pattern.clone()],
        )));
        let length_expr = Expression::Length(Box::new(
            crate::expressions::UnaryFunc {
                this: extract_all,
                original_name: None,
                inferred_type: None,
            },
        ));
        let condition = Expression::Eq(Box::new(BinaryOp::new(
            effective_pattern,
            Expression::Literal(Box::new(Literal::String(String::new()))),
        )));
        Ok(Expression::Case(Box::new(Case {
            operand: None,
            whens: vec![(condition, Expression::number(0))],
            else_: Some(length_expr),
            comments: vec![],
            inferred_type: None,
        })))
    } else {
        Ok(e)
    }
}
8965
Action::RegexpInstrSnowflakeToDuckDB => {
    // Snowflake REGEXP_INSTR(s, p[, pos[, occ[, option[, flags[, group]]]]]) ->
    // DuckDB: CASE WHEN s IS NULL OR p IS NULL [OR ...] THEN NULL
    // WHEN p = '' THEN 0
    // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
    // ELSE 1 + COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(eff_s, eff_p)[1:occ], x -> LENGTH(x))), 0)
    // + COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(eff_s, eff_p)[1:occ - 1], x -> LENGTH(x))), 0)
    // + pos_offset
    // END
    //
    // Position recovery: DuckDB has no REGEXP_INSTR, so the occ-th match
    // position is reconstructed as 1 + (length of text before the occ-th match)
    // = sum of the first `occ` split segments plus the first `occ-1` matches,
    // then shifted by the start-position offset.
    if let Expression::Function(f) = e {
        let mut args = f.args;
        let subject = args.remove(0);
        // Pattern defaults to '' when the call somehow has a single argument.
        let pattern = if !args.is_empty() { args.remove(0) } else {
            Expression::Literal(Box::new(Literal::String(String::new())))
        };

        // Collect all original args for NULL checks
        let position = if !args.is_empty() { Some(args.remove(0)) } else { None };
        let occurrence = if !args.is_empty() { Some(args.remove(0)) } else { None };
        // NOTE(review): `option` (Snowflake's "return end position" switch)
        // only participates in NULL propagation below — option=1 semantics are
        // not implemented; confirm it is rejected or defaulted upstream.
        let option = if !args.is_empty() { Some(args.remove(0)) } else { None };
        let flags = if !args.is_empty() { Some(args.remove(0)) } else { None };
        let _group = if !args.is_empty() { Some(args.remove(0)) } else { None };

        // Missing position defaults to 1 (no offset).
        let is_pos_1 = position.as_ref().map_or(true, |p| matches!(p, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1")));
        // Missing occurrence defaults to 1 (first match).
        let occurrence_expr = occurrence.clone().unwrap_or(Expression::number(1));

        // Build NULL check: subject IS NULL OR pattern IS NULL [OR pos IS NULL ...]
        // Reproduces Snowflake's NULL-in/NULL-out behavior for every argument.
        let mut null_checks: Vec<Expression> = vec![
            Expression::Is(Box::new(BinaryOp::new(
                subject.clone(),
                Expression::Null(Null),
            ))),
            Expression::Is(Box::new(BinaryOp::new(
                pattern.clone(),
                Expression::Null(Null),
            ))),
        ];
        // Add NULL checks for all provided optional args
        for opt_arg in [&position, &occurrence, &option, &flags].iter() {
            if let Some(arg) = opt_arg {
                null_checks.push(Expression::Is(Box::new(BinaryOp::new(
                    (*arg).clone(),
                    Expression::Null(Null),
                ))));
            }
        }
        // Chain with OR
        let null_condition = null_checks.into_iter().reduce(|a, b| {
            Expression::Or(Box::new(BinaryOp::new(a, b)))
        }).unwrap();

        // Effective subject (apply position offset)
        let effective_subject = if is_pos_1 {
            subject.clone()
        } else {
            let pos = position.clone().unwrap_or(Expression::number(1));
            Expression::Function(Box::new(Function::new(
                "SUBSTRING".to_string(),
                vec![subject.clone(), pos],
            )))
        };

        // Effective pattern (apply flags if present)
        // Literal non-empty string flags are embedded as a '(?flags)' prefix.
        let effective_pattern = if let Some(ref fl) = flags {
            if let Expression::Literal(lit) = fl {
                if let Literal::String(f_str) = lit.as_ref() {
                    if !f_str.is_empty() {
                        let prefix = Expression::Literal(Box::new(Literal::String(
                            format!("(?{})", f_str),
                        )));
                        Expression::DPipe(Box::new(crate::expressions::DPipe {
                            this: Box::new(prefix),
                            expression: Box::new(pattern.clone()),
                            safe: None,
                        }))
                    } else {
                        pattern.clone()
                    }
                // NOTE(review): a non-String literal flags arg makes the FLAGS
                // expression become the pattern (`fl.clone()`), which looks
                // like it should be `pattern.clone()` — confirm.
                } else { fl.clone() }
            } else {
                pattern.clone()
            }
        } else {
            pattern.clone()
        };

        // WHEN pattern = '' THEN 0
        let empty_pattern_check = Expression::Eq(Box::new(BinaryOp::new(
            effective_pattern.clone(),
            Expression::Literal(Box::new(Literal::String(String::new()))),
        )));

        // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
        // (fewer matches than the requested occurrence -> not found -> 0)
        let match_count_check = Expression::Lt(Box::new(BinaryOp::new(
            Expression::Length(Box::new(crate::expressions::UnaryFunc {
                this: Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![effective_subject.clone(), effective_pattern.clone()],
                ))),
                original_name: None,
                inferred_type: None,
            })),
            occurrence_expr.clone(),
        )));

        // Helper: build LENGTH lambda for LIST_TRANSFORM
        // (fresh `x -> LENGTH(x)` node each call; AST nodes are not shared).
        let make_len_lambda = || Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
            parameters: vec![crate::expressions::Identifier::new("x")],
            body: Expression::Length(Box::new(crate::expressions::UnaryFunc {
                this: Expression::Identifier(crate::expressions::Identifier::new("x")),
                original_name: None,
                inferred_type: None,
            })),
            colon: false,
            parameter_types: vec![],
        }));

        // COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(s, p)[1:occ], x -> LENGTH(x))), 0)
        // Total length of the non-matching text before the occ-th match.
        let split_sliced = Expression::ArraySlice(Box::new(
            crate::expressions::ArraySlice {
                this: Expression::Function(Box::new(Function::new(
                    "STRING_SPLIT_REGEX".to_string(),
                    vec![effective_subject.clone(), effective_pattern.clone()],
                ))),
                start: Some(Expression::number(1)),
                end: Some(occurrence_expr.clone()),
            },
        ));
        let split_sum = Expression::Function(Box::new(Function::new(
            "COALESCE".to_string(),
            vec![
                Expression::Function(Box::new(Function::new(
                    "LIST_SUM".to_string(),
                    vec![Expression::Function(Box::new(Function::new(
                        "LIST_TRANSFORM".to_string(),
                        vec![split_sliced, make_len_lambda()],
                    )))],
                ))),
                Expression::number(0),
            ],
        )));

        // COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(s, p)[1:occ - 1], x -> LENGTH(x))), 0)
        // Total length of the first occ-1 matches themselves.
        let extract_sliced = Expression::ArraySlice(Box::new(
            crate::expressions::ArraySlice {
                this: Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![effective_subject.clone(), effective_pattern.clone()],
                ))),
                start: Some(Expression::number(1)),
                end: Some(Expression::Sub(Box::new(BinaryOp::new(
                    occurrence_expr.clone(),
                    Expression::number(1),
                )))),
            },
        ));
        let extract_sum = Expression::Function(Box::new(Function::new(
            "COALESCE".to_string(),
            vec![
                Expression::Function(Box::new(Function::new(
                    "LIST_SUM".to_string(),
                    vec![Expression::Function(Box::new(Function::new(
                        "LIST_TRANSFORM".to_string(),
                        vec![extract_sliced, make_len_lambda()],
                    )))],
                ))),
                Expression::number(0),
            ],
        )));

        // Position offset: pos - 1 when pos > 1, else 0
        // (positions computed over SUBSTRING(s, pos) must be shifted back
        // into the coordinates of the full subject).
        let pos_offset: Expression = if !is_pos_1 {
            let pos = position.clone().unwrap_or(Expression::number(1));
            Expression::Sub(Box::new(BinaryOp::new(
                pos,
                Expression::number(1),
            )))
        } else {
            Expression::number(0)
        };

        // ELSE: 1 + split_sum + extract_sum + pos_offset
        let else_expr = Expression::Add(Box::new(BinaryOp::new(
            Expression::Add(Box::new(BinaryOp::new(
                Expression::Add(Box::new(BinaryOp::new(
                    Expression::number(1),
                    split_sum,
                ))),
                extract_sum,
            ))),
            pos_offset,
        )));

        Ok(Expression::Case(Box::new(Case {
            operand: None,
            whens: vec![
                (null_condition, Expression::Null(Null)),
                (empty_pattern_check, Expression::number(0)),
                (match_count_check, Expression::number(0)),
            ],
            else_: Some(else_expr),
            comments: vec![],
            inferred_type: None,
        })))
    } else {
        Ok(e)
    }
}
9174
9175 Action::RlikeSnowflakeToDuckDB => {
9176 // Snowflake RLIKE(a, b[, flags]) -> DuckDB REGEXP_MATCHES(a, '^(' || (b) || ')$'[, flags])
9177 // Snowflake RLIKE does full-string match; DuckDB REGEXP_MATCHES does partial match
9178 // So we anchor the pattern with ^ and $
9179 // Can come as Expression::RegexpLike (from Snowflake transform_expr) or
9180 // Expression::Function("RLIKE", args) (if not transformed yet)
9181 let (subject, pattern, flags) = match e {
9182 Expression::RegexpLike(ref rl) => {
9183 (rl.this.clone(), rl.pattern.clone(), rl.flags.clone())
9184 }
9185 Expression::Function(ref f) if f.args.len() >= 2 => {
9186 let s = f.args[0].clone();
9187 let p = f.args[1].clone();
9188 let fl = f.args.get(2).cloned();
9189 (s, p, fl)
9190 }
9191 _ => return Ok(e),
9192 };
9193
9194 // Build anchored pattern: '^(' || (pattern) || ')$'
9195 let prefix = Expression::Literal(Box::new(Literal::String("^(".to_string())));
9196 let suffix = Expression::Literal(Box::new(Literal::String(")$".to_string())));
9197 let paren_pattern = Expression::Paren(Box::new(Paren {
9198 this: pattern,
9199 trailing_comments: vec![],
9200 }));
9201 let left_concat = Expression::DPipe(Box::new(
9202 crate::expressions::DPipe {
9203 this: Box::new(prefix),
9204 expression: Box::new(paren_pattern),
9205 safe: None,
9206 },
9207 ));
9208 let anchored = Expression::DPipe(Box::new(
9209 crate::expressions::DPipe {
9210 this: Box::new(left_concat),
9211 expression: Box::new(suffix),
9212 safe: None,
9213 },
9214 ));
9215
9216 let mut result_args = vec![subject, anchored];
9217 if let Some(fl) = flags {
9218 result_args.push(fl);
9219 }
9220 Ok(Expression::Function(Box::new(Function::new(
9221 "REGEXP_MATCHES".to_string(),
9222 result_args,
9223 ))))
9224 }
9225
            Action::RegexpExtractAllToSnowflake => {
                // BigQuery REGEXP_EXTRACT_ALL(s, p) -> Snowflake REGEXP_SUBSTR_ALL(s, p)
                // With capture group: REGEXP_SUBSTR_ALL(s, p, 1, 1, 'c', 1)
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    if args.len() >= 2 {
                        let str_expr = args.remove(0);
                        let pattern = args.remove(0);

                        // Heuristic: a string-literal pattern containing both '(' and ')'
                        // is assumed to have a capture group. NOTE(review): this also
                        // fires on escaped parens \(...\) and non-capturing groups
                        // (?:...) — confirm that is acceptable for these rewrites.
                        // Non-literal patterns are treated as having no groups.
                        let has_groups = match &pattern {
                            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
                                let Literal::String(s) = lit.as_ref() else { unreachable!() };
                                s.contains('(') && s.contains(')')
                            }
                            _ => false,
                        };

                        if has_groups {
                            // Extra args spell out position 1, occurrence 1, 'c'
                            // regex parameters, and group 1 (see comment above).
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_SUBSTR_ALL".to_string(),
                                vec![
                                    str_expr,
                                    pattern,
                                    Expression::number(1),
                                    Expression::number(1),
                                    Expression::Literal(Box::new(Literal::String("c".to_string()))),
                                    Expression::number(1),
                                ],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_SUBSTR_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    } else {
                        // Fewer than two args: just rename, keeping args unchanged.
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_SUBSTR_ALL".to_string(),
                            args,
                        ))))
                    }
                } else {
                    Ok(e)
                }
            }
9271
9272 Action::SetToVariable => {
9273 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
9274 if let Expression::SetStatement(mut s) = e {
9275 for item in &mut s.items {
9276 if item.kind.is_none() {
9277 // Check if name already has VARIABLE prefix (from DuckDB source parsing)
9278 let already_variable = match &item.name {
9279 Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
9280 _ => false,
9281 };
9282 if already_variable {
9283 // Extract the actual name and set kind
9284 if let Expression::Identifier(ref mut id) = item.name {
9285 let actual_name = id.name["VARIABLE ".len()..].to_string();
9286 id.name = actual_name;
9287 }
9288 }
9289 item.kind = Some("VARIABLE".to_string());
9290 }
9291 }
9292 Ok(Expression::SetStatement(s))
9293 } else {
9294 Ok(e)
9295 }
9296 }
9297
9298 Action::ConvertTimezoneToExpr => {
9299 // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
9300 // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
9301 if let Expression::Function(f) = e {
9302 if f.args.len() == 2 {
9303 let mut args = f.args;
9304 let target_tz = args.remove(0);
9305 let timestamp = args.remove(0);
9306 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
9307 source_tz: None,
9308 target_tz: Some(Box::new(target_tz)),
9309 timestamp: Some(Box::new(timestamp)),
9310 options: vec![],
9311 })))
9312 } else if f.args.len() == 3 {
9313 let mut args = f.args;
9314 let source_tz = args.remove(0);
9315 let target_tz = args.remove(0);
9316 let timestamp = args.remove(0);
9317 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
9318 source_tz: Some(Box::new(source_tz)),
9319 target_tz: Some(Box::new(target_tz)),
9320 timestamp: Some(Box::new(timestamp)),
9321 options: vec![],
9322 })))
9323 } else {
9324 Ok(Expression::Function(f))
9325 }
9326 } else {
9327 Ok(e)
9328 }
9329 }
9330
            Action::BigQueryCastType => {
                // Convert BigQuery types to standard SQL types
                //
                // BigQuery-specific type names arrive from the parser as
                // DataType::Custom; each recognized name is mapped to the
                // equivalent built-in DataType variant. Unrecognized types
                // (and non-DataType expressions) pass through unchanged.
                if let Expression::DataType(dt) = e {
                    match dt {
                        // INT64 -> BIGINT
                        DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
                            Ok(Expression::DataType(DataType::BigInt { length: None }))
                        }
                        // FLOAT64 -> DOUBLE
                        DataType::Custom { ref name }
                            if name.eq_ignore_ascii_case("FLOAT64") =>
                        {
                            Ok(Expression::DataType(DataType::Double {
                                precision: None,
                                scale: None,
                            }))
                        }
                        // BOOL -> BOOLEAN
                        DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
                            Ok(Expression::DataType(DataType::Boolean))
                        }
                        // BYTES -> VARBINARY
                        DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
                            Ok(Expression::DataType(DataType::VarBinary { length: None }))
                        }
                        DataType::Custom { ref name }
                            if name.eq_ignore_ascii_case("NUMERIC") =>
                        {
                            // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
                            // default precision (18, 3) being added to bare DECIMAL
                            if matches!(target, DialectType::DuckDB) {
                                Ok(Expression::DataType(DataType::Custom {
                                    name: "DECIMAL".to_string(),
                                }))
                            } else {
                                Ok(Expression::DataType(DataType::Decimal {
                                    precision: None,
                                    scale: None,
                                }))
                            }
                        }
                        // STRING -> generic string type
                        DataType::Custom { ref name }
                            if name.eq_ignore_ascii_case("STRING") =>
                        {
                            Ok(Expression::DataType(DataType::String { length: None }))
                        }
                        // DATETIME -> TIMESTAMP without time zone
                        DataType::Custom { ref name }
                            if name.eq_ignore_ascii_case("DATETIME") =>
                        {
                            Ok(Expression::DataType(DataType::Timestamp {
                                precision: None,
                                timezone: false,
                            }))
                        }
                        // Anything else passes through unchanged.
                        _ => Ok(Expression::DataType(dt)),
                    }
                } else {
                    Ok(e)
                }
            }
9387
9388 Action::BigQuerySafeDivide => {
9389 // Convert SafeDivide expression to IF/CASE form for most targets
9390 if let Expression::SafeDivide(sd) = e {
9391 let x = *sd.this;
9392 let y = *sd.expression;
9393 // Wrap x and y in parens if they're complex expressions
9394 let y_ref = match &y {
9395 Expression::Column(_)
9396 | Expression::Literal(_)
9397 | Expression::Identifier(_) => y.clone(),
9398 _ => Expression::Paren(Box::new(Paren {
9399 this: y.clone(),
9400 trailing_comments: vec![],
9401 })),
9402 };
9403 let x_ref = match &x {
9404 Expression::Column(_)
9405 | Expression::Literal(_)
9406 | Expression::Identifier(_) => x.clone(),
9407 _ => Expression::Paren(Box::new(Paren {
9408 this: x.clone(),
9409 trailing_comments: vec![],
9410 })),
9411 };
9412 let condition = Expression::Neq(Box::new(BinaryOp::new(
9413 y_ref.clone(),
9414 Expression::number(0),
9415 )));
9416 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
9417
9418 if matches!(target, DialectType::Presto | DialectType::Trino) {
9419 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
9420 let cast_x = Expression::Cast(Box::new(Cast {
9421 this: match &x {
9422 Expression::Column(_)
9423 | Expression::Literal(_)
9424 | Expression::Identifier(_) => x,
9425 _ => Expression::Paren(Box::new(Paren {
9426 this: x,
9427 trailing_comments: vec![],
9428 })),
9429 },
9430 to: DataType::Double {
9431 precision: None,
9432 scale: None,
9433 },
9434 trailing_comments: vec![],
9435 double_colon_syntax: false,
9436 format: None,
9437 default: None,
9438 inferred_type: None,
9439 }));
9440 let cast_div = Expression::Div(Box::new(BinaryOp::new(
9441 cast_x,
9442 match &y {
9443 Expression::Column(_)
9444 | Expression::Literal(_)
9445 | Expression::Identifier(_) => y,
9446 _ => Expression::Paren(Box::new(Paren {
9447 this: y,
9448 trailing_comments: vec![],
9449 })),
9450 },
9451 )));
9452 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9453 condition,
9454 true_value: cast_div,
9455 false_value: Some(Expression::Null(Null)),
9456 original_name: None,
9457 inferred_type: None,
9458 })))
9459 } else if matches!(target, DialectType::PostgreSQL) {
9460 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
9461 let cast_x = Expression::Cast(Box::new(Cast {
9462 this: match &x {
9463 Expression::Column(_)
9464 | Expression::Literal(_)
9465 | Expression::Identifier(_) => x,
9466 _ => Expression::Paren(Box::new(Paren {
9467 this: x,
9468 trailing_comments: vec![],
9469 })),
9470 },
9471 to: DataType::Custom {
9472 name: "DOUBLE PRECISION".to_string(),
9473 },
9474 trailing_comments: vec![],
9475 double_colon_syntax: false,
9476 format: None,
9477 default: None,
9478 inferred_type: None,
9479 }));
9480 let y_paren = match &y {
9481 Expression::Column(_)
9482 | Expression::Literal(_)
9483 | Expression::Identifier(_) => y,
9484 _ => Expression::Paren(Box::new(Paren {
9485 this: y,
9486 trailing_comments: vec![],
9487 })),
9488 };
9489 let cast_div =
9490 Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
9491 Ok(Expression::Case(Box::new(Case {
9492 operand: None,
9493 whens: vec![(condition, cast_div)],
9494 else_: Some(Expression::Null(Null)),
9495 comments: Vec::new(),
9496 inferred_type: None,
9497 })))
9498 } else if matches!(target, DialectType::DuckDB) {
9499 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
9500 Ok(Expression::Case(Box::new(Case {
9501 operand: None,
9502 whens: vec![(condition, div_expr)],
9503 else_: Some(Expression::Null(Null)),
9504 comments: Vec::new(),
9505 inferred_type: None,
9506 })))
9507 } else if matches!(target, DialectType::Snowflake) {
9508 // Snowflake: IFF(y <> 0, x / y, NULL)
9509 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9510 condition,
9511 true_value: div_expr,
9512 false_value: Some(Expression::Null(Null)),
9513 original_name: Some("IFF".to_string()),
9514 inferred_type: None,
9515 })))
9516 } else {
9517 // All others: IF(y <> 0, x / y, NULL)
9518 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9519 condition,
9520 true_value: div_expr,
9521 false_value: Some(Expression::Null(Null)),
9522 original_name: None,
9523 inferred_type: None,
9524 })))
9525 }
9526 } else {
9527 Ok(e)
9528 }
9529 }
9530
            Action::BigQueryLastDayStripUnit => {
                // BigQuery LAST_DAY(date[, unit]) -> target-specific last day of month.
                // The explicit unit is dropped (MONTH is the default) and the
                // remaining rewrites all assume month granularity.
                if let Expression::LastDay(mut ld) = e {
                    ld.unit = None; // Strip the unit (MONTH is default)
                    match target {
                        DialectType::PostgreSQL => {
                            // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
                            // Step 1: truncate to the first day of the month.
                            let date_trunc = Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![
                                    Expression::Literal(Box::new(crate::expressions::Literal::String(
                                        "MONTH".to_string(),
                                    ))),
                                    ld.this.clone(),
                                ],
                            )));
                            // Step 2: + INTERVAL '1 MONTH' = first day of next month.
                            let plus_month =
                                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                                    date_trunc,
                                    Expression::Interval(Box::new(
                                        crate::expressions::Interval {
                                            this: Some(Expression::Literal(Box::new(crate::expressions::Literal::String(
                                                "1 MONTH".to_string(),
                                            ),))),
                                            unit: None,
                                        },
                                    )),
                                )));
                            // Step 3: - INTERVAL '1 DAY' = last day of this month.
                            let minus_day =
                                Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                                    plus_month,
                                    Expression::Interval(Box::new(
                                        crate::expressions::Interval {
                                            this: Some(Expression::Literal(Box::new(crate::expressions::Literal::String(
                                                "1 DAY".to_string(),
                                            ),))),
                                            unit: None,
                                        },
                                    )),
                                )));
                            // Step 4: cast the timestamp arithmetic back to DATE.
                            Ok(Expression::Cast(Box::new(Cast {
                                this: minus_day,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        DialectType::Presto => {
                            // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
                            Ok(Expression::Function(Box::new(Function::new(
                                "LAST_DAY_OF_MONTH".to_string(),
                                vec![ld.this],
                            ))))
                        }
                        DialectType::ClickHouse => {
                            // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
                            // Need to wrap the DATE type in Nullable
                            let nullable_date = match ld.this {
                                Expression::Cast(mut c) => {
                                    c.to = DataType::Nullable {
                                        inner: Box::new(DataType::Date),
                                    };
                                    Expression::Cast(c)
                                }
                                // Operand is not a cast: leave it untouched.
                                other => other,
                            };
                            ld.this = nullable_date;
                            Ok(Expression::LastDay(ld))
                        }
                        // Other targets: keep LAST_DAY with the unit stripped.
                        _ => Ok(Expression::LastDay(ld)),
                    }
                } else {
                    Ok(e)
                }
            }
9608
            Action::BigQueryCastFormat => {
                // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
                // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
                // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
                //
                // Only CAST/SAFE_CAST nodes carrying a FORMAT clause are rewritten;
                // anything else falls through via the early return.
                let (this, to, format_expr, is_safe) = match e {
                    Expression::Cast(ref c) if c.format.is_some() => (
                        c.this.clone(),
                        c.to.clone(),
                        c.format.as_ref().unwrap().as_ref().clone(),
                        false,
                    ),
                    Expression::SafeCast(ref c) if c.format.is_some() => (
                        c.this.clone(),
                        c.to.clone(),
                        c.format.as_ref().unwrap().as_ref().clone(),
                        true,
                    ),
                    _ => return Ok(e),
                };
                // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
                if matches!(target, DialectType::BigQuery) {
                    match &to {
                        DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
                            // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
                            return Ok(e);
                        }
                        _ => {}
                    }
                }
                // Extract timezone from format if AT TIME ZONE is present
                let (actual_format_expr, timezone) = match &format_expr {
                    Expression::AtTimeZone(ref atz) => {
                        (atz.this.clone(), Some(atz.zone.clone()))
                    }
                    _ => (format_expr.clone(), None),
                };
                // Translate the CAST FORMAT string to strftime form
                // (see bq_cast_format_to_strftime).
                let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
                match target {
                    DialectType::BigQuery => {
                        // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
                        // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
                        let func_name = match &to {
                            DataType::Date => "PARSE_DATE",
                            DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
                            // NOTE(review): TIME and all other types also map to
                            // PARSE_TIMESTAMP — confirm this catch-all is intended.
                            DataType::Time { .. } => "PARSE_TIMESTAMP",
                            _ => "PARSE_TIMESTAMP",
                        };
                        let mut func_args = vec![strftime_fmt, this];
                        if let Some(tz) = timezone {
                            func_args.push(tz);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            func_args,
                        ))))
                    }
                    DialectType::DuckDB => {
                        // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
                        // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
                        let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
                        let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
                        let parse_call = Expression::Function(Box::new(Function::new(
                            parse_fn_name.to_string(),
                            vec![this, duck_fmt],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: parse_call,
                            to,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    // Other targets: keep the CAST ... FORMAT expression as-is.
                    _ => Ok(e),
                }
            }
9687
9688 Action::BigQueryFunctionNormalize => {
9689 Self::normalize_bigquery_function(e, source, target)
9690 }
9691
9692 Action::BigQueryToHexBare => {
9693 // Not used anymore - handled directly in normalize_bigquery_function
9694 Ok(e)
9695 }
9696
            Action::BigQueryToHexLower => {
                // Normalize LOWER(...) wrappers produced around hex-conversion
                // functions during BigQuery TO_HEX handling.
                if let Expression::Lower(uf) = e {
                    match uf.this {
                        // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
                        Expression::Function(f)
                            if matches!(target, DialectType::BigQuery)
                                && f.name == "TO_HEX" =>
                        {
                            Ok(Expression::Function(f))
                        }
                        // LOWER(LOWER(HEX/TO_HEX(x))) patterns
                        Expression::Lower(inner_uf) => {
                            if matches!(target, DialectType::BigQuery) {
                                // BQ->BQ: extract TO_HEX
                                // NOTE(review): renames whatever function sits inside
                                // to TO_HEX — assumes only HEX/TO_HEX calls reach this
                                // branch; confirm against the upstream transforms.
                                if let Expression::Function(f) = inner_uf.this {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TO_HEX".to_string(),
                                        f.args,
                                    ))))
                                } else {
                                    Ok(Expression::Lower(inner_uf))
                                }
                            } else {
                                // Flatten: LOWER(LOWER(x)) -> LOWER(x)
                                Ok(Expression::Lower(inner_uf))
                            }
                        }
                        // Any other operand: re-wrap it in LOWER unchanged.
                        other => {
                            Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
                                this: other,
                                original_name: None,
                                inferred_type: None,
                            })))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
9736
            Action::BigQueryToHexUpper => {
                // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
                // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
                if let Expression::Upper(uf) = e {
                    if let Expression::Lower(inner_uf) = uf.this {
                        // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
                        if matches!(target, DialectType::BigQuery) {
                            // Restore TO_HEX name in inner function
                            // NOTE(review): renames any inner function to TO_HEX —
                            // assumes only HEX/TO_HEX reach this branch.
                            if let Expression::Function(f) = inner_uf.this {
                                let restored = Expression::Function(Box::new(Function::new(
                                    "TO_HEX".to_string(),
                                    f.args,
                                )));
                                Ok(Expression::Upper(Box::new(
                                    crate::expressions::UnaryFunc::new(restored),
                                )))
                            } else {
                                // Non-function inside the LOWER: keep UPPER over it.
                                Ok(Expression::Upper(inner_uf))
                            }
                        } else {
                            // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
                            Ok(inner_uf.this)
                        }
                    } else {
                        // Operand is not LOWER(...): leave the UPPER untouched.
                        Ok(Expression::Upper(uf))
                    }
                } else {
                    Ok(e)
                }
            }
9767
9768 Action::BigQueryAnyValueHaving => {
9769 // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
9770 // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
9771 if let Expression::AnyValue(agg) = e {
9772 if let Some((having_expr, is_max)) = agg.having_max {
9773 let func_name = if is_max {
9774 "ARG_MAX_NULL"
9775 } else {
9776 "ARG_MIN_NULL"
9777 };
9778 Ok(Expression::Function(Box::new(Function::new(
9779 func_name.to_string(),
9780 vec![agg.this, *having_expr],
9781 ))))
9782 } else {
9783 Ok(Expression::AnyValue(agg))
9784 }
9785 } else {
9786 Ok(e)
9787 }
9788 }
9789
            Action::BigQueryApproxQuantiles => {
                // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
                // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
                if let Expression::AggregateFunction(agg) = e {
                    if agg.args.len() >= 2 {
                        let x_expr = agg.args[0].clone();
                        let n_expr = &agg.args[1];

                        // Extract the numeric value from n_expr
                        // NOTE(review): a non-literal or unparseable n silently
                        // falls back to 2 — confirm this default is intended.
                        let n = match n_expr {
                            Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::Number(_)) => {
                                let crate::expressions::Literal::Number(s) = lit.as_ref() else { unreachable!() };
                                s.parse::<usize>().unwrap_or(2)
                            }
                            _ => 2,
                        };

                        // Generate quantile array: [0, 1/n, 2/n, ..., 1]
                        let mut quantiles = Vec::new();
                        for i in 0..=n {
                            let q = i as f64 / n as f64;
                            // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
                            // (endpoints use integer literals, interior points use
                            // the default f64 Display rendering)
                            if q == 0.0 {
                                quantiles.push(Expression::number(0));
                            } else if q == 1.0 {
                                quantiles.push(Expression::number(1));
                            } else {
                                quantiles.push(Expression::Literal(Box::new(crate::expressions::Literal::Number(format!("{}", q)),)));
                            }
                        }

                        let array_expr =
                            Expression::Array(Box::new(crate::expressions::Array {
                                expressions: quantiles,
                            }));

                        // Preserve DISTINCT modifier
                        let mut new_func = Function::new(
                            "APPROX_QUANTILE".to_string(),
                            vec![x_expr, array_expr],
                        );
                        new_func.distinct = agg.distinct;
                        Ok(Expression::Function(Box::new(new_func)))
                    } else {
                        // Fewer than two args: not the expected shape; pass through.
                        Ok(Expression::AggregateFunction(agg))
                    }
                } else {
                    Ok(e)
                }
            }
9840
9841 Action::GenericFunctionNormalize => {
9842 // Helper closure to convert ARBITRARY to target-specific function
9843 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
9844 let name = match target {
9845 DialectType::ClickHouse => "any",
9846 DialectType::TSQL | DialectType::SQLite => "MAX",
9847 DialectType::Hive => "FIRST",
9848 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
9849 "ARBITRARY"
9850 }
9851 _ => "ANY_VALUE",
9852 };
9853 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
9854 }
9855
9856 if let Expression::Function(f) = e {
9857 let name = f.name.to_ascii_uppercase();
9858 match name.as_str() {
                        // ARBITRARY(x): rename to the target's equivalent via
                        // convert_arbitrary (defined above).
                        "ARBITRARY" if f.args.len() == 1 => {
                            let arg = f.args.into_iter().next().unwrap();
                            Ok(convert_arbitrary(arg, target))
                        }
                        // TO_NUMBER(x): native on Oracle/Snowflake; everywhere
                        // else it becomes a plain CAST(x AS DOUBLE).
                        "TO_NUMBER" if f.args.len() == 1 => {
                            let arg = f.args.into_iter().next().unwrap();
                            match target {
                                DialectType::Oracle | DialectType::Snowflake => {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TO_NUMBER".to_string(),
                                        vec![arg],
                                    ))))
                                }
                                _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
                                    this: arg,
                                    to: crate::expressions::DataType::Double {
                                        precision: None,
                                        scale: None,
                                    },
                                    double_colon_syntax: false,
                                    trailing_comments: Vec::new(),
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }))),
                            }
                        }
                        // AGGREGATE(...) with >= 3 args -> REDUCE for targets that
                        // use the REDUCE spelling; otherwise left unchanged.
                        "AGGREGATE" if f.args.len() >= 3 => match target {
                            DialectType::DuckDB
                            | DialectType::Hive
                            | DialectType::Presto
                            | DialectType::Trino => Ok(Expression::Function(Box::new(
                                Function::new("REDUCE".to_string(), f.args),
                            ))),
                            _ => Ok(Expression::Function(f)),
                        },
9895 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep for DuckDB
9896 "REGEXP_MATCHES" if f.args.len() >= 2 => {
9897 if matches!(target, DialectType::DuckDB) {
9898 Ok(Expression::Function(f))
9899 } else {
9900 let mut args = f.args;
9901 let this = args.remove(0);
9902 let pattern = args.remove(0);
9903 let flags = if args.is_empty() {
9904 None
9905 } else {
9906 Some(args.remove(0))
9907 };
9908 Ok(Expression::RegexpLike(Box::new(
9909 crate::expressions::RegexpFunc {
9910 this,
9911 pattern,
9912 flags,
9913 },
9914 )))
9915 }
9916 }
9917 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
9918 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
9919 if matches!(target, DialectType::DuckDB) {
9920 Ok(Expression::Function(f))
9921 } else {
9922 let mut args = f.args;
9923 let this = args.remove(0);
9924 let pattern = args.remove(0);
9925 let flags = if args.is_empty() {
9926 None
9927 } else {
9928 Some(args.remove(0))
9929 };
9930 Ok(Expression::RegexpLike(Box::new(
9931 crate::expressions::RegexpFunc {
9932 this,
9933 pattern,
9934 flags,
9935 },
9936 )))
9937 }
9938 }
                        // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
                        "STRUCT_EXTRACT" if f.args.len() == 2 => {
                            let mut args = f.args;
                            let this = args.remove(0);
                            let field_expr = args.remove(0);
                            // Extract string literal to get field name
                            let field_name = match &field_expr {
                                Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
                                    let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
                                    s.clone()
                                }
                                Expression::Identifier(id) => id.name.clone(),
                                // Dynamic field expression: cannot be turned into dot
                                // access, so keep the original function-call form.
                                _ => {
                                    return Ok(Expression::Function(Box::new(Function::new(
                                        "STRUCT_EXTRACT".to_string(),
                                        vec![this, field_expr],
                                    ))))
                                }
                            };
                            Ok(Expression::StructExtract(Box::new(
                                crate::expressions::StructExtractFunc {
                                    this,
                                    field: crate::expressions::Identifier::new(field_name),
                                },
                            )))
                        }
                        // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
                        "LIST_FILTER" if f.args.len() == 2 => {
                            let name = match target {
                                DialectType::DuckDB => "LIST_FILTER",
                                _ => "FILTER",
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                f.args,
                            ))))
                        }
                        // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
                        "LIST_TRANSFORM" if f.args.len() == 2 => {
                            let name = match target {
                                DialectType::DuckDB => "LIST_TRANSFORM",
                                _ => "TRANSFORM",
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                f.args,
                            ))))
                        }
                        // LIST_SORT(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, SORT_ARRAY(x) for others
                        "LIST_SORT" if f.args.len() >= 1 => {
                            let name = match target {
                                DialectType::DuckDB => "LIST_SORT",
                                DialectType::Presto
                                | DialectType::Trino => "ARRAY_SORT",
                                _ => "SORT_ARRAY",
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                f.args,
                            ))))
                        }
                        // LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
                        "LIST_REVERSE_SORT" if f.args.len() >= 1 => {
                            match target {
                                // NOTE(review): unlike the LIST_* arms above, DuckDB is
                                // renamed here to ARRAY_REVERSE_SORT — confirm intended.
                                DialectType::DuckDB => Ok(Expression::Function(Box::new(
                                    Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
                                ))),
                                DialectType::Spark
                                | DialectType::Databricks
                                | DialectType::Hive => {
                                    // SORT_ARRAY(x, FALSE): the extra FALSE argument
                                    // requests descending order.
                                    let mut args = f.args;
                                    args.push(Expression::Identifier(
                                        crate::expressions::Identifier::new("FALSE"),
                                    ));
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "SORT_ARRAY".to_string(),
                                        args,
                                    ))))
                                }
                                DialectType::Presto
                                | DialectType::Trino
                                | DialectType::Athena => {
                                    // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
                                    // The comparator inverts the natural order to give
                                    // a descending sort.
                                    let arr = f.args.into_iter().next().unwrap();
                                    let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                        parameters: vec![
                                            crate::expressions::Identifier::new("a"),
                                            crate::expressions::Identifier::new("b"),
                                        ],
                                        body: Expression::Case(Box::new(Case {
                                            operand: None,
                                            whens: vec![
                                                (
                                                    Expression::Lt(Box::new(BinaryOp::new(
                                                        Expression::Identifier(crate::expressions::Identifier::new("a")),
                                                        Expression::Identifier(crate::expressions::Identifier::new("b")),
                                                    ))),
                                                    Expression::number(1),
                                                ),
                                                (
                                                    Expression::Gt(Box::new(BinaryOp::new(
                                                        Expression::Identifier(crate::expressions::Identifier::new("a")),
                                                        Expression::Identifier(crate::expressions::Identifier::new("b")),
                                                    ))),
                                                    Expression::Literal(Box::new(Literal::Number("-1".to_string()))),
                                                ),
                                            ],
                                            else_: Some(Expression::number(0)),
                                            comments: Vec::new(),
                                            inferred_type: None,
                                        })),
                                        colon: false,
                                        parameter_types: Vec::new(),
                                    }));
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "ARRAY_SORT".to_string(),
                                        vec![arr, lambda],
                                    ))))
                                }
                                // Everything else keeps the original name.
                                _ => Ok(Expression::Function(Box::new(Function::new(
                                    "LIST_REVERSE_SORT".to_string(),
                                    f.args,
                                )))),
                            }
                        }
10064 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
10065 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
10066 let mut args = f.args;
10067 args.push(Expression::string(","));
10068 let name = match target {
10069 DialectType::DuckDB => "STR_SPLIT",
10070 DialectType::Presto | DialectType::Trino => "SPLIT",
10071 DialectType::Spark
10072 | DialectType::Databricks
10073 | DialectType::Hive => "SPLIT",
10074 DialectType::PostgreSQL => "STRING_TO_ARRAY",
10075 DialectType::Redshift => "SPLIT_TO_ARRAY",
10076 _ => "SPLIT",
10077 };
10078 Ok(Expression::Function(Box::new(Function::new(
10079 name.to_string(),
10080 args,
10081 ))))
10082 }
10083 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
10084 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
10085 let name = match target {
10086 DialectType::DuckDB => "STR_SPLIT",
10087 DialectType::Presto | DialectType::Trino => "SPLIT",
10088 DialectType::Spark
10089 | DialectType::Databricks
10090 | DialectType::Hive => "SPLIT",
10091 DialectType::PostgreSQL => "STRING_TO_ARRAY",
10092 DialectType::Redshift => "SPLIT_TO_ARRAY",
10093 _ => "SPLIT",
10094 };
10095 Ok(Expression::Function(Box::new(Function::new(
10096 name.to_string(),
10097 f.args,
10098 ))))
10099 }
                        // STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
                        "STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
                            let name = match target {
                                DialectType::DuckDB => "STR_SPLIT",
                                DialectType::Presto | DialectType::Trino => "SPLIT",
                                DialectType::Spark
                                | DialectType::Databricks
                                | DialectType::Hive => "SPLIT",
                                DialectType::Doris | DialectType::StarRocks => {
                                    "SPLIT_BY_STRING"
                                }
                                DialectType::PostgreSQL | DialectType::Redshift => {
                                    "STRING_TO_ARRAY"
                                }
                                _ => "SPLIT",
                            };
                            // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
                            if matches!(
                                target,
                                DialectType::Spark
                                    | DialectType::Databricks
                                    | DialectType::Hive
                            ) {
                                let mut args = f.args;
                                let x = args.remove(0);
                                let sep = args.remove(0);
                                // Wrap separator in CONCAT('\\Q', sep, '\\E')
                                // (\Q...\E quotes the separator so regex
                                // metacharacters match literally)
                                // NOTE(review): any arguments beyond the first two
                                // are dropped on this path — confirm intended.
                                let escaped_sep =
                                    Expression::Function(Box::new(Function::new(
                                        "CONCAT".to_string(),
                                        vec![
                                            Expression::string("\\Q"),
                                            sep,
                                            Expression::string("\\E"),
                                        ],
                                    )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    name.to_string(),
                                    vec![x, escaped_sep],
                                ))))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    name.to_string(),
                                    f.args,
                                ))))
                            }
                        }
                        // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
                        "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
                            let name = match target {
                                DialectType::DuckDB => "STR_SPLIT_REGEX",
                                DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
                                // Spark/Hive SPLIT is regex-based already.
                                DialectType::Spark
                                | DialectType::Databricks
                                | DialectType::Hive => "SPLIT",
                                _ => "REGEXP_SPLIT",
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                f.args,
                            ))))
                        }
// SPLIT(str, delim) from Snowflake -> DuckDB with CASE wrapper.
// Snowflake's SPLIT has special edge-case behavior that plain STR_SPLIT
// does not reproduce, so the translation wraps STR_SPLIT in a CASE that
// yields NULL for a NULL delimiter and [str] for an empty delimiter.
"SPLIT"
    if f.args.len() == 2
        && matches!(source, DialectType::Snowflake)
        && matches!(target, DialectType::DuckDB) =>
{
    let mut args = f.args;
    let str_arg = args.remove(0);
    let delim_arg = args.remove(0);

    // STR_SPLIT(str, delim) as the base
    let base_func = Expression::Function(Box::new(Function::new(
        "STR_SPLIT".to_string(),
        vec![str_arg.clone(), delim_arg.clone()],
    )));

    // [str] - array with single element
    let array_with_input = Expression::Array(Box::new(
        crate::expressions::Array {
            expressions: vec![str_arg],
        },
    ));

    // CASE
    //   WHEN delim IS NULL THEN NULL
    //   WHEN delim = '' THEN [str]
    //   ELSE STR_SPLIT(str, delim)
    // END
    // NOTE(review): if delim has side effects it is evaluated more than once
    // in the generated SQL (it appears in two WHEN branches and the ELSE).
    Ok(Expression::Case(Box::new(Case {
        operand: None,
        whens: vec![
            (
                Expression::Is(Box::new(BinaryOp {
                    left: delim_arg.clone(),
                    right: Expression::Null(Null),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
                Expression::Null(Null),
            ),
            (
                Expression::Eq(Box::new(BinaryOp {
                    left: delim_arg,
                    right: Expression::string(""),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
                array_with_input,
            ),
        ],
        else_: Some(base_func),
        comments: vec![],
        inferred_type: None,
    })))
}
// SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark.
"SPLIT"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
                | DialectType::StarRocks
                | DialectType::Doris
        )
        && matches!(
            target,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex
    // so the literal separator is quoted with \Q...\E (Java regex quoting).
    let mut args = f.args;
    let x = args.remove(0);
    let sep = args.remove(0);
    let escaped_sep = Expression::Function(Box::new(Function::new(
        "CONCAT".to_string(),
        vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
    )));
    Ok(Expression::Function(Box::new(Function::new(
        "SPLIT".to_string(),
        vec![x, escaped_sep],
    ))))
}
// SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
// For ClickHouse target, preserve original name to maintain camelCase
"SUBSTRINGINDEX" => {
    let name = if matches!(target, DialectType::ClickHouse) {
        // Keep the caller's exact spelling (f.name) for round-tripping.
        f.name.clone()
    } else {
        "SUBSTRING_INDEX".to_string()
    };
    Ok(Expression::Function(Box::new(Function::new(name, f.args))))
}
// ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function.
// Most targets take only the array argument; DuckDB and PostgreSQL/Redshift
// keep any extra (dimension) arguments, all other targets drop them.
"ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
    // DuckDB source CARDINALITY -> DuckDB target: keep as CARDINALITY (used for maps)
    if name == "CARDINALITY"
        && matches!(source, DialectType::DuckDB)
        && matches!(target, DialectType::DuckDB)
    {
        return Ok(Expression::Function(f));
    }
    // Get the array argument (first arg, drop dimension args)
    let mut args = f.args;
    let arr = if args.is_empty() {
        // No arguments at all: pass the call through unchanged.
        return Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            args,
        ))));
    } else {
        args.remove(0)
    };
    let name =
        match target {
            DialectType::Spark
            | DialectType::Databricks
            | DialectType::Hive => "SIZE",
            DialectType::Presto | DialectType::Trino => "CARDINALITY",
            DialectType::BigQuery => "ARRAY_LENGTH",
            DialectType::DuckDB => {
                // DuckDB: use ARRAY_LENGTH with all args
                let mut all_args = vec![arr];
                all_args.extend(args);
                return Ok(Expression::Function(Box::new(
                    Function::new("ARRAY_LENGTH".to_string(), all_args),
                )));
            }
            DialectType::PostgreSQL | DialectType::Redshift => {
                // Keep ARRAY_LENGTH with dimension arg
                let mut all_args = vec![arr];
                all_args.extend(args);
                return Ok(Expression::Function(Box::new(
                    Function::new("ARRAY_LENGTH".to_string(), all_args),
                )));
            }
            DialectType::ClickHouse => "LENGTH",
            _ => "ARRAY_LENGTH",
        };
    // Fall-through targets: single-argument form only.
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        vec![arr],
    ))))
}
// TO_VARIANT(x) -> CAST(x AS VARIANT) for DuckDB.
// VARIANT is not a built-in DataType here, so it is emitted as a Custom type.
"TO_VARIANT" if f.args.len() == 1 => {
    match target {
        DialectType::DuckDB => {
            // len() == 1 was checked by the guard, so next() cannot fail.
            let arg = f.args.into_iter().next().unwrap();
            Ok(Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Custom {
                    name: "VARIANT".to_string(),
                },
                double_colon_syntax: false,
                trailing_comments: Vec::new(),
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        // All other targets: leave the call untouched.
        _ => Ok(Expression::Function(f)),
    }
}
// JSON_GROUP_ARRAY(x) -> JSON_AGG(x) for PostgreSQL (SQLite/DuckDB name ->
// PostgreSQL aggregate name); other targets keep the original call.
"JSON_GROUP_ARRAY" if f.args.len() == 1 => {
    match target {
        DialectType::PostgreSQL => {
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_AGG".to_string(),
                f.args,
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// JSON_GROUP_OBJECT(key, value) -> JSON_OBJECT_AGG(key, value) for PostgreSQL;
// other targets keep the original call.
"JSON_GROUP_OBJECT" if f.args.len() == 2 => {
    match target {
        DialectType::PostgreSQL => {
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_OBJECT_AGG".to_string(),
                f.args,
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// UNICODE(x) -> target-specific codepoint function.
// UNICODE returns the code point of the first character; targets without a
// direct equivalent get a composed expression.
"UNICODE" if f.args.len() == 1 => {
    match target {
        DialectType::SQLite | DialectType::DuckDB => {
            // These targets support UNICODE natively — keep as-is.
            Ok(Expression::Function(Box::new(Function::new(
                "UNICODE".to_string(),
                f.args,
            ))))
        }
        DialectType::Oracle => {
            // ASCII(UNISTR(x))
            let inner = Expression::Function(Box::new(Function::new(
                "UNISTR".to_string(),
                f.args,
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ASCII".to_string(),
                vec![inner],
            ))))
        }
        DialectType::MySQL => {
            // ORD(CONVERT(x USING utf32))
            // Guard ensured exactly one argument, so unwrap is safe.
            let arg = f.args.into_iter().next().unwrap();
            let convert_expr = Expression::ConvertToCharset(Box::new(
                crate::expressions::ConvertToCharset {
                    this: Box::new(arg),
                    dest: Some(Box::new(Expression::Identifier(
                        crate::expressions::Identifier::new("utf32"),
                    ))),
                    source: None,
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "ORD".to_string(),
                vec![convert_expr],
            ))))
        }
        // Fallback: ASCII(x).
        // NOTE(review): ASCII only matches UNICODE for code points < 128 on
        // some engines — confirm this approximation is acceptable.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ASCII".to_string(),
            f.args,
        )))),
    }
}
// XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery.
// Variadic XOR is flattened differently per target: either kept as a
// function call, converted to a dedicated Xor AST node, or left-folded into
// nested two-argument operations.
"XOR" if f.args.len() >= 2 => {
    match target {
        DialectType::ClickHouse => {
            // ClickHouse: keep as xor() function with lowercase name
            Ok(Expression::Function(Box::new(Function::new(
                "xor".to_string(),
                f.args,
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            if f.args.len() == 2 {
                Ok(Expression::Function(Box::new(Function::new(
                    "BITWISE_XOR".to_string(),
                    f.args,
                ))))
            } else {
                // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
                // Left-fold all remaining arguments into nested calls.
                let mut args = f.args;
                let first = args.remove(0);
                let second = args.remove(0);
                let mut result =
                    Expression::Function(Box::new(Function::new(
                        "BITWISE_XOR".to_string(),
                        vec![first, second],
                    )));
                for arg in args {
                    result =
                        Expression::Function(Box::new(Function::new(
                            "BITWISE_XOR".to_string(),
                            vec![result, arg],
                        )));
                }
                Ok(result)
            }
        }
        DialectType::MySQL
        | DialectType::SingleStore
        | DialectType::Doris
        | DialectType::StarRocks => {
            // Convert XOR(a, b, c) -> Expression::Xor with expressions list
            // (rendered as the infix `a XOR b XOR c` by the generator).
            let args = f.args;
            Ok(Expression::Xor(Box::new(crate::expressions::Xor {
                this: None,
                expression: None,
                expressions: args,
            })))
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            // PostgreSQL: a # b (hash operator for XOR)
            // Left-fold variadic arguments: ((a # b) # c) ...
            let mut args = f.args;
            let first = args.remove(0);
            let second = args.remove(0);
            let mut result = Expression::BitwiseXor(Box::new(
                BinaryOp::new(first, second),
            ));
            for arg in args {
                result = Expression::BitwiseXor(Box::new(
                    BinaryOp::new(result, arg),
                ));
            }
            Ok(result)
        }
        DialectType::DuckDB => {
            // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
            Ok(Expression::Function(Box::new(Function::new(
                "XOR".to_string(),
                f.args,
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: a ^ b (caret operator for XOR), left-folded the
            // same way as the PostgreSQL branch.
            let mut args = f.args;
            let first = args.remove(0);
            let second = args.remove(0);
            let mut result = Expression::BitwiseXor(Box::new(
                BinaryOp::new(first, second),
            ));
            for arg in args {
                result = Expression::BitwiseXor(Box::new(
                    BinaryOp::new(result, arg),
                ));
            }
            Ok(result)
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "XOR".to_string(),
            f.args,
        )))),
    }
}
// ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto.
// Presto has no "descending" flag, so a comparator lambda implementing
// reverse order is synthesized instead.
"ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
    match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            // SORT_ARRAY(x, FALSE): the FALSE flag means descending.
            let mut args = f.args;
            args.push(Expression::Identifier(
                crate::expressions::Identifier::new("FALSE"),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "SORT_ARRAY".to_string(),
                args,
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
            // Positive result when a < b places a after b, i.e. descending.
            let arr = f.args.into_iter().next().unwrap();
            let lambda = Expression::Lambda(Box::new(
                crate::expressions::LambdaExpr {
                    parameters: vec![
                        Identifier::new("a"),
                        Identifier::new("b"),
                    ],
                    colon: false,
                    parameter_types: Vec::new(),
                    body: Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![
                            (
                                Expression::Lt(Box::new(
                                    BinaryOp::new(
                                        Expression::Identifier(
                                            Identifier::new("a"),
                                        ),
                                        Expression::Identifier(
                                            Identifier::new("b"),
                                        ),
                                    ),
                                )),
                                Expression::number(1),
                            ),
                            (
                                Expression::Gt(Box::new(
                                    BinaryOp::new(
                                        Expression::Identifier(
                                            Identifier::new("a"),
                                        ),
                                        Expression::Identifier(
                                            Identifier::new("b"),
                                        ),
                                    ),
                                )),
                                // -1 is modeled as negation of the literal 1.
                                Expression::Neg(Box::new(
                                    crate::expressions::UnaryOp {
                                        this: Expression::number(1),
                                        inferred_type: None,
                                    },
                                )),
                            ),
                        ],
                        else_: Some(Expression::number(0)),
                        comments: Vec::new(),
                        inferred_type: None,
                    })),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_SORT".to_string(),
                vec![arr, lambda],
            ))))
        }
        // Other targets: keep the original name and arguments.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ARRAY_REVERSE_SORT".to_string(),
            f.args,
        )))),
    }
}
// ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto.
// One-argument ENCODE assumes UTF-8; Spark/Hive require the charset to be
// explicit, Presto uses a dedicated function.
"ENCODE" if f.args.len() == 1 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => {
        let mut args = f.args;
        args.push(Expression::string("utf-8"));
        Ok(Expression::Function(Box::new(Function::new(
            "ENCODE".to_string(),
            args,
        ))))
    }
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        Ok(Expression::Function(Box::new(Function::new(
            "TO_UTF8".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(Box::new(Function::new(
        "ENCODE".to_string(),
        f.args,
    )))),
},
// DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto.
// Mirror of the ENCODE arm above for the decoding direction.
"DECODE" if f.args.len() == 1 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => {
        let mut args = f.args;
        args.push(Expression::string("utf-8"));
        Ok(Expression::Function(Box::new(Function::new(
            "DECODE".to_string(),
            args,
        ))))
    }
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        Ok(Expression::Function(Box::new(Function::new(
            "FROM_UTF8".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(Box::new(Function::new(
        "DECODE".to_string(),
        f.args,
    )))),
},
// QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive; pure rename per target.
// NOTE(review): Presto's APPROX_PERCENTILE is approximate while QUANTILE is
// typically exact — confirm the precision trade-off is intended.
"QUANTILE" if f.args.len() == 2 => {
    let name = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "PERCENTILE",
        DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
        DialectType::BigQuery => "PERCENTILE_CONT",
        _ => "QUANTILE",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake.
// DuckDB keeps the two-argument form; ordered-set-aggregate dialects move
// the column into a WITHIN GROUP (ORDER BY ...) clause.
"QUANTILE_CONT" if f.args.len() == 2 => {
    let mut args = f.args;
    let column = args.remove(0);
    let quantile = args.remove(0);
    match target {
        DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new(
                "QUANTILE_CONT".to_string(),
                vec![column, quantile],
            ))))
        }
        DialectType::PostgreSQL
        | DialectType::Redshift
        | DialectType::Snowflake => {
            // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
            // The column is cloned: once inside the percentile node and
            // once as the ORDER BY key.
            let inner = Expression::PercentileCont(Box::new(
                crate::expressions::PercentileFunc {
                    this: column.clone(),
                    percentile: quantile,
                    order_by: None,
                    filter: None,
                },
            ));
            Ok(Expression::WithinGroup(Box::new(
                crate::expressions::WithinGroup {
                    this: inner,
                    order_by: vec![crate::expressions::Ordered {
                        this: column,
                        desc: false,
                        nulls_first: None,
                        explicit_asc: false,
                        with_fill: None,
                    }],
                },
            )))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "QUANTILE_CONT".to_string(),
            vec![column, quantile],
        )))),
    }
}
// QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake.
// Discrete-percentile mirror of the QUANTILE_CONT arm above.
"QUANTILE_DISC" if f.args.len() == 2 => {
    let mut args = f.args;
    let column = args.remove(0);
    let quantile = args.remove(0);
    match target {
        DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new(
                "QUANTILE_DISC".to_string(),
                vec![column, quantile],
            ))))
        }
        DialectType::PostgreSQL
        | DialectType::Redshift
        | DialectType::Snowflake => {
            // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
            let inner = Expression::PercentileDisc(Box::new(
                crate::expressions::PercentileFunc {
                    this: column.clone(),
                    percentile: quantile,
                    order_by: None,
                    filter: None,
                },
            ));
            Ok(Expression::WithinGroup(Box::new(
                crate::expressions::WithinGroup {
                    this: inner,
                    order_by: vec![crate::expressions::Ordered {
                        this: column,
                        desc: false,
                        nulls_first: None,
                        explicit_asc: false,
                        with_fill: None,
                    }],
                },
            )))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "QUANTILE_DISC".to_string(),
            vec![column, quantile],
        )))),
    }
}
// PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific rename.
"PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
    let name = match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => "APPROX_PERCENTILE",
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "PERCENTILE_APPROX",
        DialectType::DuckDB => "APPROX_QUANTILE",
        DialectType::PostgreSQL | DialectType::Redshift => {
            // NOTE(review): PERCENTILE_CONT on PostgreSQL is an ordered-set
            // aggregate; here only the name is swapped, not the call shape —
            // confirm downstream generation handles this.
            "PERCENTILE_CONT"
        }
        // Unknown targets keep the original (source) spelling.
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Hive, TO_UNIXTIME(x) for Presto.
"EPOCH" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "UNIX_TIMESTAMP",
        DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
        _ => "EPOCH",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// EPOCH_MS(x) -> target-specific epoch-milliseconds-to-timestamp conversion.
// Targets without a milliseconds constructor divide by 1000 and use the
// seconds-based FROM_UNIXTIME.
"EPOCH_MS" if f.args.len() == 1 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            Ok(Expression::Function(Box::new(Function::new(
                "TIMESTAMP_MILLIS".to_string(),
                f.args,
            ))))
        }
        DialectType::Hive => {
            // Hive: FROM_UNIXTIME(x / 1000)
            let arg = f.args.into_iter().next().unwrap();
            let div_expr = Expression::Div(Box::new(
                crate::expressions::BinaryOp::new(
                    arg,
                    Expression::number(1000),
                ),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_UNIXTIME".to_string(),
                vec![div_expr],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            // Presto/Trino: FROM_UNIXTIME(x / 1000), built inline.
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_UNIXTIME".to_string(),
                vec![Expression::Div(Box::new(
                    crate::expressions::BinaryOp::new(
                        f.args.into_iter().next().unwrap(),
                        Expression::number(1000),
                    ),
                ))],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "EPOCH_MS".to_string(),
            f.args,
        )))),
    }
}
// HASHBYTES('algorithm', x) -> target-specific hash function.
// The TSQL algorithm name (first argument, string literal) selects which
// hash function the target gets; non-literal algorithms pass through as-is.
"HASHBYTES" if f.args.len() == 2 => {
    // Keep HASHBYTES as-is for TSQL target
    if matches!(target, DialectType::TSQL) {
        return Ok(Expression::Function(f));
    }
    let algo_expr = &f.args[0];
    let algo = match algo_expr {
        Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
            // The matches! guard guarantees the String variant here.
            let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
            s.to_ascii_uppercase()
        }
        // Algorithm is not a string literal: cannot dispatch, pass through.
        _ => return Ok(Expression::Function(f)),
    };
    // Second argument is the data to hash (len == 2 checked by the guard).
    let data_arg = f.args.into_iter().nth(1).unwrap();
    match algo.as_str() {
        "SHA1" => {
            let name = match target {
                DialectType::Spark | DialectType::Databricks => "SHA",
                DialectType::Hive => "SHA1",
                _ => "SHA1",
            };
            Ok(Expression::Function(Box::new(Function::new(
                name.to_string(),
                vec![data_arg],
            ))))
        }
        "SHA2_256" => {
            Ok(Expression::Function(Box::new(Function::new(
                "SHA2".to_string(),
                vec![data_arg, Expression::number(256)],
            ))))
        }
        "SHA2_512" => {
            Ok(Expression::Function(Box::new(Function::new(
                "SHA2".to_string(),
                vec![data_arg, Expression::number(512)],
            ))))
        }
        "MD5" => Ok(Expression::Function(Box::new(Function::new(
            "MD5".to_string(),
            vec![data_arg],
        )))),
        // Unknown algorithm: keep HASHBYTES with the (uppercased) name.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "HASHBYTES".to_string(),
            vec![Expression::string(&algo), data_arg],
        )))),
    }
}
// JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction.
// The variadic key list is flattened into a single JSONPath string
// ($.key1.key2 / $.key1[0]) for path-based targets; key-list targets
// (PostgreSQL, Redshift, ClickHouse) keep the original argument shape.
"JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
    // _TEXT variant extracts a scalar string rather than a JSON value.
    let is_text = name == "JSON_EXTRACT_PATH_TEXT";
    let mut args = f.args;
    let json_expr = args.remove(0);
    // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
    let mut json_path = "$".to_string();
    for a in &args {
        match a {
            Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
                let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
                // Numeric string keys become array indices: [0]
                if s.chars().all(|c| c.is_ascii_digit()) {
                    json_path.push('[');
                    json_path.push_str(s);
                    json_path.push(']');
                } else {
                    // NOTE(review): keys containing '.', '[' or quotes are
                    // appended unescaped — confirm callers never pass such
                    // keys, or the generated path will be wrong.
                    json_path.push('.');
                    json_path.push_str(s);
                }
            }
            _ => {
                // Non-literal key: emit a placeholder segment (best effort).
                json_path.push_str(".?");
            }
        }
    }
    match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::BigQuery | DialectType::MySQL => {
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::PostgreSQL | DialectType::Materialize => {
            // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
            // (these take the key list directly, not a JSONPath string).
            let func_name = if is_text {
                "JSON_EXTRACT_PATH_TEXT"
            } else {
                "JSON_EXTRACT_PATH"
            };
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                new_args,
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
            if is_text {
                Ok(Expression::JsonExtractScalar(Box::new(
                    crate::expressions::JsonExtractFunc {
                        this: json_expr,
                        path: Expression::string(&json_path),
                        returning: None,
                        arrow_syntax: true,
                        hash_arrow_syntax: false,
                        wrapper_option: None,
                        quotes_option: None,
                        on_scalar_string: false,
                        on_error: None,
                    },
                )))
            } else {
                Ok(Expression::JsonExtract(Box::new(
                    crate::expressions::JsonExtractFunc {
                        this: json_expr,
                        path: Expression::string(&json_path),
                        returning: None,
                        arrow_syntax: true,
                        hash_arrow_syntax: false,
                        wrapper_option: None,
                        quotes_option: None,
                        on_scalar_string: false,
                        on_error: None,
                    },
                )))
            }
        }
        DialectType::Redshift => {
            // Keep as JSON_EXTRACT_PATH_TEXT for Redshift
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXTRACT_PATH_TEXT".to_string(),
                new_args,
            ))))
        }
        DialectType::TSQL => {
            // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
            // JSON_QUERY handles objects/arrays, JSON_VALUE handles scalars;
            // ISNULL picks whichever one matched.
            let jq = Expression::Function(Box::new(Function::new(
                "JSON_QUERY".to_string(),
                vec![json_expr.clone(), Expression::string(&json_path)],
            )));
            let jv = Expression::Function(Box::new(Function::new(
                "JSON_VALUE".to_string(),
                vec![json_expr, Expression::string(&json_path)],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ISNULL".to_string(),
                vec![jq, jv],
            ))))
        }
        DialectType::ClickHouse => {
            let func_name = if is_text {
                "JSONExtractString"
            } else {
                "JSONExtractRaw"
            };
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                new_args,
            ))))
        }
        _ => {
            // Generic fallback: JSONPath-string form.
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
    }
}
// APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery.
"APPROX_DISTINCT" if f.args.len() >= 1 => {
    let name = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive
        | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
        _ => "APPROX_DISTINCT",
    };
    let mut args = f.args;
    // Hive doesn't support the accuracy parameter
    if name == "APPROX_COUNT_DISTINCT"
        && matches!(target, DialectType::Hive)
    {
        args.truncate(1);
    }
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        args,
    ))))
}
// REGEXP_EXTRACT(x, pattern) - normalize default group index.
// Dialects disagree on which capture group the 2-argument form returns
// (0 = whole match vs 1 = first group). When source and target defaults
// differ, the SOURCE default is appended explicitly so the target
// reproduces the source semantics.
"REGEXP_EXTRACT" if f.args.len() == 2 => {
    // Determine source default group index
    let source_default = match source {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::DuckDB => 0,
        _ => 1, // Hive/Spark/Databricks default = 1
    };
    // Determine target default group index
    let target_default = match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::DuckDB
        | DialectType::BigQuery => 0,
        DialectType::Snowflake => {
            // Snowflake uses REGEXP_SUBSTR
            // NOTE(review): this early return skips the group-index
            // normalization entirely for Snowflake — confirm REGEXP_SUBSTR's
            // default matches the source's.
            return Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_SUBSTR".to_string(),
                f.args,
            ))));
        }
        _ => 1, // Hive/Spark/Databricks default = 1
    };
    if source_default != target_default {
        // Make the source's implicit group explicit for the target.
        let mut args = f.args;
        args.push(Expression::number(source_default));
        Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT".to_string(),
            args,
        ))))
    } else {
        Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT".to_string(),
            f.args,
        ))))
    }
}
// RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form).
// DuckDB has no RLIKE-style operator node here, so it gets an explicit
// REGEXP_MATCHES call; everyone else gets the abstract RegexpLike node and
// the generator picks the dialect's surface syntax (RLIKE / ~ / REGEXP_LIKE).
"RLIKE" if f.args.len() == 2 => {
    let mut args = f.args;
    let str_expr = args.remove(0);
    let pattern = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // REGEXP_MATCHES(str, pattern)
            Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_MATCHES".to_string(),
                vec![str_expr, pattern],
            ))))
        }
        _ => {
            // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
            Ok(Expression::RegexpLike(Box::new(
                crate::expressions::RegexpFunc {
                    this: str_expr,
                    pattern,
                    flags: None,
                },
            )))
        }
    }
}
11077 // EOMONTH(date[, month_offset]) -> target-specific
11078 "EOMONTH" if f.args.len() >= 1 => {
11079 let mut args = f.args;
11080 let date_arg = args.remove(0);
11081 let month_offset = if !args.is_empty() {
11082 Some(args.remove(0))
11083 } else {
11084 None
11085 };
11086
11087 // Helper: wrap date in CAST to DATE
11088 let cast_to_date = |e: Expression| -> Expression {
11089 Expression::Cast(Box::new(Cast {
11090 this: e,
11091 to: DataType::Date,
11092 trailing_comments: vec![],
11093 double_colon_syntax: false,
11094 format: None,
11095 default: None,
11096 inferred_type: None,
11097 }))
11098 };
11099
11100 match target {
11101 DialectType::TSQL | DialectType::Fabric => {
11102 // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
11103 let date = cast_to_date(date_arg);
11104 let date = if let Some(offset) = month_offset {
11105 Expression::Function(Box::new(Function::new(
11106 "DATEADD".to_string(),
11107 vec![
11108 Expression::Identifier(Identifier::new(
11109 "MONTH",
11110 )),
11111 offset,
11112 date,
11113 ],
11114 )))
11115 } else {
11116 date
11117 };
11118 Ok(Expression::Function(Box::new(Function::new(
11119 "EOMONTH".to_string(),
11120 vec![date],
11121 ))))
11122 }
11123 DialectType::Presto
11124 | DialectType::Trino
11125 | DialectType::Athena => {
11126 // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
11127 // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
11128 let cast_ts = Expression::Cast(Box::new(Cast {
11129 this: date_arg,
11130 to: DataType::Timestamp {
11131 timezone: false,
11132 precision: None,
11133 },
11134 trailing_comments: vec![],
11135 double_colon_syntax: false,
11136 format: None,
11137 default: None,
11138 inferred_type: None,
11139 }));
11140 let date = cast_to_date(cast_ts);
11141 let date = if let Some(offset) = month_offset {
11142 Expression::Function(Box::new(Function::new(
11143 "DATE_ADD".to_string(),
11144 vec![Expression::string("MONTH"), offset, date],
11145 )))
11146 } else {
11147 date
11148 };
11149 Ok(Expression::Function(Box::new(Function::new(
11150 "LAST_DAY_OF_MONTH".to_string(),
11151 vec![date],
11152 ))))
11153 }
11154 DialectType::PostgreSQL => {
11155 // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
11156 let date = cast_to_date(date_arg);
11157 let date = if let Some(offset) = month_offset {
11158 let interval_str = format!(
11159 "{} MONTH",
11160 Self::expr_to_string_static(&offset)
11161 );
11162 Expression::Add(Box::new(
11163 crate::expressions::BinaryOp::new(
11164 date,
11165 Expression::Interval(Box::new(
11166 crate::expressions::Interval {
11167 this: Some(Expression::string(
11168 &interval_str,
11169 )),
11170 unit: None,
11171 },
11172 )),
11173 ),
11174 ))
11175 } else {
11176 date
11177 };
11178 let truncated =
11179 Expression::Function(Box::new(Function::new(
11180 "DATE_TRUNC".to_string(),
11181 vec![Expression::string("MONTH"), date],
11182 )));
11183 let plus_month = Expression::Add(Box::new(
11184 crate::expressions::BinaryOp::new(
11185 truncated,
11186 Expression::Interval(Box::new(
11187 crate::expressions::Interval {
11188 this: Some(Expression::string("1 MONTH")),
11189 unit: None,
11190 },
11191 )),
11192 ),
11193 ));
11194 let minus_day = Expression::Sub(Box::new(
11195 crate::expressions::BinaryOp::new(
11196 plus_month,
11197 Expression::Interval(Box::new(
11198 crate::expressions::Interval {
11199 this: Some(Expression::string("1 DAY")),
11200 unit: None,
11201 },
11202 )),
11203 ),
11204 ));
11205 Ok(Expression::Cast(Box::new(Cast {
11206 this: minus_day,
11207 to: DataType::Date,
11208 trailing_comments: vec![],
11209 double_colon_syntax: false,
11210 format: None,
11211 default: None,
11212 inferred_type: None,
11213 })))
11214 }
11215 DialectType::DuckDB => {
11216 // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
11217 let date = cast_to_date(date_arg);
11218 let date = if let Some(offset) = month_offset {
11219 // Wrap negative numbers in parentheses for DuckDB INTERVAL
11220 let interval_val =
11221 if matches!(&offset, Expression::Neg(_)) {
11222 Expression::Paren(Box::new(
11223 crate::expressions::Paren {
11224 this: offset,
11225 trailing_comments: Vec::new(),
11226 },
11227 ))
11228 } else {
11229 offset
11230 };
11231 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
11232 date,
11233 Expression::Interval(Box::new(crate::expressions::Interval {
11234 this: Some(interval_val),
11235 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
11236 unit: crate::expressions::IntervalUnit::Month,
11237 use_plural: false,
11238 }),
11239 })),
11240 )))
11241 } else {
11242 date
11243 };
11244 Ok(Expression::Function(Box::new(Function::new(
11245 "LAST_DAY".to_string(),
11246 vec![date],
11247 ))))
11248 }
11249 DialectType::Snowflake | DialectType::Redshift => {
11250 // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
11251 // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
11252 let date = if matches!(target, DialectType::Snowflake) {
11253 Expression::Function(Box::new(Function::new(
11254 "TO_DATE".to_string(),
11255 vec![date_arg],
11256 )))
11257 } else {
11258 cast_to_date(date_arg)
11259 };
11260 let date = if let Some(offset) = month_offset {
11261 Expression::Function(Box::new(Function::new(
11262 "DATEADD".to_string(),
11263 vec![
11264 Expression::Identifier(Identifier::new(
11265 "MONTH",
11266 )),
11267 offset,
11268 date,
11269 ],
11270 )))
11271 } else {
11272 date
11273 };
11274 Ok(Expression::Function(Box::new(Function::new(
11275 "LAST_DAY".to_string(),
11276 vec![date],
11277 ))))
11278 }
11279 DialectType::Spark | DialectType::Databricks => {
11280 // Spark: LAST_DAY(TO_DATE(date))
11281 // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
11282 let date = Expression::Function(Box::new(Function::new(
11283 "TO_DATE".to_string(),
11284 vec![date_arg],
11285 )));
11286 let date = if let Some(offset) = month_offset {
11287 Expression::Function(Box::new(Function::new(
11288 "ADD_MONTHS".to_string(),
11289 vec![date, offset],
11290 )))
11291 } else {
11292 date
11293 };
11294 Ok(Expression::Function(Box::new(Function::new(
11295 "LAST_DAY".to_string(),
11296 vec![date],
11297 ))))
11298 }
11299 DialectType::MySQL => {
11300 // MySQL: LAST_DAY(DATE(date)) - no offset
11301 // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
11302 let date = if let Some(offset) = month_offset {
11303 let iu = crate::expressions::IntervalUnit::Month;
11304 Expression::DateAdd(Box::new(
11305 crate::expressions::DateAddFunc {
11306 this: date_arg,
11307 interval: offset,
11308 unit: iu,
11309 },
11310 ))
11311 } else {
11312 Expression::Function(Box::new(Function::new(
11313 "DATE".to_string(),
11314 vec![date_arg],
11315 )))
11316 };
11317 Ok(Expression::Function(Box::new(Function::new(
11318 "LAST_DAY".to_string(),
11319 vec![date],
11320 ))))
11321 }
11322 DialectType::BigQuery => {
11323 // BigQuery: LAST_DAY(CAST(date AS DATE))
11324 // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
11325 let date = cast_to_date(date_arg);
11326 let date = if let Some(offset) = month_offset {
11327 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
11328 this: Some(offset),
11329 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
11330 unit: crate::expressions::IntervalUnit::Month,
11331 use_plural: false,
11332 }),
11333 }));
11334 Expression::Function(Box::new(Function::new(
11335 "DATE_ADD".to_string(),
11336 vec![date, interval],
11337 )))
11338 } else {
11339 date
11340 };
11341 Ok(Expression::Function(Box::new(Function::new(
11342 "LAST_DAY".to_string(),
11343 vec![date],
11344 ))))
11345 }
11346 DialectType::ClickHouse => {
11347 // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
11348 let date = Expression::Cast(Box::new(Cast {
11349 this: date_arg,
11350 to: DataType::Nullable {
11351 inner: Box::new(DataType::Date),
11352 },
11353 trailing_comments: vec![],
11354 double_colon_syntax: false,
11355 format: None,
11356 default: None,
11357 inferred_type: None,
11358 }));
11359 let date = if let Some(offset) = month_offset {
11360 Expression::Function(Box::new(Function::new(
11361 "DATE_ADD".to_string(),
11362 vec![
11363 Expression::Identifier(Identifier::new(
11364 "MONTH",
11365 )),
11366 offset,
11367 date,
11368 ],
11369 )))
11370 } else {
11371 date
11372 };
11373 Ok(Expression::Function(Box::new(Function::new(
11374 "LAST_DAY".to_string(),
11375 vec![date],
11376 ))))
11377 }
11378 DialectType::Hive => {
11379 // Hive: LAST_DAY(date)
11380 let date = if let Some(offset) = month_offset {
11381 Expression::Function(Box::new(Function::new(
11382 "ADD_MONTHS".to_string(),
11383 vec![date_arg, offset],
11384 )))
11385 } else {
11386 date_arg
11387 };
11388 Ok(Expression::Function(Box::new(Function::new(
11389 "LAST_DAY".to_string(),
11390 vec![date],
11391 ))))
11392 }
11393 _ => {
11394 // Default: LAST_DAY(date)
11395 let date = if let Some(offset) = month_offset {
11396 let unit =
11397 Expression::Identifier(Identifier::new("MONTH"));
11398 Expression::Function(Box::new(Function::new(
11399 "DATEADD".to_string(),
11400 vec![unit, offset, date_arg],
11401 )))
11402 } else {
11403 date_arg
11404 };
11405 Ok(Expression::Function(Box::new(Function::new(
11406 "LAST_DAY".to_string(),
11407 vec![date],
11408 ))))
11409 }
11410 }
11411 }
11412 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
11413 "LAST_DAY" | "LAST_DAY_OF_MONTH"
11414 if !matches!(source, DialectType::BigQuery)
11415 && f.args.len() >= 1 =>
11416 {
11417 let first_arg = f.args.into_iter().next().unwrap();
11418 match target {
11419 DialectType::TSQL | DialectType::Fabric => {
11420 Ok(Expression::Function(Box::new(Function::new(
11421 "EOMONTH".to_string(),
11422 vec![first_arg],
11423 ))))
11424 }
11425 DialectType::Presto
11426 | DialectType::Trino
11427 | DialectType::Athena => {
11428 Ok(Expression::Function(Box::new(Function::new(
11429 "LAST_DAY_OF_MONTH".to_string(),
11430 vec![first_arg],
11431 ))))
11432 }
11433 _ => Ok(Expression::Function(Box::new(Function::new(
11434 "LAST_DAY".to_string(),
11435 vec![first_arg],
11436 )))),
11437 }
11438 }
11439 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
11440 "MAP"
11441 if f.args.len() == 2
11442 && matches!(
11443 source,
11444 DialectType::Presto
11445 | DialectType::Trino
11446 | DialectType::Athena
11447 ) =>
11448 {
11449 let keys_arg = f.args[0].clone();
11450 let vals_arg = f.args[1].clone();
11451
11452 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
11453 fn extract_array_elements(
11454 expr: &Expression,
11455 ) -> Option<&Vec<Expression>> {
11456 match expr {
11457 Expression::Array(arr) => Some(&arr.expressions),
11458 Expression::ArrayFunc(arr) => Some(&arr.expressions),
11459 Expression::Function(f)
11460 if f.name.eq_ignore_ascii_case("ARRAY") =>
11461 {
11462 Some(&f.args)
11463 }
11464 _ => None,
11465 }
11466 }
11467
11468 match target {
11469 DialectType::Spark | DialectType::Databricks => {
11470 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
11471 Ok(Expression::Function(Box::new(Function::new(
11472 "MAP_FROM_ARRAYS".to_string(),
11473 f.args,
11474 ))))
11475 }
11476 DialectType::Hive => {
11477 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
11478 if let (Some(keys), Some(vals)) = (
11479 extract_array_elements(&keys_arg),
11480 extract_array_elements(&vals_arg),
11481 ) {
11482 if keys.len() == vals.len() {
11483 let mut interleaved = Vec::new();
11484 for (k, v) in keys.iter().zip(vals.iter()) {
11485 interleaved.push(k.clone());
11486 interleaved.push(v.clone());
11487 }
11488 Ok(Expression::Function(Box::new(Function::new(
11489 "MAP".to_string(),
11490 interleaved,
11491 ))))
11492 } else {
11493 Ok(Expression::Function(Box::new(Function::new(
11494 "MAP".to_string(),
11495 f.args,
11496 ))))
11497 }
11498 } else {
11499 Ok(Expression::Function(Box::new(Function::new(
11500 "MAP".to_string(),
11501 f.args,
11502 ))))
11503 }
11504 }
11505 DialectType::Snowflake => {
11506 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
11507 if let (Some(keys), Some(vals)) = (
11508 extract_array_elements(&keys_arg),
11509 extract_array_elements(&vals_arg),
11510 ) {
11511 if keys.len() == vals.len() {
11512 let mut interleaved = Vec::new();
11513 for (k, v) in keys.iter().zip(vals.iter()) {
11514 interleaved.push(k.clone());
11515 interleaved.push(v.clone());
11516 }
11517 Ok(Expression::Function(Box::new(Function::new(
11518 "OBJECT_CONSTRUCT".to_string(),
11519 interleaved,
11520 ))))
11521 } else {
11522 Ok(Expression::Function(Box::new(Function::new(
11523 "MAP".to_string(),
11524 f.args,
11525 ))))
11526 }
11527 } else {
11528 Ok(Expression::Function(Box::new(Function::new(
11529 "MAP".to_string(),
11530 f.args,
11531 ))))
11532 }
11533 }
11534 _ => Ok(Expression::Function(f)),
11535 }
11536 }
11537 // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
11538 "MAP"
11539 if f.args.is_empty()
11540 && matches!(
11541 source,
11542 DialectType::Hive
11543 | DialectType::Spark
11544 | DialectType::Databricks
11545 )
11546 && matches!(
11547 target,
11548 DialectType::Presto
11549 | DialectType::Trino
11550 | DialectType::Athena
11551 ) =>
11552 {
11553 let empty_keys =
11554 Expression::Array(Box::new(crate::expressions::Array {
11555 expressions: vec![],
11556 }));
11557 let empty_vals =
11558 Expression::Array(Box::new(crate::expressions::Array {
11559 expressions: vec![],
11560 }));
11561 Ok(Expression::Function(Box::new(Function::new(
11562 "MAP".to_string(),
11563 vec![empty_keys, empty_vals],
11564 ))))
11565 }
11566 // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
11567 "MAP"
11568 if f.args.len() >= 2
11569 && f.args.len() % 2 == 0
11570 && matches!(
11571 source,
11572 DialectType::Hive
11573 | DialectType::Spark
11574 | DialectType::Databricks
11575 | DialectType::ClickHouse
11576 ) =>
11577 {
11578 let args = f.args;
11579 match target {
11580 DialectType::DuckDB => {
11581 // MAP([k1, k2], [v1, v2])
11582 let mut keys = Vec::new();
11583 let mut vals = Vec::new();
11584 for (i, arg) in args.into_iter().enumerate() {
11585 if i % 2 == 0 {
11586 keys.push(arg);
11587 } else {
11588 vals.push(arg);
11589 }
11590 }
11591 let keys_arr = Expression::Array(Box::new(
11592 crate::expressions::Array { expressions: keys },
11593 ));
11594 let vals_arr = Expression::Array(Box::new(
11595 crate::expressions::Array { expressions: vals },
11596 ));
11597 Ok(Expression::Function(Box::new(Function::new(
11598 "MAP".to_string(),
11599 vec![keys_arr, vals_arr],
11600 ))))
11601 }
11602 DialectType::Presto | DialectType::Trino => {
11603 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
11604 let mut keys = Vec::new();
11605 let mut vals = Vec::new();
11606 for (i, arg) in args.into_iter().enumerate() {
11607 if i % 2 == 0 {
11608 keys.push(arg);
11609 } else {
11610 vals.push(arg);
11611 }
11612 }
11613 let keys_arr = Expression::Array(Box::new(
11614 crate::expressions::Array { expressions: keys },
11615 ));
11616 let vals_arr = Expression::Array(Box::new(
11617 crate::expressions::Array { expressions: vals },
11618 ));
11619 Ok(Expression::Function(Box::new(Function::new(
11620 "MAP".to_string(),
11621 vec![keys_arr, vals_arr],
11622 ))))
11623 }
11624 DialectType::Snowflake => Ok(Expression::Function(Box::new(
11625 Function::new("OBJECT_CONSTRUCT".to_string(), args),
11626 ))),
11627 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
11628 Function::new("map".to_string(), args),
11629 ))),
11630 _ => Ok(Expression::Function(Box::new(Function::new(
11631 "MAP".to_string(),
11632 args,
11633 )))),
11634 }
11635 }
11636 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
11637 "COLLECT_LIST" if f.args.len() >= 1 => {
11638 let name = match target {
11639 DialectType::Spark
11640 | DialectType::Databricks
11641 | DialectType::Hive => "COLLECT_LIST",
11642 DialectType::DuckDB
11643 | DialectType::PostgreSQL
11644 | DialectType::Redshift
11645 | DialectType::Snowflake
11646 | DialectType::BigQuery => "ARRAY_AGG",
11647 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
11648 _ => "ARRAY_AGG",
11649 };
11650 Ok(Expression::Function(Box::new(Function::new(
11651 name.to_string(),
11652 f.args,
11653 ))))
11654 }
11655 // COLLECT_SET(x) -> target-specific distinct array aggregation
11656 "COLLECT_SET" if f.args.len() >= 1 => {
11657 let name = match target {
11658 DialectType::Spark
11659 | DialectType::Databricks
11660 | DialectType::Hive => "COLLECT_SET",
11661 DialectType::Presto
11662 | DialectType::Trino
11663 | DialectType::Athena => "SET_AGG",
11664 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
11665 _ => "ARRAY_AGG",
11666 };
11667 Ok(Expression::Function(Box::new(Function::new(
11668 name.to_string(),
11669 f.args,
11670 ))))
11671 }
11672 // ISNAN(x) / IS_NAN(x) - normalize
11673 "ISNAN" | "IS_NAN" => {
11674 let name = match target {
11675 DialectType::Spark
11676 | DialectType::Databricks
11677 | DialectType::Hive => "ISNAN",
11678 DialectType::Presto
11679 | DialectType::Trino
11680 | DialectType::Athena => "IS_NAN",
11681 DialectType::BigQuery
11682 | DialectType::PostgreSQL
11683 | DialectType::Redshift => "IS_NAN",
11684 DialectType::ClickHouse => "IS_NAN",
11685 _ => "ISNAN",
11686 };
11687 Ok(Expression::Function(Box::new(Function::new(
11688 name.to_string(),
11689 f.args,
11690 ))))
11691 }
            // SPLIT_PART(str, delim, index) -> target-specific
            "SPLIT_PART" if f.args.len() == 3 => {
                match target {
                    DialectType::Spark | DialectType::Databricks => {
                        // Keep as SPLIT_PART (Spark 3.4+)
                        Ok(Expression::Function(Box::new(Function::new(
                            "SPLIT_PART".to_string(),
                            f.args,
                        ))))
                    }
                    DialectType::DuckDB
                        if matches!(source, DialectType::Snowflake) =>
                    {
                        // Snowflake SPLIT_PART -> DuckDB with CASE wrapper:
                        // - part_index 0 treated as 1
                        // - empty delimiter: return whole string if index 1 or -1, else ''
                        // The three args are consumed positionally: str, delim, index.
                        let mut args = f.args;
                        let str_arg = args.remove(0);
                        let delim_arg = args.remove(0);
                        let idx_arg = args.remove(0);

                        // (CASE WHEN idx = 0 THEN 1 ELSE idx END)
                        // Parenthesized so it nests safely inside the call below.
                        let adjusted_idx = Expression::Paren(Box::new(Paren {
                            this: Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(
                                    Expression::Eq(Box::new(BinaryOp {
                                        left: idx_arg.clone(),
                                        right: Expression::number(0),
                                        left_comments: vec![],
                                        operator_comments: vec![],
                                        trailing_comments: vec![],
                                        inferred_type: None,
                                    })),
                                    Expression::number(1),
                                )],
                                else_: Some(idx_arg.clone()),
                                comments: vec![],
                                inferred_type: None,
                            })),
                            trailing_comments: vec![],
                        }));

                        // SPLIT_PART(str, delim, adjusted_idx)
                        // str/delim/adjusted_idx are cloned because they are reused
                        // in the CASE scaffolding built below.
                        let base_func = Expression::Function(Box::new(Function::new(
                            "SPLIT_PART".to_string(),
                            vec![str_arg.clone(), delim_arg.clone(), adjusted_idx.clone()],
                        )));

                        // (CASE WHEN adjusted_idx = 1 OR adjusted_idx = -1 THEN str ELSE '' END)
                        // Mirrors Snowflake's empty-delimiter semantics.
                        let empty_delim_case = Expression::Paren(Box::new(Paren {
                            this: Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(
                                    Expression::Or(Box::new(BinaryOp {
                                        left: Expression::Eq(Box::new(BinaryOp {
                                            left: adjusted_idx.clone(),
                                            right: Expression::number(1),
                                            left_comments: vec![],
                                            operator_comments: vec![],
                                            trailing_comments: vec![],
                                            inferred_type: None,
                                        })),
                                        right: Expression::Eq(Box::new(BinaryOp {
                                            left: adjusted_idx,
                                            right: Expression::number(-1),
                                            left_comments: vec![],
                                            operator_comments: vec![],
                                            trailing_comments: vec![],
                                            inferred_type: None,
                                        })),
                                        left_comments: vec![],
                                        operator_comments: vec![],
                                        trailing_comments: vec![],
                                        inferred_type: None,
                                    })),
                                    str_arg,
                                )],
                                else_: Some(Expression::string("")),
                                comments: vec![],
                                inferred_type: None,
                            })),
                            trailing_comments: vec![],
                        }));

                        // CASE WHEN delim = '' THEN (empty case) ELSE SPLIT_PART(...) END
                        Ok(Expression::Case(Box::new(Case {
                            operand: None,
                            whens: vec![(
                                Expression::Eq(Box::new(BinaryOp {
                                    left: delim_arg,
                                    right: Expression::string(""),
                                    left_comments: vec![],
                                    operator_comments: vec![],
                                    trailing_comments: vec![],
                                    inferred_type: None,
                                })),
                                empty_delim_case,
                            )],
                            else_: Some(base_func),
                            comments: vec![],
                            inferred_type: None,
                        })))
                    }
                    // Targets with native SPLIT_PART semantics: pass through.
                    DialectType::DuckDB
                    | DialectType::PostgreSQL
                    | DialectType::Snowflake
                    | DialectType::Redshift
                    | DialectType::Trino
                    | DialectType::Presto => Ok(Expression::Function(Box::new(
                        Function::new("SPLIT_PART".to_string(), f.args),
                    ))),
                    DialectType::Hive => {
                        // SPLIT(str, delim)[index]
                        // Complex conversion, just keep as-is for now
                        Ok(Expression::Function(Box::new(Function::new(
                            "SPLIT_PART".to_string(),
                            f.args,
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "SPLIT_PART".to_string(),
                        f.args,
                    )))),
                }
            }
11818 // JSON_EXTRACT(json, path) -> target-specific JSON extraction
11819 "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
11820 let is_scalar = name == "JSON_EXTRACT_SCALAR";
11821 match target {
11822 DialectType::Spark
11823 | DialectType::Databricks
11824 | DialectType::Hive => {
11825 let mut args = f.args;
11826 // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
11827 // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
11828 if let Some(Expression::Function(inner)) = args.first() {
11829 if inner.name.eq_ignore_ascii_case("TRY")
11830 && inner.args.len() == 1
11831 {
11832 let mut inner_args = inner.args.clone();
11833 args[0] = inner_args.remove(0);
11834 }
11835 }
11836 Ok(Expression::Function(Box::new(Function::new(
11837 "GET_JSON_OBJECT".to_string(),
11838 args,
11839 ))))
11840 }
11841 DialectType::DuckDB | DialectType::SQLite => {
11842 // json -> path syntax
11843 let mut args = f.args;
11844 let json_expr = args.remove(0);
11845 let path = args.remove(0);
11846 Ok(Expression::JsonExtract(Box::new(
11847 crate::expressions::JsonExtractFunc {
11848 this: json_expr,
11849 path,
11850 returning: None,
11851 arrow_syntax: true,
11852 hash_arrow_syntax: false,
11853 wrapper_option: None,
11854 quotes_option: None,
11855 on_scalar_string: false,
11856 on_error: None,
11857 },
11858 )))
11859 }
11860 DialectType::TSQL => {
11861 let func_name = if is_scalar {
11862 "JSON_VALUE"
11863 } else {
11864 "JSON_QUERY"
11865 };
11866 Ok(Expression::Function(Box::new(Function::new(
11867 func_name.to_string(),
11868 f.args,
11869 ))))
11870 }
11871 DialectType::PostgreSQL | DialectType::Redshift => {
11872 let func_name = if is_scalar {
11873 "JSON_EXTRACT_PATH_TEXT"
11874 } else {
11875 "JSON_EXTRACT_PATH"
11876 };
11877 Ok(Expression::Function(Box::new(Function::new(
11878 func_name.to_string(),
11879 f.args,
11880 ))))
11881 }
11882 _ => Ok(Expression::Function(Box::new(Function::new(
11883 name.to_string(),
11884 f.args,
11885 )))),
11886 }
11887 }
            // MySQL JSON_SEARCH(json_doc, mode, search[, escape_char[, path]]) -> DuckDB json_tree-based lookup
            "JSON_SEARCH"
                if matches!(target, DialectType::DuckDB)
                    && (3..=5).contains(&f.args.len()) =>
            {
                let args = &f.args;

                // Only rewrite deterministic modes and NULL/no escape-char variant.
                // Any other shape falls through unchanged via the early returns.
                let mode = match &args[1] {
                    Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
                        let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
                        s.to_ascii_lowercase()
                    }
                    _ => return Ok(Expression::Function(f)),
                };
                if mode != "one" && mode != "all" {
                    return Ok(Expression::Function(f));
                }
                // A non-NULL escape character has no equivalent in this rewrite.
                if args.len() >= 4 && !matches!(&args[3], Expression::Null(_)) {
                    return Ok(Expression::Function(f));
                }

                // Render the operands to SQL text so they can be spliced into the
                // raw subquery; bail out (keep the call) if generation fails.
                let json_doc_sql = match Generator::sql(&args[0]) {
                    Ok(sql) => sql,
                    Err(_) => return Ok(Expression::Function(f)),
                };
                let search_sql = match Generator::sql(&args[2]) {
                    Ok(sql) => sql,
                    Err(_) => return Ok(Expression::Function(f)),
                };
                let path_sql = if args.len() == 5 {
                    match Generator::sql(&args[4]) {
                        Ok(sql) => sql,
                        Err(_) => return Ok(Expression::Function(f)),
                    }
                } else {
                    // Default search path is the document root.
                    "'$'".to_string()
                };

                // 'all' collects every matching path; 'one' takes the first match
                // in document order (ORDER BY __jt.id LIMIT 1).
                let rewrite_sql = if mode == "all" {
                    format!(
                        "(SELECT TO_JSON(LIST(__jt.fullkey)) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}))",
                        json_doc_sql, path_sql, search_sql
                    )
                } else {
                    format!(
                        "(SELECT TO_JSON(__jt.fullkey) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}) ORDER BY __jt.id LIMIT 1)",
                        json_doc_sql, path_sql, search_sql
                    )
                };

                // Emit as raw SQL: this subquery shape has no dedicated AST node.
                Ok(Expression::Raw(crate::expressions::Raw {
                    sql: rewrite_sql,
                }))
            }
11943 // SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
11944 // BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
11945 "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
11946 if f.args.len() >= 2
11947 && matches!(source, DialectType::SingleStore) =>
11948 {
11949 let is_bson = name == "BSON_EXTRACT_BSON";
11950 let mut args = f.args;
11951 let json_expr = args.remove(0);
11952
11953 // Build JSONPath from remaining arguments
11954 let mut path = String::from("$");
11955 for arg in &args {
11956 if let Expression::Literal(lit) = arg
11957 {
11958 if let crate::expressions::Literal::String(s) = lit.as_ref() {
11959 // Check if it's a numeric string (array index)
11960 if s.parse::<i64>().is_ok() {
11961 path.push('[');
11962 path.push_str(s);
11963 path.push(']');
11964 } else {
11965 path.push('.');
11966 path.push_str(s);
11967 }
11968 }
11969 }
11970 }
11971
11972 let target_func = if is_bson {
11973 "JSONB_EXTRACT"
11974 } else {
11975 "JSON_EXTRACT"
11976 };
11977 Ok(Expression::Function(Box::new(Function::new(
11978 target_func.to_string(),
11979 vec![json_expr, Expression::string(&path)],
11980 ))))
11981 }
            // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
            "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
                // Rebuild the call under ClickHouse's camelCase name, carrying
                // over the call modifiers (distinct, bracket syntax, parens,
                // quoting, trailing comments) from the original function node.
                // NOTE(review): span and inferred_type are set to None rather
                // than copied from `f` — presumably because the renamed node no
                // longer corresponds to the original source span; confirm this
                // matches the convention at other rename sites.
                Ok(Expression::Function(Box::new(Function {
                    name: "arraySum".to_string(),
                    args: f.args,
                    distinct: f.distinct,
                    trailing_comments: f.trailing_comments,
                    use_bracket_syntax: f.use_bracket_syntax,
                    no_parens: f.no_parens,
                    quoted: f.quoted,
                    span: None,
                    inferred_type: None,
                })))
            }
11996 // TSQL JSON_QUERY/JSON_VALUE -> target-specific
11997 // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
11998 // and is handled by JsonQueryValueConvert action. This handles the case where
11999 // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
12000 "JSON_QUERY" | "JSON_VALUE"
12001 if f.args.len() == 2
12002 && matches!(
12003 source,
12004 DialectType::TSQL | DialectType::Fabric
12005 ) =>
12006 {
12007 match target {
12008 DialectType::Spark
12009 | DialectType::Databricks
12010 | DialectType::Hive => Ok(Expression::Function(Box::new(
12011 Function::new("GET_JSON_OBJECT".to_string(), f.args),
12012 ))),
12013 _ => Ok(Expression::Function(Box::new(Function::new(
12014 name.to_string(),
12015 f.args,
12016 )))),
12017 }
12018 }
            // UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
            "UNIX_TIMESTAMP" if f.args.len() == 1 => {
                let arg = f.args.into_iter().next().unwrap();
                // Hive-family sources get format-aware conversions because Hive's
                // UNIX_TIMESTAMP parses strings as 'yyyy-MM-dd HH:mm:ss'.
                let is_hive_source = matches!(
                    source,
                    DialectType::Hive
                        | DialectType::Spark
                        | DialectType::Databricks
                );
                match target {
                    DialectType::DuckDB if is_hive_source => {
                        // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
                        let strptime =
                            Expression::Function(Box::new(Function::new(
                                "STRPTIME".to_string(),
                                vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
                            )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![strptime],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino if is_hive_source => {
                        // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
                        // The TRY branch handles string inputs; the PARSE_DATETIME
                        // fallback handles timestamp inputs.
                        let cast_varchar =
                            Expression::Cast(Box::new(crate::expressions::Cast {
                                this: arg.clone(),
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                        let date_parse =
                            Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![
                                    cast_varchar,
                                    Expression::string("%Y-%m-%d %T"),
                                ],
                            )));
                        let try_expr = Expression::Function(Box::new(
                            Function::new("TRY".to_string(), vec![date_parse]),
                        ));
                        let date_format =
                            Expression::Function(Box::new(Function::new(
                                "DATE_FORMAT".to_string(),
                                vec![arg, Expression::string("%Y-%m-%d %T")],
                            )));
                        let parse_datetime =
                            Expression::Function(Box::new(Function::new(
                                "PARSE_DATETIME".to_string(),
                                vec![
                                    date_format,
                                    Expression::string("yyyy-MM-dd HH:mm:ss"),
                                ],
                            )));
                        let coalesce =
                            Expression::Function(Box::new(Function::new(
                                "COALESCE".to_string(),
                                vec![try_expr, parse_datetime],
                            )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_UNIXTIME".to_string(),
                            vec![coalesce],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // Non-Hive sources: plain rename to TO_UNIXTIME.
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_UNIXTIME".to_string(),
                            vec![arg],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_TIMESTAMP".to_string(),
                        vec![arg],
                    )))),
                }
            }
12102 // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
12103 "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
12104 DialectType::Spark
12105 | DialectType::Databricks
12106 | DialectType::Hive => Ok(Expression::Function(Box::new(
12107 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
12108 ))),
12109 _ => Ok(Expression::Function(Box::new(Function::new(
12110 "TO_UNIX_TIMESTAMP".to_string(),
12111 f.args,
12112 )))),
12113 },
12114 // CURDATE() -> CURRENT_DATE
12115 "CURDATE" => {
12116 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
12117 }
12118 // CURTIME() -> CURRENT_TIME
12119 "CURTIME" => {
12120 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
12121 precision: None,
12122 }))
12123 }
            // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive, LIST_SORT for DuckDB
            "ARRAY_SORT" if f.args.len() >= 1 => {
                match target {
                    DialectType::Hive => {
                        let mut args = f.args;
                        args.truncate(1); // Drop lambda comparator
                        Ok(Expression::Function(Box::new(Function::new(
                            "SORT_ARRAY".to_string(),
                            args,
                        ))))
                    }
                    DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
                        // Snowflake ARRAY_SORT(arr[, asc_bool[, nulls_first_bool]]) -> DuckDB LIST_SORT(arr[, 'ASC'/'DESC'[, 'NULLS FIRST']])
                        // Snowflake passes sort direction / null placement as boolean
                        // literals; DuckDB expects string keywords instead.
                        let mut args_iter = f.args.into_iter();
                        let arr = args_iter.next().unwrap();
                        let asc_arg = args_iter.next();
                        let nulls_first_arg = args_iter.next();

                        // Whether each optional arg is a boolean literal we can
                        // translate statically (vs. an arbitrary expression).
                        let is_asc_bool = asc_arg.as_ref().map(|a| matches!(a, Expression::Boolean(_))).unwrap_or(false);
                        let is_nf_bool = nulls_first_arg.as_ref().map(|a| matches!(a, Expression::Boolean(_))).unwrap_or(false);

                        // No boolean args: pass through as-is
                        if !is_asc_bool && !is_nf_bool {
                            let mut result_args = vec![arr];
                            if let Some(asc) = asc_arg {
                                result_args.push(asc);
                                if let Some(nf) = nulls_first_arg {
                                    result_args.push(nf);
                                }
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "LIST_SORT".to_string(),
                                result_args,
                            ))))
                        } else {
                            // Has boolean args: convert to DuckDB LIST_SORT format
                            let descending = matches!(&asc_arg, Some(Expression::Boolean(b)) if !b.value);

                            // Snowflake defaults: nulls_first = TRUE for DESC, FALSE for ASC
                            let nulls_are_first = match &nulls_first_arg {
                                Some(Expression::Boolean(b)) => b.value,
                                None if is_asc_bool => descending, // Snowflake default
                                _ => false,
                            };
                            let nulls_first_sql = if nulls_are_first {
                                Some(Expression::string("NULLS FIRST"))
                            } else {
                                None
                            };

                            if !is_asc_bool {
                                // asc is non-boolean expression, nulls_first is boolean
                                let mut result_args = vec![arr];
                                if let Some(asc) = asc_arg {
                                    result_args.push(asc);
                                }
                                if let Some(nf) = nulls_first_sql {
                                    result_args.push(nf);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "LIST_SORT".to_string(),
                                    result_args,
                                ))))
                            } else {
                                if !descending && !nulls_are_first {
                                    // ASC, NULLS LAST (default) -> LIST_SORT(arr)
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "LIST_SORT".to_string(),
                                        vec![arr],
                                    ))))
                                } else if descending && !nulls_are_first {
                                    // DESC, NULLS LAST -> ARRAY_REVERSE_SORT(arr)
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "ARRAY_REVERSE_SORT".to_string(),
                                        vec![arr],
                                    ))))
                                } else {
                                    // NULLS FIRST -> LIST_SORT(arr, 'ASC'/'DESC', 'NULLS FIRST')
                                    let order_str = if descending { "DESC" } else { "ASC" };
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "LIST_SORT".to_string(),
                                        vec![
                                            arr,
                                            Expression::string(order_str),
                                            Expression::string("NULLS FIRST"),
                                        ],
                                    ))))
                                }
                            }
                        }
                    }
                    DialectType::DuckDB => {
                        // Non-Snowflake source: ARRAY_SORT(x, lambda) -> ARRAY_SORT(x) (drop comparator)
                        let mut args = f.args;
                        args.truncate(1); // Drop lambda comparator for DuckDB
                        Ok(Expression::Function(Box::new(Function::new(
                            "ARRAY_SORT".to_string(),
                            args,
                        ))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
12227 // SORT_ARRAY(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, keep for Hive/Spark
12228 "SORT_ARRAY" if f.args.len() == 1 => match target {
12229 DialectType::Hive
12230 | DialectType::Spark
12231 | DialectType::Databricks => Ok(Expression::Function(f)),
12232 DialectType::DuckDB => Ok(Expression::Function(Box::new(Function::new(
12233 "LIST_SORT".to_string(),
12234 f.args,
12235 )))),
12236 _ => Ok(Expression::Function(Box::new(Function::new(
12237 "ARRAY_SORT".to_string(),
12238 f.args,
12239 )))),
12240 },
            // SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
            "SORT_ARRAY" if f.args.len() == 2 => {
                // The second argument is ascending-order flag; FALSE means descending.
                let is_desc =
                    matches!(&f.args[1], Expression::Boolean(b) if !b.value);
                if is_desc {
                    match target {
                        DialectType::DuckDB => {
                            Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_REVERSE_SORT".to_string(),
                                vec![f.args.into_iter().next().unwrap()],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino => {
                            // Presto has no descending flag: emit
                            // ARRAY_SORT(arr, (a, b) -> CASE WHEN a < b THEN 1
                            //                           WHEN a > b THEN -1 ELSE 0 END)
                            // i.e. an inverted comparator lambda.
                            let arr_arg = f.args.into_iter().next().unwrap();
                            // Bare column references `a` and `b` used as the
                            // lambda's parameters inside the comparator body.
                            let a =
                                Expression::Column(Box::new(crate::expressions::Column {
                                    name: crate::expressions::Identifier::new("a"),
                                    table: None,
                                    join_mark: false,
                                    trailing_comments: Vec::new(),
                                    span: None,
                                    inferred_type: None,
                                }));
                            let b =
                                Expression::Column(Box::new(crate::expressions::Column {
                                    name: crate::expressions::Identifier::new("b"),
                                    table: None,
                                    join_mark: false,
                                    trailing_comments: Vec::new(),
                                    span: None,
                                    inferred_type: None,
                                }));
                            let case_expr = Expression::Case(Box::new(
                                crate::expressions::Case {
                                    operand: None,
                                    whens: vec![
                                        (
                                            Expression::Lt(Box::new(
                                                BinaryOp::new(a.clone(), b.clone()),
                                            )),
                                            Expression::Literal(Box::new(Literal::Number(
                                                "1".to_string(),
                                            ))),
                                        ),
                                        (
                                            Expression::Gt(Box::new(
                                                BinaryOp::new(a.clone(), b.clone()),
                                            )),
                                            Expression::Literal(Box::new(Literal::Number(
                                                "-1".to_string(),
                                            ))),
                                        ),
                                    ],
                                    else_: Some(Expression::Literal(Box::new(Literal::Number("0".to_string()),))),
                                    comments: Vec::new(),
                                    inferred_type: None,
                                },
                            ));
                            let lambda = Expression::Lambda(Box::new(
                                crate::expressions::LambdaExpr {
                                    parameters: vec![
                                        crate::expressions::Identifier::new("a"),
                                        crate::expressions::Identifier::new("b"),
                                    ],
                                    body: case_expr,
                                    colon: false,
                                    parameter_types: Vec::new(),
                                },
                            ));
                            Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_SORT".to_string(),
                                vec![arr_arg, lambda],
                            ))))
                        }
                        _ => Ok(Expression::Function(f)),
                    }
                } else {
                    // SORT_ARRAY(x, TRUE) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for others
                    // In every rewritten form the boolean flag is dropped.
                    match target {
                        DialectType::Hive => Ok(Expression::Function(f)),
                        DialectType::DuckDB => Ok(Expression::Function(Box::new(Function::new(
                            "LIST_SORT".to_string(),
                            vec![f.args.into_iter().next().unwrap()],
                        )))),
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "ARRAY_SORT".to_string(),
                            vec![f.args.into_iter().next().unwrap()],
                        )))),
                    }
                }
            }
12332 // LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
12333 "LEFT" if f.args.len() == 2 => {
12334 match target {
12335 DialectType::Hive
12336 | DialectType::Presto
12337 | DialectType::Trino
12338 | DialectType::Athena => {
12339 let x = f.args[0].clone();
12340 let n = f.args[1].clone();
12341 Ok(Expression::Function(Box::new(Function::new(
12342 "SUBSTRING".to_string(),
12343 vec![x, Expression::number(1), n],
12344 ))))
12345 }
12346 DialectType::Spark | DialectType::Databricks
12347 if matches!(
12348 source,
12349 DialectType::TSQL | DialectType::Fabric
12350 ) =>
12351 {
12352 // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
12353 let x = f.args[0].clone();
12354 let n = f.args[1].clone();
12355 let cast_x = Expression::Cast(Box::new(Cast {
12356 this: x,
12357 to: DataType::VarChar {
12358 length: None,
12359 parenthesized_length: false,
12360 },
12361 double_colon_syntax: false,
12362 trailing_comments: Vec::new(),
12363 format: None,
12364 default: None,
12365 inferred_type: None,
12366 }));
12367 Ok(Expression::Function(Box::new(Function::new(
12368 "LEFT".to_string(),
12369 vec![cast_x, n],
12370 ))))
12371 }
12372 _ => Ok(Expression::Function(f)),
12373 }
12374 }
12375 "RIGHT" if f.args.len() == 2 => {
12376 match target {
12377 DialectType::Hive
12378 | DialectType::Presto
12379 | DialectType::Trino
12380 | DialectType::Athena => {
12381 let x = f.args[0].clone();
12382 let n = f.args[1].clone();
12383 // SUBSTRING(x, LENGTH(x) - (n - 1))
12384 let len_x = Expression::Function(Box::new(Function::new(
12385 "LENGTH".to_string(),
12386 vec![x.clone()],
12387 )));
12388 let n_minus_1 = Expression::Sub(Box::new(
12389 crate::expressions::BinaryOp::new(
12390 n,
12391 Expression::number(1),
12392 ),
12393 ));
12394 let n_minus_1_paren = Expression::Paren(Box::new(
12395 crate::expressions::Paren {
12396 this: n_minus_1,
12397 trailing_comments: Vec::new(),
12398 },
12399 ));
12400 let offset = Expression::Sub(Box::new(
12401 crate::expressions::BinaryOp::new(
12402 len_x,
12403 n_minus_1_paren,
12404 ),
12405 ));
12406 Ok(Expression::Function(Box::new(Function::new(
12407 "SUBSTRING".to_string(),
12408 vec![x, offset],
12409 ))))
12410 }
12411 DialectType::Spark | DialectType::Databricks
12412 if matches!(
12413 source,
12414 DialectType::TSQL | DialectType::Fabric
12415 ) =>
12416 {
12417 // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
12418 let x = f.args[0].clone();
12419 let n = f.args[1].clone();
12420 let cast_x = Expression::Cast(Box::new(Cast {
12421 this: x,
12422 to: DataType::VarChar {
12423 length: None,
12424 parenthesized_length: false,
12425 },
12426 double_colon_syntax: false,
12427 trailing_comments: Vec::new(),
12428 format: None,
12429 default: None,
12430 inferred_type: None,
12431 }));
12432 Ok(Expression::Function(Box::new(Function::new(
12433 "RIGHT".to_string(),
12434 vec![cast_x, n],
12435 ))))
12436 }
12437 _ => Ok(Expression::Function(f)),
12438 }
12439 }
12440 // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
12441 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
12442 DialectType::Snowflake => Ok(Expression::Function(Box::new(
12443 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
12444 ))),
12445 DialectType::Spark | DialectType::Databricks => {
12446 Ok(Expression::Function(Box::new(Function::new(
12447 "MAP_FROM_ARRAYS".to_string(),
12448 f.args,
12449 ))))
12450 }
12451 _ => Ok(Expression::Function(Box::new(Function::new(
12452 "MAP".to_string(),
12453 f.args,
12454 )))),
12455 },
12456 // LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
12457 // SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
12458 "LIKE" if f.args.len() >= 2 => {
12459 let (this, pattern) = if matches!(source, DialectType::SQLite) {
12460 // SQLite: LIKE(pattern, string) -> string LIKE pattern
12461 (f.args[1].clone(), f.args[0].clone())
12462 } else {
12463 // Standard: LIKE(string, pattern) -> string LIKE pattern
12464 (f.args[0].clone(), f.args[1].clone())
12465 };
12466 let escape = if f.args.len() >= 3 {
12467 Some(f.args[2].clone())
12468 } else {
12469 None
12470 };
12471 Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
12472 left: this,
12473 right: pattern,
12474 escape,
12475 quantifier: None,
12476 inferred_type: None,
12477 })))
12478 }
12479 // ILIKE(foo, 'pat') -> foo ILIKE 'pat'
12480 "ILIKE" if f.args.len() >= 2 => {
12481 let this = f.args[0].clone();
12482 let pattern = f.args[1].clone();
12483 let escape = if f.args.len() >= 3 {
12484 Some(f.args[2].clone())
12485 } else {
12486 None
12487 };
12488 Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
12489 left: this,
12490 right: pattern,
12491 escape,
12492 quantifier: None,
12493 inferred_type: None,
12494 })))
12495 }
12496 // CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
12497 "CHAR" if f.args.len() == 1 => match target {
12498 DialectType::MySQL
12499 | DialectType::SingleStore
12500 | DialectType::TSQL => Ok(Expression::Function(f)),
12501 _ => Ok(Expression::Function(Box::new(Function::new(
12502 "CHR".to_string(),
12503 f.args,
12504 )))),
12505 },
12506 // CONCAT(a, b) -> a || b for PostgreSQL
12507 "CONCAT"
12508 if f.args.len() == 2
12509 && matches!(target, DialectType::PostgreSQL)
12510 && matches!(
12511 source,
12512 DialectType::ClickHouse | DialectType::MySQL
12513 ) =>
12514 {
12515 let mut args = f.args;
12516 let right = args.pop().unwrap();
12517 let left = args.pop().unwrap();
12518 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
12519 this: Box::new(left),
12520 expression: Box::new(right),
12521 safe: None,
12522 })))
12523 }
12524 // ARRAY_TO_STRING(arr, delim) -> target-specific
12525 "ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
12526 DialectType::Presto | DialectType::Trino => {
12527 Ok(Expression::Function(Box::new(Function::new(
12528 "ARRAY_JOIN".to_string(),
12529 f.args,
12530 ))))
12531 }
12532 DialectType::TSQL => Ok(Expression::Function(Box::new(
12533 Function::new("STRING_AGG".to_string(), f.args),
12534 ))),
12535 _ => Ok(Expression::Function(f)),
12536 },
12537 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
12538 "ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
12539 DialectType::Spark
12540 | DialectType::Databricks
12541 | DialectType::Hive => Ok(Expression::Function(Box::new(
12542 Function::new("CONCAT".to_string(), f.args),
12543 ))),
12544 DialectType::Snowflake => Ok(Expression::Function(Box::new(
12545 Function::new("ARRAY_CAT".to_string(), f.args),
12546 ))),
12547 DialectType::Redshift => Ok(Expression::Function(Box::new(
12548 Function::new("ARRAY_CONCAT".to_string(), f.args),
12549 ))),
12550 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
12551 Function::new("ARRAY_CAT".to_string(), f.args),
12552 ))),
12553 DialectType::DuckDB => Ok(Expression::Function(Box::new(
12554 Function::new("LIST_CONCAT".to_string(), f.args),
12555 ))),
12556 DialectType::Presto | DialectType::Trino => {
12557 Ok(Expression::Function(Box::new(Function::new(
12558 "CONCAT".to_string(),
12559 f.args,
12560 ))))
12561 }
12562 DialectType::BigQuery => Ok(Expression::Function(Box::new(
12563 Function::new("ARRAY_CONCAT".to_string(), f.args),
12564 ))),
12565 _ => Ok(Expression::Function(f)),
12566 },
12567 // ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
12568 "HAS" if f.args.len() == 2 => match target {
12569 DialectType::Spark
12570 | DialectType::Databricks
12571 | DialectType::Hive => Ok(Expression::Function(Box::new(
12572 Function::new("ARRAY_CONTAINS".to_string(), f.args),
12573 ))),
12574 DialectType::Presto | DialectType::Trino => {
12575 Ok(Expression::Function(Box::new(Function::new(
12576 "CONTAINS".to_string(),
12577 f.args,
12578 ))))
12579 }
12580 _ => Ok(Expression::Function(f)),
12581 },
12582 // NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
12583 "NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
12584 Function::new("COALESCE".to_string(), f.args),
12585 ))),
12586 // ISNULL(x) in MySQL -> (x IS NULL)
12587 "ISNULL"
12588 if f.args.len() == 1
12589 && matches!(source, DialectType::MySQL)
12590 && matches!(target, DialectType::MySQL) =>
12591 {
12592 let arg = f.args.into_iter().next().unwrap();
12593 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
12594 this: Expression::IsNull(Box::new(
12595 crate::expressions::IsNull {
12596 this: arg,
12597 not: false,
12598 postfix_form: false,
12599 },
12600 )),
12601 trailing_comments: Vec::new(),
12602 })))
12603 }
12604 // MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL -> MySQL
12605 "MONTHNAME"
12606 if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
12607 {
12608 let arg = f.args.into_iter().next().unwrap();
12609 Ok(Expression::Function(Box::new(Function::new(
12610 "DATE_FORMAT".to_string(),
12611 vec![arg, Expression::string("%M")],
12612 ))))
12613 }
12614 // ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
12615 "SPLITBYSTRING" if f.args.len() == 2 => {
12616 let sep = f.args[0].clone();
12617 let str_arg = f.args[1].clone();
12618 match target {
12619 DialectType::DuckDB => Ok(Expression::Function(Box::new(
12620 Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
12621 ))),
12622 DialectType::Doris => {
12623 Ok(Expression::Function(Box::new(Function::new(
12624 "SPLIT_BY_STRING".to_string(),
12625 vec![str_arg, sep],
12626 ))))
12627 }
12628 DialectType::Hive
12629 | DialectType::Spark
12630 | DialectType::Databricks => {
12631 // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
12632 let escaped =
12633 Expression::Function(Box::new(Function::new(
12634 "CONCAT".to_string(),
12635 vec![
12636 Expression::string("\\Q"),
12637 sep,
12638 Expression::string("\\E"),
12639 ],
12640 )));
12641 Ok(Expression::Function(Box::new(Function::new(
12642 "SPLIT".to_string(),
12643 vec![str_arg, escaped],
12644 ))))
12645 }
12646 _ => Ok(Expression::Function(f)),
12647 }
12648 }
12649 // ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
12650 "SPLITBYREGEXP" if f.args.len() == 2 => {
12651 let sep = f.args[0].clone();
12652 let str_arg = f.args[1].clone();
12653 match target {
12654 DialectType::DuckDB => {
12655 Ok(Expression::Function(Box::new(Function::new(
12656 "STR_SPLIT_REGEX".to_string(),
12657 vec![str_arg, sep],
12658 ))))
12659 }
12660 DialectType::Hive
12661 | DialectType::Spark
12662 | DialectType::Databricks => {
12663 Ok(Expression::Function(Box::new(Function::new(
12664 "SPLIT".to_string(),
12665 vec![str_arg, sep],
12666 ))))
12667 }
12668 _ => Ok(Expression::Function(f)),
12669 }
12670 }
12671 // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
12672 "TOMONDAY" => {
12673 if f.args.len() == 1 {
12674 let arg = f.args.into_iter().next().unwrap();
12675 match target {
12676 DialectType::Doris => {
12677 Ok(Expression::Function(Box::new(Function::new(
12678 "DATE_TRUNC".to_string(),
12679 vec![arg, Expression::string("WEEK")],
12680 ))))
12681 }
12682 _ => Ok(Expression::Function(Box::new(Function::new(
12683 "DATE_TRUNC".to_string(),
12684 vec![Expression::string("WEEK"), arg],
12685 )))),
12686 }
12687 } else {
12688 Ok(Expression::Function(f))
12689 }
12690 }
12691 // COLLECT_LIST with FILTER(WHERE x IS NOT NULL) for targets that need it
12692 "COLLECT_LIST" if f.args.len() == 1 => match target {
12693 DialectType::Spark
12694 | DialectType::Databricks
12695 | DialectType::Hive => Ok(Expression::Function(f)),
12696 _ => Ok(Expression::Function(Box::new(Function::new(
12697 "ARRAY_AGG".to_string(),
12698 f.args,
12699 )))),
12700 },
12701 // TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
12702 "TO_CHAR"
12703 if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
12704 {
12705 let arg = f.args.into_iter().next().unwrap();
12706 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
12707 this: arg,
12708 to: DataType::Custom {
12709 name: "STRING".to_string(),
12710 },
12711 double_colon_syntax: false,
12712 trailing_comments: Vec::new(),
12713 format: None,
12714 default: None,
12715 inferred_type: None,
12716 })))
12717 }
12718 // DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL
12719 "DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
12720 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
12721 Function::new("RANDOM".to_string(), vec![]),
12722 ))),
12723 _ => Ok(Expression::Function(f)),
12724 },
12725 // ClickHouse formatDateTime -> target-specific
12726 "FORMATDATETIME" if f.args.len() >= 2 => match target {
12727 DialectType::MySQL => Ok(Expression::Function(Box::new(
12728 Function::new("DATE_FORMAT".to_string(), f.args),
12729 ))),
12730 _ => Ok(Expression::Function(f)),
12731 },
12732 // REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
12733 "REPLICATE" if f.args.len() == 2 => match target {
12734 DialectType::TSQL => Ok(Expression::Function(f)),
12735 _ => Ok(Expression::Function(Box::new(Function::new(
12736 "REPEAT".to_string(),
12737 f.args,
12738 )))),
12739 },
12740 // LEN(x) -> LENGTH(x) for non-TSQL targets
12741 // No CAST needed when arg is already a string literal
12742 "LEN" if f.args.len() == 1 => {
12743 match target {
12744 DialectType::TSQL => Ok(Expression::Function(f)),
12745 DialectType::Spark | DialectType::Databricks => {
12746 let arg = f.args.into_iter().next().unwrap();
12747 // Don't wrap string literals with CAST - they're already strings
12748 let is_string = matches!(
12749 &arg,
12750 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
12751 );
12752 let final_arg = if is_string {
12753 arg
12754 } else {
12755 Expression::Cast(Box::new(Cast {
12756 this: arg,
12757 to: DataType::VarChar {
12758 length: None,
12759 parenthesized_length: false,
12760 },
12761 double_colon_syntax: false,
12762 trailing_comments: Vec::new(),
12763 format: None,
12764 default: None,
12765 inferred_type: None,
12766 }))
12767 };
12768 Ok(Expression::Function(Box::new(Function::new(
12769 "LENGTH".to_string(),
12770 vec![final_arg],
12771 ))))
12772 }
12773 _ => {
12774 let arg = f.args.into_iter().next().unwrap();
12775 Ok(Expression::Function(Box::new(Function::new(
12776 "LENGTH".to_string(),
12777 vec![arg],
12778 ))))
12779 }
12780 }
12781 }
12782 // COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
12783 "COUNT_BIG" if f.args.len() == 1 => match target {
12784 DialectType::TSQL => Ok(Expression::Function(f)),
12785 _ => Ok(Expression::Function(Box::new(Function::new(
12786 "COUNT".to_string(),
12787 f.args,
12788 )))),
12789 },
12790 // DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
12791 "DATEFROMPARTS" if f.args.len() == 3 => match target {
12792 DialectType::TSQL => Ok(Expression::Function(f)),
12793 _ => Ok(Expression::Function(Box::new(Function::new(
12794 "MAKE_DATE".to_string(),
12795 f.args,
12796 )))),
12797 },
12798 // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
12799 "REGEXP_LIKE" if f.args.len() >= 2 => {
12800 let str_expr = f.args[0].clone();
12801 let pattern = f.args[1].clone();
12802 let flags = if f.args.len() >= 3 {
12803 Some(f.args[2].clone())
12804 } else {
12805 None
12806 };
12807 match target {
12808 DialectType::DuckDB => {
12809 let mut new_args = vec![str_expr, pattern];
12810 if let Some(fl) = flags {
12811 new_args.push(fl);
12812 }
12813 Ok(Expression::Function(Box::new(Function::new(
12814 "REGEXP_MATCHES".to_string(),
12815 new_args,
12816 ))))
12817 }
12818 _ => Ok(Expression::RegexpLike(Box::new(
12819 crate::expressions::RegexpFunc {
12820 this: str_expr,
12821 pattern,
12822 flags,
12823 },
12824 ))),
12825 }
12826 }
12827 // ClickHouse arrayJoin -> UNNEST for PostgreSQL
12828 "ARRAYJOIN" if f.args.len() == 1 => match target {
12829 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
12830 Function::new("UNNEST".to_string(), f.args),
12831 ))),
12832 _ => Ok(Expression::Function(f)),
12833 },
12834 // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
12835 "DATETIMEFROMPARTS" if f.args.len() == 7 => {
12836 match target {
12837 DialectType::TSQL => Ok(Expression::Function(f)),
12838 DialectType::DuckDB => {
12839 // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
12840 let mut args = f.args;
12841 let ms = args.pop().unwrap();
12842 let s = args.pop().unwrap();
12843 // s + (ms / 1000.0)
12844 let ms_frac = Expression::Div(Box::new(BinaryOp::new(
12845 ms,
12846 Expression::Literal(Box::new(crate::expressions::Literal::Number(
12847 "1000.0".to_string(),
12848 ),)),
12849 )));
12850 let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
12851 s,
12852 Expression::Paren(Box::new(Paren {
12853 this: ms_frac,
12854 trailing_comments: vec![],
12855 })),
12856 )));
12857 args.push(s_with_ms);
12858 Ok(Expression::Function(Box::new(Function::new(
12859 "MAKE_TIMESTAMP".to_string(),
12860 args,
12861 ))))
12862 }
12863 DialectType::Snowflake => {
12864 // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
12865 let mut args = f.args;
12866 let ms = args.pop().unwrap();
12867 // ms * 1000000
12868 let ns = Expression::Mul(Box::new(BinaryOp::new(
12869 ms,
12870 Expression::number(1000000),
12871 )));
12872 args.push(ns);
12873 Ok(Expression::Function(Box::new(Function::new(
12874 "TIMESTAMP_FROM_PARTS".to_string(),
12875 args,
12876 ))))
12877 }
12878 _ => {
12879 // Default: keep function name for other targets
12880 Ok(Expression::Function(Box::new(Function::new(
12881 "DATETIMEFROMPARTS".to_string(),
12882 f.args,
12883 ))))
12884 }
12885 }
12886 }
12887 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
12888 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
12889 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
12890 let is_try = name == "TRY_CONVERT";
12891 let type_expr = f.args[0].clone();
12892 let value_expr = f.args[1].clone();
12893 let style = if f.args.len() >= 3 {
12894 Some(&f.args[2])
12895 } else {
12896 None
12897 };
12898
12899 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
12900 if matches!(target, DialectType::TSQL) {
12901 let normalized_type = match &type_expr {
12902 Expression::DataType(dt) => {
12903 let new_dt = match dt {
12904 DataType::Int { .. } => DataType::Custom {
12905 name: "INTEGER".to_string(),
12906 },
12907 _ => dt.clone(),
12908 };
12909 Expression::DataType(new_dt)
12910 }
12911 Expression::Identifier(id) => {
12912 if id.name.eq_ignore_ascii_case("INT") {
12913 Expression::Identifier(
12914 crate::expressions::Identifier::new("INTEGER"),
12915 )
12916 } else {
12917 let upper = id.name.to_ascii_uppercase();
12918 Expression::Identifier(
12919 crate::expressions::Identifier::new(upper),
12920 )
12921 }
12922 }
12923 Expression::Column(col) => {
12924 if col.name.name.eq_ignore_ascii_case("INT") {
12925 Expression::Identifier(
12926 crate::expressions::Identifier::new("INTEGER"),
12927 )
12928 } else {
12929 let upper = col.name.name.to_ascii_uppercase();
12930 Expression::Identifier(
12931 crate::expressions::Identifier::new(upper),
12932 )
12933 }
12934 }
12935 _ => type_expr.clone(),
12936 };
12937 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
12938 let mut new_args = vec![normalized_type, value_expr];
12939 if let Some(s) = style {
12940 new_args.push(s.clone());
12941 }
12942 return Ok(Expression::Function(Box::new(Function::new(
12943 func_name.to_string(),
12944 new_args,
12945 ))));
12946 }
12947
12948 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
12949 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
12950 match e {
12951 Expression::DataType(dt) => {
12952 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
12953 match dt {
12954 DataType::Custom { name }
12955 if name.starts_with("NVARCHAR(")
12956 || name.starts_with("NCHAR(") =>
12957 {
12958 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
12959 let inner = &name[name.find('(').unwrap() + 1
12960 ..name.len() - 1];
12961 if inner.eq_ignore_ascii_case("MAX") {
12962 Some(DataType::Text)
12963 } else if let Ok(len) = inner.parse::<u32>() {
12964 if name.starts_with("NCHAR") {
12965 Some(DataType::Char {
12966 length: Some(len),
12967 })
12968 } else {
12969 Some(DataType::VarChar {
12970 length: Some(len),
12971 parenthesized_length: false,
12972 })
12973 }
12974 } else {
12975 Some(dt.clone())
12976 }
12977 }
12978 DataType::Custom { name } if name == "NVARCHAR" => {
12979 Some(DataType::VarChar {
12980 length: None,
12981 parenthesized_length: false,
12982 })
12983 }
12984 DataType::Custom { name } if name == "NCHAR" => {
12985 Some(DataType::Char { length: None })
12986 }
12987 DataType::Custom { name }
12988 if name == "NVARCHAR(MAX)"
12989 || name == "VARCHAR(MAX)" =>
12990 {
12991 Some(DataType::Text)
12992 }
12993 _ => Some(dt.clone()),
12994 }
12995 }
12996 Expression::Identifier(id) => {
12997 let name = id.name.to_ascii_uppercase();
12998 match name.as_str() {
12999 "INT" | "INTEGER" => Some(DataType::Int {
13000 length: None,
13001 integer_spelling: false,
13002 }),
13003 "BIGINT" => Some(DataType::BigInt { length: None }),
13004 "SMALLINT" => {
13005 Some(DataType::SmallInt { length: None })
13006 }
13007 "TINYINT" => {
13008 Some(DataType::TinyInt { length: None })
13009 }
13010 "FLOAT" => Some(DataType::Float {
13011 precision: None,
13012 scale: None,
13013 real_spelling: false,
13014 }),
13015 "REAL" => Some(DataType::Float {
13016 precision: None,
13017 scale: None,
13018 real_spelling: true,
13019 }),
13020 "DATETIME" | "DATETIME2" => {
13021 Some(DataType::Timestamp {
13022 timezone: false,
13023 precision: None,
13024 })
13025 }
13026 "DATE" => Some(DataType::Date),
13027 "BIT" => Some(DataType::Boolean),
13028 "TEXT" => Some(DataType::Text),
13029 "NUMERIC" => Some(DataType::Decimal {
13030 precision: None,
13031 scale: None,
13032 }),
13033 "MONEY" => Some(DataType::Decimal {
13034 precision: Some(15),
13035 scale: Some(4),
13036 }),
13037 "SMALLMONEY" => Some(DataType::Decimal {
13038 precision: Some(6),
13039 scale: Some(4),
13040 }),
13041 "VARCHAR" => Some(DataType::VarChar {
13042 length: None,
13043 parenthesized_length: false,
13044 }),
13045 "NVARCHAR" => Some(DataType::VarChar {
13046 length: None,
13047 parenthesized_length: false,
13048 }),
13049 "CHAR" => Some(DataType::Char { length: None }),
13050 "NCHAR" => Some(DataType::Char { length: None }),
13051 _ => Some(DataType::Custom { name }),
13052 }
13053 }
13054 Expression::Column(col) => {
13055 let name = col.name.name.to_ascii_uppercase();
13056 match name.as_str() {
13057 "INT" | "INTEGER" => Some(DataType::Int {
13058 length: None,
13059 integer_spelling: false,
13060 }),
13061 "BIGINT" => Some(DataType::BigInt { length: None }),
13062 "FLOAT" => Some(DataType::Float {
13063 precision: None,
13064 scale: None,
13065 real_spelling: false,
13066 }),
13067 "DATETIME" | "DATETIME2" => {
13068 Some(DataType::Timestamp {
13069 timezone: false,
13070 precision: None,
13071 })
13072 }
13073 "DATE" => Some(DataType::Date),
13074 "NUMERIC" => Some(DataType::Decimal {
13075 precision: None,
13076 scale: None,
13077 }),
13078 "VARCHAR" => Some(DataType::VarChar {
13079 length: None,
13080 parenthesized_length: false,
13081 }),
13082 "NVARCHAR" => Some(DataType::VarChar {
13083 length: None,
13084 parenthesized_length: false,
13085 }),
13086 "CHAR" => Some(DataType::Char { length: None }),
13087 "NCHAR" => Some(DataType::Char { length: None }),
13088 _ => Some(DataType::Custom { name }),
13089 }
13090 }
13091 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
13092 Expression::Function(f) => {
13093 let fname = f.name.to_ascii_uppercase();
13094 match fname.as_str() {
13095 "VARCHAR" | "NVARCHAR" => {
13096 let len = f.args.first().and_then(|a| {
13097 if let Expression::Literal(lit) = a
13098 {
13099 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
13100 n.parse::<u32>().ok()
13101 } else { None }
13102 } else if let Expression::Identifier(id) = a
13103 {
13104 if id.name.eq_ignore_ascii_case("MAX") {
13105 None
13106 } else {
13107 None
13108 }
13109 } else {
13110 None
13111 }
13112 });
13113 // Check for VARCHAR(MAX) -> TEXT
13114 let is_max = f.args.first().map_or(false, |a| {
13115 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
13116 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
13117 });
13118 if is_max {
13119 Some(DataType::Text)
13120 } else {
13121 Some(DataType::VarChar {
13122 length: len,
13123 parenthesized_length: false,
13124 })
13125 }
13126 }
13127 "NCHAR" | "CHAR" => {
13128 let len = f.args.first().and_then(|a| {
13129 if let Expression::Literal(lit) = a
13130 {
13131 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
13132 n.parse::<u32>().ok()
13133 } else { None }
13134 } else {
13135 None
13136 }
13137 });
13138 Some(DataType::Char { length: len })
13139 }
13140 "NUMERIC" | "DECIMAL" => {
13141 let precision = f.args.first().and_then(|a| {
13142 if let Expression::Literal(lit) = a
13143 {
13144 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
13145 n.parse::<u32>().ok()
13146 } else { None }
13147 } else {
13148 None
13149 }
13150 });
13151 let scale = f.args.get(1).and_then(|a| {
13152 if let Expression::Literal(lit) = a
13153 {
13154 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
13155 n.parse::<u32>().ok()
13156 } else { None }
13157 } else {
13158 None
13159 }
13160 });
13161 Some(DataType::Decimal { precision, scale })
13162 }
13163 _ => None,
13164 }
13165 }
13166 _ => None,
13167 }
13168 }
13169
13170 if let Some(mut dt) = expr_to_datatype(&type_expr) {
13171 // For TSQL source: VARCHAR/CHAR without length defaults to 30
13172 let is_tsql_source =
13173 matches!(source, DialectType::TSQL | DialectType::Fabric);
13174 if is_tsql_source {
13175 match &dt {
13176 DataType::VarChar { length: None, .. } => {
13177 dt = DataType::VarChar {
13178 length: Some(30),
13179 parenthesized_length: false,
13180 };
13181 }
13182 DataType::Char { length: None } => {
13183 dt = DataType::Char { length: Some(30) };
13184 }
13185 _ => {}
13186 }
13187 }
13188
13189 // Determine if this is a string type
13190 let is_string_type = matches!(
13191 dt,
13192 DataType::VarChar { .. }
13193 | DataType::Char { .. }
13194 | DataType::Text
13195 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
13196 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
13197 || name.starts_with("VARCHAR(") || name == "VARCHAR"
13198 || name == "STRING");
13199
13200 // Determine if this is a date/time type
13201 let is_datetime_type = matches!(
13202 dt,
13203 DataType::Timestamp { .. } | DataType::Date
13204 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
13205 || name == "DATETIME2" || name == "SMALLDATETIME");
13206
13207 // Check for date conversion with style
13208 if style.is_some() {
13209 let style_num = style.and_then(|s| {
13210 if let Expression::Literal(lit) = s
13211 {
13212 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
13213 n.parse::<u32>().ok()
13214 } else { None }
13215 } else {
13216 None
13217 }
13218 });
13219
13220 // TSQL CONVERT date styles (Java format)
13221 let format_str = style_num.and_then(|n| match n {
13222 101 => Some("MM/dd/yyyy"),
13223 102 => Some("yyyy.MM.dd"),
13224 103 => Some("dd/MM/yyyy"),
13225 104 => Some("dd.MM.yyyy"),
13226 105 => Some("dd-MM-yyyy"),
13227 108 => Some("HH:mm:ss"),
13228 110 => Some("MM-dd-yyyy"),
13229 112 => Some("yyyyMMdd"),
13230 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
13231 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
13232 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
13233 _ => None,
13234 });
13235
13236 // Non-string, non-datetime types with style: just CAST, ignore the style
13237 if !is_string_type && !is_datetime_type {
13238 let cast_expr = if is_try {
13239 Expression::TryCast(Box::new(
13240 crate::expressions::Cast {
13241 this: value_expr,
13242 to: dt,
13243 trailing_comments: Vec::new(),
13244 double_colon_syntax: false,
13245 format: None,
13246 default: None,
13247 inferred_type: None,
13248 },
13249 ))
13250 } else {
13251 Expression::Cast(Box::new(
13252 crate::expressions::Cast {
13253 this: value_expr,
13254 to: dt,
13255 trailing_comments: Vec::new(),
13256 double_colon_syntax: false,
13257 format: None,
13258 default: None,
13259 inferred_type: None,
13260 },
13261 ))
13262 };
13263 return Ok(cast_expr);
13264 }
13265
13266 if let Some(java_fmt) = format_str {
13267 let c_fmt = java_fmt
13268 .replace("yyyy", "%Y")
13269 .replace("MM", "%m")
13270 .replace("dd", "%d")
13271 .replace("HH", "%H")
13272 .replace("mm", "%M")
13273 .replace("ss", "%S")
13274 .replace("SSSSSS", "%f")
13275 .replace("SSS", "%f")
13276 .replace("'T'", "T");
13277
13278 // For datetime target types: style is the INPUT format for parsing strings -> dates
13279 if is_datetime_type {
13280 match target {
13281 DialectType::DuckDB => {
13282 return Ok(Expression::Function(Box::new(
13283 Function::new(
13284 "STRPTIME".to_string(),
13285 vec![
13286 value_expr,
13287 Expression::string(&c_fmt),
13288 ],
13289 ),
13290 )));
13291 }
13292 DialectType::Spark
13293 | DialectType::Databricks => {
13294 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
13295 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
13296 let func_name =
13297 if matches!(dt, DataType::Date) {
13298 "TO_DATE"
13299 } else {
13300 "TO_TIMESTAMP"
13301 };
13302 return Ok(Expression::Function(Box::new(
13303 Function::new(
13304 func_name.to_string(),
13305 vec![
13306 value_expr,
13307 Expression::string(java_fmt),
13308 ],
13309 ),
13310 )));
13311 }
13312 DialectType::Hive => {
13313 return Ok(Expression::Function(Box::new(
13314 Function::new(
13315 "TO_TIMESTAMP".to_string(),
13316 vec![
13317 value_expr,
13318 Expression::string(java_fmt),
13319 ],
13320 ),
13321 )));
13322 }
13323 _ => {
13324 return Ok(Expression::Cast(Box::new(
13325 crate::expressions::Cast {
13326 this: value_expr,
13327 to: dt,
13328 trailing_comments: Vec::new(),
13329 double_colon_syntax: false,
13330 format: None,
13331 default: None,
13332 inferred_type: None,
13333 },
13334 )));
13335 }
13336 }
13337 }
13338
13339 // For string target types: style is the OUTPUT format for dates -> strings
13340 match target {
13341 DialectType::DuckDB => Ok(Expression::Function(
13342 Box::new(Function::new(
13343 "STRPTIME".to_string(),
13344 vec![
13345 value_expr,
13346 Expression::string(&c_fmt),
13347 ],
13348 )),
13349 )),
13350 DialectType::Spark | DialectType::Databricks => {
13351 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
13352 // Determine the target string type
13353 let string_dt = match &dt {
13354 DataType::VarChar {
13355 length: Some(l),
13356 ..
13357 } => DataType::VarChar {
13358 length: Some(*l),
13359 parenthesized_length: false,
13360 },
13361 DataType::Text => DataType::Custom {
13362 name: "STRING".to_string(),
13363 },
13364 _ => DataType::Custom {
13365 name: "STRING".to_string(),
13366 },
13367 };
13368 let date_format_expr = Expression::Function(
13369 Box::new(Function::new(
13370 "DATE_FORMAT".to_string(),
13371 vec![
13372 value_expr,
13373 Expression::string(java_fmt),
13374 ],
13375 )),
13376 );
13377 let cast_expr = if is_try {
13378 Expression::TryCast(Box::new(
13379 crate::expressions::Cast {
13380 this: date_format_expr,
13381 to: string_dt,
13382 trailing_comments: Vec::new(),
13383 double_colon_syntax: false,
13384 format: None,
13385 default: None,
13386 inferred_type: None,
13387 },
13388 ))
13389 } else {
13390 Expression::Cast(Box::new(
13391 crate::expressions::Cast {
13392 this: date_format_expr,
13393 to: string_dt,
13394 trailing_comments: Vec::new(),
13395 double_colon_syntax: false,
13396 format: None,
13397 default: None,
13398 inferred_type: None,
13399 },
13400 ))
13401 };
13402 Ok(cast_expr)
13403 }
13404 DialectType::MySQL | DialectType::SingleStore => {
13405 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
13406 let mysql_fmt = java_fmt
13407 .replace("yyyy", "%Y")
13408 .replace("MM", "%m")
13409 .replace("dd", "%d")
13410 .replace("HH:mm:ss.SSSSSS", "%T")
13411 .replace("HH:mm:ss", "%T")
13412 .replace("HH", "%H")
13413 .replace("mm", "%i")
13414 .replace("ss", "%S");
13415 let date_format_expr = Expression::Function(
13416 Box::new(Function::new(
13417 "DATE_FORMAT".to_string(),
13418 vec![
13419 value_expr,
13420 Expression::string(&mysql_fmt),
13421 ],
13422 )),
13423 );
13424 // MySQL uses CHAR for string casts
13425 let mysql_dt = match &dt {
13426 DataType::VarChar { length, .. } => {
13427 DataType::Char { length: *length }
13428 }
13429 _ => dt,
13430 };
13431 Ok(Expression::Cast(Box::new(
13432 crate::expressions::Cast {
13433 this: date_format_expr,
13434 to: mysql_dt,
13435 trailing_comments: Vec::new(),
13436 double_colon_syntax: false,
13437 format: None,
13438 default: None,
13439 inferred_type: None,
13440 },
13441 )))
13442 }
13443 DialectType::Hive => {
13444 let func_name = "TO_TIMESTAMP";
13445 Ok(Expression::Function(Box::new(
13446 Function::new(
13447 func_name.to_string(),
13448 vec![
13449 value_expr,
13450 Expression::string(java_fmt),
13451 ],
13452 ),
13453 )))
13454 }
13455 _ => Ok(Expression::Cast(Box::new(
13456 crate::expressions::Cast {
13457 this: value_expr,
13458 to: dt,
13459 trailing_comments: Vec::new(),
13460 double_colon_syntax: false,
13461 format: None,
13462 default: None,
13463 inferred_type: None,
13464 },
13465 ))),
13466 }
13467 } else {
13468 // Unknown style, just CAST
13469 let cast_expr = if is_try {
13470 Expression::TryCast(Box::new(
13471 crate::expressions::Cast {
13472 this: value_expr,
13473 to: dt,
13474 trailing_comments: Vec::new(),
13475 double_colon_syntax: false,
13476 format: None,
13477 default: None,
13478 inferred_type: None,
13479 },
13480 ))
13481 } else {
13482 Expression::Cast(Box::new(
13483 crate::expressions::Cast {
13484 this: value_expr,
13485 to: dt,
13486 trailing_comments: Vec::new(),
13487 double_colon_syntax: false,
13488 format: None,
13489 default: None,
13490 inferred_type: None,
13491 },
13492 ))
13493 };
13494 Ok(cast_expr)
13495 }
13496 } else {
13497 // No style - simple CAST
13498 let final_dt = if matches!(
13499 target,
13500 DialectType::MySQL | DialectType::SingleStore
13501 ) {
13502 match &dt {
13503 DataType::Int { .. }
13504 | DataType::BigInt { .. }
13505 | DataType::SmallInt { .. }
13506 | DataType::TinyInt { .. } => DataType::Custom {
13507 name: "SIGNED".to_string(),
13508 },
13509 DataType::VarChar { length, .. } => {
13510 DataType::Char { length: *length }
13511 }
13512 _ => dt,
13513 }
13514 } else {
13515 dt
13516 };
13517 let cast_expr = if is_try {
13518 Expression::TryCast(Box::new(
13519 crate::expressions::Cast {
13520 this: value_expr,
13521 to: final_dt,
13522 trailing_comments: Vec::new(),
13523 double_colon_syntax: false,
13524 format: None,
13525 default: None,
13526 inferred_type: None,
13527 },
13528 ))
13529 } else {
13530 Expression::Cast(Box::new(crate::expressions::Cast {
13531 this: value_expr,
13532 to: final_dt,
13533 trailing_comments: Vec::new(),
13534 double_colon_syntax: false,
13535 format: None,
13536 default: None,
13537 inferred_type: None,
13538 }))
13539 };
13540 Ok(cast_expr)
13541 }
13542 } else {
13543 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
13544 Ok(Expression::Function(f))
13545 }
13546 }
13547 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
13548 "STRFTIME" if f.args.len() == 2 => {
13549 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
13550 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
13551 // SQLite: args[0] = format, args[1] = value
13552 (f.args[1].clone(), &f.args[0])
13553 } else {
13554 // DuckDB and others: args[0] = value, args[1] = format
13555 (f.args[0].clone(), &f.args[1])
13556 };
13557
// Helper to convert C-style (strftime) format tokens to Java-style
// (SimpleDateFormat) tokens. The pairs are applied strictly in order;
// the order is significant (e.g. "%Y" must be consumed before "%y",
// and "%-I" before "%I").
fn c_to_java_format(fmt: &str) -> String {
    // (c_token, java_token) pairs, replaced sequentially.
    const TOKEN_MAP: &[(&str, &str)] = &[
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%j", "DDD"),
        ("%a", "EEE"),
        ("%b", "MMM"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];
    TOKEN_MAP
        .iter()
        .fold(fmt.to_string(), |acc, (from, to)| acc.replace(from, to))
}
13580
13581 // Helper: recursively convert format strings within expressions (handles CONCAT)
13582 fn convert_fmt_expr(
13583 expr: &Expression,
13584 converter: &dyn Fn(&str) -> String,
13585 ) -> Expression {
13586 match expr {
13587 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
13588 let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
13589 Expression::string(&converter(s))
13590 }
13591 Expression::Function(func)
13592 if func.name.eq_ignore_ascii_case("CONCAT") =>
13593 {
13594 let new_args: Vec<Expression> = func
13595 .args
13596 .iter()
13597 .map(|a| convert_fmt_expr(a, converter))
13598 .collect();
13599 Expression::Function(Box::new(Function::new(
13600 "CONCAT".to_string(),
13601 new_args,
13602 )))
13603 }
13604 other => other.clone(),
13605 }
13606 }
13607
13608 match target {
13609 DialectType::DuckDB => {
13610 if matches!(source, DialectType::SQLite) {
13611 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
13612 let cast_val = Expression::Cast(Box::new(Cast {
13613 this: val,
13614 to: crate::expressions::DataType::Timestamp {
13615 precision: None,
13616 timezone: false,
13617 },
13618 trailing_comments: Vec::new(),
13619 double_colon_syntax: false,
13620 format: None,
13621 default: None,
13622 inferred_type: None,
13623 }));
13624 Ok(Expression::Function(Box::new(Function::new(
13625 "STRFTIME".to_string(),
13626 vec![cast_val, fmt_expr.clone()],
13627 ))))
13628 } else {
13629 Ok(Expression::Function(f))
13630 }
13631 }
13632 DialectType::Spark
13633 | DialectType::Databricks
13634 | DialectType::Hive => {
13635 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
13636 let converted_fmt =
13637 convert_fmt_expr(fmt_expr, &c_to_java_format);
13638 Ok(Expression::Function(Box::new(Function::new(
13639 "DATE_FORMAT".to_string(),
13640 vec![val, converted_fmt],
13641 ))))
13642 }
13643 DialectType::TSQL | DialectType::Fabric => {
13644 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
13645 let converted_fmt =
13646 convert_fmt_expr(fmt_expr, &c_to_java_format);
13647 Ok(Expression::Function(Box::new(Function::new(
13648 "FORMAT".to_string(),
13649 vec![val, converted_fmt],
13650 ))))
13651 }
13652 DialectType::Presto
13653 | DialectType::Trino
13654 | DialectType::Athena => {
13655 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
13656 if let Expression::Literal(lit) = fmt_expr {
13657 if let crate::expressions::Literal::String(s) = lit.as_ref() {
13658 let presto_fmt = duckdb_to_presto_format(s);
13659 Ok(Expression::Function(Box::new(Function::new(
13660 "DATE_FORMAT".to_string(),
13661 vec![val, Expression::string(&presto_fmt)],
13662 ))))
13663 } else {
13664 Ok(Expression::Function(Box::new(Function::new(
13665 "DATE_FORMAT".to_string(),
13666 vec![val, fmt_expr.clone()],
13667 ))))
13668 }
13669 } else {
13670 Ok(Expression::Function(Box::new(Function::new(
13671 "DATE_FORMAT".to_string(),
13672 vec![val, fmt_expr.clone()],
13673 ))))
13674 }
13675 }
13676 DialectType::BigQuery => {
13677 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
13678 if let Expression::Literal(lit) = fmt_expr
13679 {
13680 if let crate::expressions::Literal::String(s) = lit.as_ref() {
13681 let bq_fmt = duckdb_to_bigquery_format(s);
13682 Ok(Expression::Function(Box::new(Function::new(
13683 "FORMAT_DATE".to_string(),
13684 vec![Expression::string(&bq_fmt), val],
13685 ))))
13686 } else {
13687 Ok(Expression::Function(Box::new(Function::new(
13688 "FORMAT_DATE".to_string(),
13689 vec![fmt_expr.clone(), val],
13690 ))))
13691 }
13692 } else {
13693 Ok(Expression::Function(Box::new(Function::new(
13694 "FORMAT_DATE".to_string(),
13695 vec![fmt_expr.clone(), val],
13696 ))))
13697 }
13698 }
13699 DialectType::PostgreSQL | DialectType::Redshift => {
13700 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
13701 if let Expression::Literal(lit) = fmt_expr
13702 {
13703 if let crate::expressions::Literal::String(s) = lit.as_ref() {
13704 let pg_fmt = s
13705 .replace("%Y", "YYYY")
13706 .replace("%m", "MM")
13707 .replace("%d", "DD")
13708 .replace("%H", "HH24")
13709 .replace("%M", "MI")
13710 .replace("%S", "SS")
13711 .replace("%y", "YY")
13712 .replace("%-m", "FMMM")
13713 .replace("%-d", "FMDD")
13714 .replace("%-H", "FMHH24")
13715 .replace("%-I", "FMHH12")
13716 .replace("%p", "AM")
13717 .replace("%F", "YYYY-MM-DD")
13718 .replace("%T", "HH24:MI:SS");
13719 Ok(Expression::Function(Box::new(Function::new(
13720 "TO_CHAR".to_string(),
13721 vec![val, Expression::string(&pg_fmt)],
13722 ))))
13723 } else {
13724 Ok(Expression::Function(Box::new(Function::new(
13725 "TO_CHAR".to_string(),
13726 vec![val, fmt_expr.clone()],
13727 ))))
13728 }
13729 } else {
13730 Ok(Expression::Function(Box::new(Function::new(
13731 "TO_CHAR".to_string(),
13732 vec![val, fmt_expr.clone()],
13733 ))))
13734 }
13735 }
13736 _ => Ok(Expression::Function(f)),
13737 }
13738 }
13739 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
13740 "STRPTIME" if f.args.len() == 2 => {
13741 let val = f.args[0].clone();
13742 let fmt_expr = &f.args[1];
13743
// Convert C-style (strftime) tokens to Java-style (SimpleDateFormat)
// tokens for *parsing* formats. Same scheme as the formatting variant
// but deliberately without day-of-year / weekday / month-name tokens.
// Pair order is significant ("%Y" before "%y", "%-I" before "%I").
fn c_to_java_format_parse(fmt: &str) -> String {
    const TOKEN_MAP: &[(&str, &str)] = &[
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];
    TOKEN_MAP
        .iter()
        .fold(fmt.to_string(), |acc, (from, to)| acc.replace(from, to))
}
13762
13763 match target {
13764 DialectType::DuckDB => Ok(Expression::Function(f)),
13765 DialectType::Spark | DialectType::Databricks => {
13766 // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
13767 if let Expression::Literal(lit) = fmt_expr
13768 {
13769 if let crate::expressions::Literal::String(s) = lit.as_ref() {
13770 let java_fmt = c_to_java_format_parse(s);
13771 Ok(Expression::Function(Box::new(Function::new(
13772 "TO_TIMESTAMP".to_string(),
13773 vec![val, Expression::string(&java_fmt)],
13774 ))))
13775 } else {
13776 Ok(Expression::Function(Box::new(Function::new(
13777 "TO_TIMESTAMP".to_string(),
13778 vec![val, fmt_expr.clone()],
13779 ))))
13780 }
13781 } else {
13782 Ok(Expression::Function(Box::new(Function::new(
13783 "TO_TIMESTAMP".to_string(),
13784 vec![val, fmt_expr.clone()],
13785 ))))
13786 }
13787 }
13788 DialectType::Hive => {
13789 // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
13790 if let Expression::Literal(lit) = fmt_expr
13791 {
13792 if let crate::expressions::Literal::String(s) = lit.as_ref() {
13793 let java_fmt = c_to_java_format_parse(s);
13794 let unix_ts =
13795 Expression::Function(Box::new(Function::new(
13796 "UNIX_TIMESTAMP".to_string(),
13797 vec![val, Expression::string(&java_fmt)],
13798 )));
13799 let from_unix =
13800 Expression::Function(Box::new(Function::new(
13801 "FROM_UNIXTIME".to_string(),
13802 vec![unix_ts],
13803 )));
13804 Ok(Expression::Cast(Box::new(
13805 crate::expressions::Cast {
13806 this: from_unix,
13807 to: DataType::Timestamp {
13808 timezone: false,
13809 precision: None,
13810 },
13811 trailing_comments: Vec::new(),
13812 double_colon_syntax: false,
13813 format: None,
13814 default: None,
13815 inferred_type: None,
13816 },
13817 )))
13818 } else {
13819 Ok(Expression::Function(f))
13820 }
13821 } else {
13822 Ok(Expression::Function(f))
13823 }
13824 }
13825 DialectType::Presto
13826 | DialectType::Trino
13827 | DialectType::Athena => {
13828 // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
13829 if let Expression::Literal(lit) = fmt_expr
13830 {
13831 if let crate::expressions::Literal::String(s) = lit.as_ref() {
13832 let presto_fmt = duckdb_to_presto_format(s);
13833 Ok(Expression::Function(Box::new(Function::new(
13834 "DATE_PARSE".to_string(),
13835 vec![val, Expression::string(&presto_fmt)],
13836 ))))
13837 } else {
13838 Ok(Expression::Function(Box::new(Function::new(
13839 "DATE_PARSE".to_string(),
13840 vec![val, fmt_expr.clone()],
13841 ))))
13842 }
13843 } else {
13844 Ok(Expression::Function(Box::new(Function::new(
13845 "DATE_PARSE".to_string(),
13846 vec![val, fmt_expr.clone()],
13847 ))))
13848 }
13849 }
13850 DialectType::BigQuery => {
13851 // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
13852 if let Expression::Literal(lit) = fmt_expr
13853 {
13854 if let crate::expressions::Literal::String(s) = lit.as_ref() {
13855 let bq_fmt = duckdb_to_bigquery_format(s);
13856 Ok(Expression::Function(Box::new(Function::new(
13857 "PARSE_TIMESTAMP".to_string(),
13858 vec![Expression::string(&bq_fmt), val],
13859 ))))
13860 } else {
13861 Ok(Expression::Function(Box::new(Function::new(
13862 "PARSE_TIMESTAMP".to_string(),
13863 vec![fmt_expr.clone(), val],
13864 ))))
13865 }
13866 } else {
13867 Ok(Expression::Function(Box::new(Function::new(
13868 "PARSE_TIMESTAMP".to_string(),
13869 vec![fmt_expr.clone(), val],
13870 ))))
13871 }
13872 }
13873 _ => Ok(Expression::Function(f)),
13874 }
13875 }
13876 // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
13877 "DATE_FORMAT"
13878 if f.args.len() >= 2
13879 && matches!(
13880 source,
13881 DialectType::Presto
13882 | DialectType::Trino
13883 | DialectType::Athena
13884 ) =>
13885 {
13886 let val = f.args[0].clone();
13887 let fmt_expr = &f.args[1];
13888
13889 match target {
13890 DialectType::Presto
13891 | DialectType::Trino
13892 | DialectType::Athena => {
13893 // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
13894 if let Expression::Literal(lit) = fmt_expr
13895 {
13896 if let crate::expressions::Literal::String(s) = lit.as_ref() {
13897 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
13898 Ok(Expression::Function(Box::new(Function::new(
13899 "DATE_FORMAT".to_string(),
13900 vec![val, Expression::string(&normalized)],
13901 ))))
13902 } else {
13903 Ok(Expression::Function(f))
13904 }
13905 } else {
13906 Ok(Expression::Function(f))
13907 }
13908 }
13909 DialectType::Hive
13910 | DialectType::Spark
13911 | DialectType::Databricks => {
13912 // Convert Presto C-style to Java-style format
13913 if let Expression::Literal(lit) = fmt_expr
13914 {
13915 if let crate::expressions::Literal::String(s) = lit.as_ref() {
13916 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
13917 Ok(Expression::Function(Box::new(Function::new(
13918 "DATE_FORMAT".to_string(),
13919 vec![val, Expression::string(&java_fmt)],
13920 ))))
13921 } else {
13922 Ok(Expression::Function(f))
13923 }
13924 } else {
13925 Ok(Expression::Function(f))
13926 }
13927 }
13928 DialectType::DuckDB => {
13929 // Convert to STRFTIME(val, duckdb_fmt)
13930 if let Expression::Literal(lit) = fmt_expr
13931 {
13932 if let crate::expressions::Literal::String(s) = lit.as_ref() {
13933 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
13934 Ok(Expression::Function(Box::new(Function::new(
13935 "STRFTIME".to_string(),
13936 vec![val, Expression::string(&duckdb_fmt)],
13937 ))))
13938 } else {
13939 Ok(Expression::Function(Box::new(Function::new(
13940 "STRFTIME".to_string(),
13941 vec![val, fmt_expr.clone()],
13942 ))))
13943 }
13944 } else {
13945 Ok(Expression::Function(Box::new(Function::new(
13946 "STRFTIME".to_string(),
13947 vec![val, fmt_expr.clone()],
13948 ))))
13949 }
13950 }
13951 DialectType::BigQuery => {
13952 // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
13953 if let Expression::Literal(lit) = fmt_expr
13954 {
13955 if let crate::expressions::Literal::String(s) = lit.as_ref() {
13956 let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
13957 Ok(Expression::Function(Box::new(Function::new(
13958 "FORMAT_DATE".to_string(),
13959 vec![Expression::string(&bq_fmt), val],
13960 ))))
13961 } else {
13962 Ok(Expression::Function(Box::new(Function::new(
13963 "FORMAT_DATE".to_string(),
13964 vec![fmt_expr.clone(), val],
13965 ))))
13966 }
13967 } else {
13968 Ok(Expression::Function(Box::new(Function::new(
13969 "FORMAT_DATE".to_string(),
13970 vec![fmt_expr.clone(), val],
13971 ))))
13972 }
13973 }
13974 _ => Ok(Expression::Function(f)),
13975 }
13976 }
13977 // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
13978 "DATE_PARSE"
13979 if f.args.len() >= 2
13980 && matches!(
13981 source,
13982 DialectType::Presto
13983 | DialectType::Trino
13984 | DialectType::Athena
13985 ) =>
13986 {
13987 let val = f.args[0].clone();
13988 let fmt_expr = &f.args[1];
13989
13990 match target {
13991 DialectType::Presto
13992 | DialectType::Trino
13993 | DialectType::Athena => {
13994 // Presto -> Presto: normalize format
13995 if let Expression::Literal(lit) = fmt_expr
13996 {
13997 if let crate::expressions::Literal::String(s) = lit.as_ref() {
13998 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
13999 Ok(Expression::Function(Box::new(Function::new(
14000 "DATE_PARSE".to_string(),
14001 vec![val, Expression::string(&normalized)],
14002 ))))
14003 } else {
14004 Ok(Expression::Function(f))
14005 }
14006 } else {
14007 Ok(Expression::Function(f))
14008 }
14009 }
14010 DialectType::Hive => {
14011 // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
14012 if let Expression::Literal(lit) = fmt_expr
14013 {
14014 if let crate::expressions::Literal::String(s) = lit.as_ref() {
14015 if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
14016 || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
14017 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
14018 this: val,
14019 to: DataType::Timestamp { timezone: false, precision: None },
14020 trailing_comments: Vec::new(),
14021 double_colon_syntax: false,
14022 format: None,
14023 default: None,
14024 inferred_type: None,
14025 })))
14026 } else {
14027 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
14028 Ok(Expression::Function(Box::new(Function::new(
14029 "TO_TIMESTAMP".to_string(),
14030 vec![val, Expression::string(&java_fmt)],
14031 ))))
14032 }
14033 } else {
14034 Ok(Expression::Function(f))
14035 }
14036 } else {
14037 Ok(Expression::Function(f))
14038 }
14039 }
14040 DialectType::Spark | DialectType::Databricks => {
14041 // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
14042 if let Expression::Literal(lit) = fmt_expr
14043 {
14044 if let crate::expressions::Literal::String(s) = lit.as_ref() {
14045 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
14046 Ok(Expression::Function(Box::new(Function::new(
14047 "TO_TIMESTAMP".to_string(),
14048 vec![val, Expression::string(&java_fmt)],
14049 ))))
14050 } else {
14051 Ok(Expression::Function(f))
14052 }
14053 } else {
14054 Ok(Expression::Function(f))
14055 }
14056 }
14057 DialectType::DuckDB => {
14058 // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
14059 if let Expression::Literal(lit) = fmt_expr
14060 {
14061 if let crate::expressions::Literal::String(s) = lit.as_ref() {
14062 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
14063 Ok(Expression::Function(Box::new(Function::new(
14064 "STRPTIME".to_string(),
14065 vec![val, Expression::string(&duckdb_fmt)],
14066 ))))
14067 } else {
14068 Ok(Expression::Function(Box::new(Function::new(
14069 "STRPTIME".to_string(),
14070 vec![val, fmt_expr.clone()],
14071 ))))
14072 }
14073 } else {
14074 Ok(Expression::Function(Box::new(Function::new(
14075 "STRPTIME".to_string(),
14076 vec![val, fmt_expr.clone()],
14077 ))))
14078 }
14079 }
14080 _ => Ok(Expression::Function(f)),
14081 }
14082 }
14083 // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
14084 "FROM_BASE64"
14085 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
14086 {
14087 Ok(Expression::Function(Box::new(Function::new(
14088 "UNBASE64".to_string(),
14089 f.args,
14090 ))))
14091 }
14092 "TO_BASE64"
14093 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
14094 {
14095 Ok(Expression::Function(Box::new(Function::new(
14096 "BASE64".to_string(),
14097 f.args,
14098 ))))
14099 }
14100 // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
14101 "FROM_UNIXTIME"
14102 if f.args.len() == 1
14103 && matches!(
14104 source,
14105 DialectType::Presto
14106 | DialectType::Trino
14107 | DialectType::Athena
14108 )
14109 && matches!(
14110 target,
14111 DialectType::Spark | DialectType::Databricks
14112 ) =>
14113 {
14114 // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
14115 let from_unix = Expression::Function(Box::new(Function::new(
14116 "FROM_UNIXTIME".to_string(),
14117 f.args,
14118 )));
14119 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
14120 this: from_unix,
14121 to: DataType::Timestamp {
14122 timezone: false,
14123 precision: None,
14124 },
14125 trailing_comments: Vec::new(),
14126 double_colon_syntax: false,
14127 format: None,
14128 default: None,
14129 inferred_type: None,
14130 })))
14131 }
14132 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
14133 "DATE_FORMAT"
14134 if f.args.len() >= 2
14135 && !matches!(
14136 target,
14137 DialectType::Hive
14138 | DialectType::Spark
14139 | DialectType::Databricks
14140 | DialectType::MySQL
14141 | DialectType::SingleStore
14142 ) =>
14143 {
14144 let val = f.args[0].clone();
14145 let fmt_expr = &f.args[1];
14146 let is_hive_source = matches!(
14147 source,
14148 DialectType::Hive
14149 | DialectType::Spark
14150 | DialectType::Databricks
14151 );
14152
// Convert Java-style (SimpleDateFormat) tokens to C strftime tokens.
// Two passes: first the multi-character tokens (longest / most specific
// first so a shorter pattern never eats part of a longer one), then a
// character walk for the single-char timezone tokens, which must not
// touch the char immediately following a '%' specifier.
fn java_to_c_format(fmt: &str) -> String {
    // Pass 1: ordered multi-character replacements.
    const TOKEN_MAP: &[(&str, &str)] = &[
        ("yyyy", "%Y"),
        ("SSSSSS", "%f"),
        ("EEEE", "%W"),
        ("MM", "%m"),
        ("dd", "%d"),
        ("HH", "%H"),
        ("mm", "%M"),
        ("ss", "%S"),
        ("yy", "%y"),
    ];
    let pass_one = TOKEN_MAP
        .iter()
        .fold(fmt.to_string(), |acc, (from, to)| acc.replace(from, to));

    // Pass 2: z -> %Z (timezone name), Z -> %z (timezone offset).
    // Skip the character right after '%' so existing specifiers are
    // copied through verbatim.
    let mut out = String::with_capacity(pass_one.len() + 4);
    let mut chars = pass_one.chars();
    while let Some(c) = chars.next() {
        match c {
            '%' => {
                out.push('%');
                if let Some(next) = chars.next() {
                    out.push(next);
                }
            }
            'z' => out.push_str("%Z"),
            'Z' => out.push_str("%z"),
            other => out.push(other),
        }
    }
    out
}

// Presto prefers the %T shorthand for a full HH:MM:SS time component.
fn java_to_presto_format(fmt: &str) -> String {
    java_to_c_format(fmt).replace("%H:%M:%S", "%T")
}

// BigQuery additionally supports %F (yyyy-MM-dd) and %T (HH:mm:ss)
// shorthands on top of the plain C-style conversion.
fn java_to_bq_format(fmt: &str) -> String {
    java_to_c_format(fmt)
        .replace("%Y-%m-%d", "%F")
        .replace("%H:%M:%S", "%T")
}
14204
14205 // For Hive source, CAST string literals to appropriate type
14206 let cast_val = if is_hive_source {
14207 match &val {
14208 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
14209 match target {
14210 DialectType::DuckDB
14211 | DialectType::Presto
14212 | DialectType::Trino
14213 | DialectType::Athena => {
14214 Self::ensure_cast_timestamp(val.clone())
14215 }
14216 DialectType::BigQuery => {
14217 // BigQuery: CAST(val AS DATETIME)
14218 Expression::Cast(Box::new(
14219 crate::expressions::Cast {
14220 this: val.clone(),
14221 to: DataType::Custom {
14222 name: "DATETIME".to_string(),
14223 },
14224 trailing_comments: vec![],
14225 double_colon_syntax: false,
14226 format: None,
14227 default: None,
14228 inferred_type: None,
14229 },
14230 ))
14231 }
14232 _ => val.clone(),
14233 }
14234 }
14235 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
14236 Expression::Cast(c)
14237 if matches!(c.to, DataType::Date)
14238 && matches!(
14239 target,
14240 DialectType::Presto
14241 | DialectType::Trino
14242 | DialectType::Athena
14243 ) =>
14244 {
14245 Expression::Cast(Box::new(crate::expressions::Cast {
14246 this: val.clone(),
14247 to: DataType::Timestamp {
14248 timezone: false,
14249 precision: None,
14250 },
14251 trailing_comments: vec![],
14252 double_colon_syntax: false,
14253 format: None,
14254 default: None,
14255 inferred_type: None,
14256 }))
14257 }
14258 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::Date(_)) && matches!(
14259 target,
14260 DialectType::Presto
14261 | DialectType::Trino
14262 | DialectType::Athena
14263 ) =>
14264 {
14265 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
14266 let cast_date = Self::date_literal_to_cast(val.clone());
14267 Expression::Cast(Box::new(crate::expressions::Cast {
14268 this: cast_date,
14269 to: DataType::Timestamp {
14270 timezone: false,
14271 precision: None,
14272 },
14273 trailing_comments: vec![],
14274 double_colon_syntax: false,
14275 format: None,
14276 default: None,
14277 inferred_type: None,
14278 }))
14279 }
14280 _ => val.clone(),
14281 }
14282 } else {
14283 val.clone()
14284 };
14285
14286 match target {
14287 DialectType::DuckDB => {
14288 if let Expression::Literal(lit) = fmt_expr
14289 {
14290 if let crate::expressions::Literal::String(s) = lit.as_ref() {
14291 let c_fmt = if is_hive_source {
14292 java_to_c_format(s)
14293 } else {
14294 s.clone()
14295 };
14296 Ok(Expression::Function(Box::new(Function::new(
14297 "STRFTIME".to_string(),
14298 vec![cast_val, Expression::string(&c_fmt)],
14299 ))))
14300 } else {
14301 Ok(Expression::Function(Box::new(Function::new(
14302 "STRFTIME".to_string(),
14303 vec![cast_val, fmt_expr.clone()],
14304 ))))
14305 }
14306 } else {
14307 Ok(Expression::Function(Box::new(Function::new(
14308 "STRFTIME".to_string(),
14309 vec![cast_val, fmt_expr.clone()],
14310 ))))
14311 }
14312 }
14313 DialectType::Presto
14314 | DialectType::Trino
14315 | DialectType::Athena => {
14316 if is_hive_source {
14317 if let Expression::Literal(lit) = fmt_expr
14318 {
14319 if let crate::expressions::Literal::String(s) = lit.as_ref() {
14320 let p_fmt = java_to_presto_format(s);
14321 Ok(Expression::Function(Box::new(Function::new(
14322 "DATE_FORMAT".to_string(),
14323 vec![cast_val, Expression::string(&p_fmt)],
14324 ))))
14325 } else {
14326 Ok(Expression::Function(Box::new(Function::new(
14327 "DATE_FORMAT".to_string(),
14328 vec![cast_val, fmt_expr.clone()],
14329 ))))
14330 }
14331 } else {
14332 Ok(Expression::Function(Box::new(Function::new(
14333 "DATE_FORMAT".to_string(),
14334 vec![cast_val, fmt_expr.clone()],
14335 ))))
14336 }
14337 } else {
14338 Ok(Expression::Function(Box::new(Function::new(
14339 "DATE_FORMAT".to_string(),
14340 f.args,
14341 ))))
14342 }
14343 }
14344 DialectType::BigQuery => {
14345 // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
14346 if let Expression::Literal(lit) = fmt_expr
14347 {
14348 if let crate::expressions::Literal::String(s) = lit.as_ref() {
14349 let bq_fmt = if is_hive_source {
14350 java_to_bq_format(s)
14351 } else {
14352 java_to_c_format(s)
14353 };
14354 Ok(Expression::Function(Box::new(Function::new(
14355 "FORMAT_DATE".to_string(),
14356 vec![Expression::string(&bq_fmt), cast_val],
14357 ))))
14358 } else {
14359 Ok(Expression::Function(Box::new(Function::new(
14360 "FORMAT_DATE".to_string(),
14361 vec![fmt_expr.clone(), cast_val],
14362 ))))
14363 }
14364 } else {
14365 Ok(Expression::Function(Box::new(Function::new(
14366 "FORMAT_DATE".to_string(),
14367 vec![fmt_expr.clone(), cast_val],
14368 ))))
14369 }
14370 }
14371 DialectType::PostgreSQL | DialectType::Redshift => {
14372 if let Expression::Literal(lit) = fmt_expr
14373 {
14374 if let crate::expressions::Literal::String(s) = lit.as_ref() {
14375 let pg_fmt = s
14376 .replace("yyyy", "YYYY")
14377 .replace("MM", "MM")
14378 .replace("dd", "DD")
14379 .replace("HH", "HH24")
14380 .replace("mm", "MI")
14381 .replace("ss", "SS")
14382 .replace("yy", "YY");
14383 Ok(Expression::Function(Box::new(Function::new(
14384 "TO_CHAR".to_string(),
14385 vec![val, Expression::string(&pg_fmt)],
14386 ))))
14387 } else {
14388 Ok(Expression::Function(Box::new(Function::new(
14389 "TO_CHAR".to_string(),
14390 vec![val, fmt_expr.clone()],
14391 ))))
14392 }
14393 } else {
14394 Ok(Expression::Function(Box::new(Function::new(
14395 "TO_CHAR".to_string(),
14396 vec![val, fmt_expr.clone()],
14397 ))))
14398 }
14399 }
14400 _ => Ok(Expression::Function(f)),
14401 }
14402 }
// DATEDIFF(unit, start, end) - 3-arg form.
//
// SQLite is the odd one out: it spells this DATEDIFF(date1, date2, unit_string),
// so SQLite-sourced arguments are first re-ordered into the standard
// (unit, start, end) shape before the per-target dispatch below.
"DATEDIFF" if f.args.len() == 3 => {
    let mut args = f.args;
    // SQLite source: args = (date1, date2, unit_string)
    // Standard source: args = (unit, start, end)
    let (_arg0, arg1, arg2, unit_str) =
        if matches!(source, DialectType::SQLite) {
            let date1 = args.remove(0);
            let date2 = args.remove(0);
            let unit_expr = args.remove(0);
            let unit_s = Self::get_unit_str_static(&unit_expr);

            // For SQLite target, generate JULIANDAY arithmetic directly:
            // CAST((JULIANDAY(d1) - JULIANDAY(d2)) [* or / factor] AS INTEGER).
            if matches!(target, DialectType::SQLite) {
                let jd_first = Expression::Function(Box::new(
                    Function::new("JULIANDAY".to_string(), vec![date1]),
                ));
                let jd_second = Expression::Function(Box::new(
                    Function::new("JULIANDAY".to_string(), vec![date2]),
                ));
                let diff = Expression::Sub(Box::new(
                    crate::expressions::BinaryOp::new(
                        jd_first, jd_second,
                    ),
                ));
                // Parenthesize so the unit factor applies to the whole
                // subtraction, not only its right operand.
                let paren_diff = Expression::Paren(Box::new(
                    crate::expressions::Paren {
                        this: diff,
                        trailing_comments: Vec::new(),
                    },
                ));
                // JULIANDAY yields fractional days; scale to the requested
                // unit. MONTH/YEAR use the conventional 30/365-day
                // approximations rather than calendar-exact arithmetic.
                let adjusted = match unit_s.as_str() {
                    "HOUR" => Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Box::new(Literal::Number(
                                "24.0".to_string(),
                            ))),
                        ),
                    )),
                    "MINUTE" => Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Box::new(Literal::Number(
                                "1440.0".to_string(),
                            ))),
                        ),
                    )),
                    "SECOND" => Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Box::new(Literal::Number(
                                "86400.0".to_string(),
                            ))),
                        ),
                    )),
                    "MONTH" => Expression::Div(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Box::new(Literal::Number(
                                "30.0".to_string(),
                            ))),
                        ),
                    )),
                    "YEAR" => Expression::Div(Box::new(
                        crate::expressions::BinaryOp::new(
                            paren_diff,
                            Expression::Literal(Box::new(Literal::Number(
                                "365.0".to_string(),
                            ))),
                        ),
                    )),
                    // DAY (and anything unrecognized): raw day difference.
                    _ => paren_diff,
                };
                // `integer_spelling: true` renders the type as INTEGER
                // (SQLite's spelling) rather than INT.
                return Ok(Expression::Cast(Box::new(Cast {
                    this: adjusted,
                    to: DataType::Int {
                        length: None,
                        integer_spelling: true,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })));
            }

            // For other targets, remap to standard (unit, start, end) form
            let unit_ident =
                Expression::Identifier(Identifier::new(&unit_s));
            (unit_ident, date1, date2, unit_s)
        } else {
            let arg0 = args.remove(0);
            let arg1 = args.remove(0);
            let arg2 = args.remove(0);
            let unit_s = Self::get_unit_str_static(&arg0);
            (arg0, arg1, arg2, unit_s)
        };

    // For Hive/Spark source, string literal dates need to be cast
    // Note: Databricks is excluded - it handles string args like standard SQL
    let is_hive_spark =
        matches!(source, DialectType::Hive | DialectType::Spark);

    match target {
        DialectType::Snowflake => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Use ensure_to_date_preserved to add TO_DATE with a marker
            // that prevents the Snowflake TO_DATE handler from converting it to CAST
            let d1 = if is_hive_spark {
                Self::ensure_to_date_preserved(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::ensure_to_date_preserved(arg2)
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, d1, d2],
            ))))
        }
        DialectType::Redshift => {
            // Redshift keeps the (unit, start, end) DATEDIFF shape; only
            // Hive/Spark string-literal dates need an explicit DATE cast.
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            let d1 = if is_hive_spark {
                Self::ensure_cast_date(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::ensure_cast_date(arg2)
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, d1, d2],
            ))))
        }
        DialectType::TSQL => {
            // TSQL DATEDIFF(unit, start, end) accepts string literals
            // natively; pass the arguments through unchanged.
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::DuckDB => {
            let is_redshift_tsql = matches!(
                source,
                DialectType::Redshift | DialectType::TSQL
            );
            if is_hive_spark {
                // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
                let d1 = Self::ensure_cast_date(arg1);
                let d2 = Self::ensure_cast_date(arg2);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string(&unit_str), d1, d2],
                ))))
            } else if matches!(source, DialectType::Snowflake) {
                // For Snowflake source: special handling per unit
                match unit_str.as_str() {
                    "NANOSECOND" => {
                        // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
                        // Local helper: wrap an expression in
                        // CAST(... AS TIMESTAMP_NS) for nanosecond precision.
                        fn cast_to_timestamp_ns(
                            expr: Expression,
                        ) -> Expression
                        {
                            Expression::Cast(Box::new(Cast {
                                this: expr,
                                to: DataType::Custom {
                                    name: "TIMESTAMP_NS".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        }
                        let epoch_end = Expression::Function(Box::new(
                            Function::new(
                                "EPOCH_NS".to_string(),
                                vec![cast_to_timestamp_ns(arg2)],
                            ),
                        ));
                        let epoch_start = Expression::Function(
                            Box::new(Function::new(
                                "EPOCH_NS".to_string(),
                                vec![cast_to_timestamp_ns(arg1)],
                            )),
                        );
                        // end - start, matching DATEDIFF's sign convention.
                        Ok(Expression::Sub(Box::new(BinaryOp::new(
                            epoch_end,
                            epoch_start,
                        ))))
                    }
                    "WEEK" => {
                        // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
                        // Truncating both sides to week boundaries matches
                        // Snowflake's week-counting semantics.
                        let d1 = Self::force_cast_date(arg1);
                        let d2 = Self::force_cast_date(arg2);
                        let dt1 = Expression::Function(Box::new(
                            Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::string("WEEK"), d1],
                            ),
                        ));
                        let dt2 = Expression::Function(Box::new(
                            Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![Expression::string("WEEK"), d2],
                            ),
                        ));
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_DIFF".to_string(),
                                vec![
                                    Expression::string(&unit_str),
                                    dt1,
                                    dt2,
                                ],
                            ),
                        )))
                    }
                    _ => {
                        // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
                        let d1 = Self::force_cast_date(arg1);
                        let d2 = Self::force_cast_date(arg2);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_DIFF".to_string(),
                                vec![
                                    Expression::string(&unit_str),
                                    d1,
                                    d2,
                                ],
                            ),
                        )))
                    }
                }
            } else if is_redshift_tsql {
                // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
                let d1 = Self::force_cast_timestamp(arg1);
                let d2 = Self::force_cast_timestamp(arg2);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string(&unit_str), d1, d2],
                ))))
            } else {
                // Keep as DATEDIFF so DuckDB's transform_datediff handles
                // DATE_TRUNC for WEEK, CAST for string literals, etc.
                let unit =
                    Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEDIFF".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
        }
        DialectType::BigQuery => {
            // Despite the variable name, this set also includes Snowflake:
            // all three sources get a forced DATETIME cast below.
            let is_redshift_tsql = matches!(
                source,
                DialectType::Redshift
                    | DialectType::TSQL
                    | DialectType::Snowflake
            );
            let cast_d1 = if is_hive_spark {
                Self::ensure_cast_date(arg1)
            } else if is_redshift_tsql {
                Self::force_cast_datetime(arg1)
            } else {
                Self::ensure_cast_datetime(arg1)
            };
            let cast_d2 = if is_hive_spark {
                Self::ensure_cast_date(arg2)
            } else if is_redshift_tsql {
                Self::force_cast_datetime(arg2)
            } else {
                Self::ensure_cast_datetime(arg2)
            };
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // BigQuery argument order is DATE_DIFF(end, start, unit) -
            // note the deliberate (cast_d2, cast_d1) swap here.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![cast_d2, cast_d1, unit],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
            // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
            // As above, Snowflake is folded into the "redshift_tsql" set.
            let is_redshift_tsql = matches!(
                source,
                DialectType::Redshift
                    | DialectType::TSQL
                    | DialectType::Snowflake
            );
            let d1 = if is_hive_spark {
                Self::double_cast_timestamp_date(arg1)
            } else if is_redshift_tsql {
                Self::force_cast_timestamp(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::double_cast_timestamp_date(arg2)
            } else if is_redshift_tsql {
                Self::force_cast_timestamp(arg2)
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string(&unit_str), d1, d2],
            ))))
        }
        // Hive only has day-granular DATEDIFF(end, start); other units are
        // emulated (MONTHS_BETWEEN, day-diff / 7) and truncated via CAST.
        DialectType::Hive => match unit_str.as_str() {
            "MONTH" => Ok(Expression::Cast(Box::new(Cast {
                this: Expression::Function(Box::new(Function::new(
                    "MONTHS_BETWEEN".to_string(),
                    vec![arg2, arg1],
                ))),
                to: DataType::Int {
                    length: None,
                    integer_spelling: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }))),
            "WEEK" => Ok(Expression::Cast(Box::new(Cast {
                this: Expression::Div(Box::new(
                    crate::expressions::BinaryOp::new(
                        Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(),
                            vec![arg2, arg1],
                        ))),
                        Expression::number(7),
                    ),
                )),
                to: DataType::Int {
                    length: None,
                    integer_spelling: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }))),
            _ => Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![arg2, arg1],
            )))),
        },
        DialectType::Spark | DialectType::Databricks => {
            // Spark 3.x / Databricks support the 3-arg DATEDIFF directly.
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        _ => {
            // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
            let d1 = if is_hive_spark {
                Self::ensure_cast_date(arg1)
            } else {
                arg1
            };
            let d2 = if is_hive_spark {
                Self::ensure_cast_date(arg2)
            } else {
                arg2
            };
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, d1, d2],
            ))))
        }
    }
}
// DATEDIFF(end, start) - 2-arg form from Hive/MySQL.
// Returns the number of days between the two dates (end - start).
// arg0 = end date, arg1 = start date.
"DATEDIFF" if f.args.len() == 2 => {
    let mut args = f.args;
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);

    // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
    // Also recognizes TryCast/Cast to DATE that may have been produced by
    // cross-dialect TO_DATE -> TRY_CAST conversion.
    // Returns (expression, true) when the input was already a
    // date-producing wrapper, (expression, false) otherwise.
    let unwrap_to_date = |e: Expression| -> (Expression, bool) {
        if let Expression::Function(ref f) = e {
            if f.name.eq_ignore_ascii_case("TO_DATE")
                && f.args.len() == 1
            {
                return (f.args[0].clone(), true);
            }
        }
        // Also recognize TryCast(x, Date) as an already-converted TO_DATE
        if let Expression::TryCast(ref c) = e {
            if matches!(c.to, DataType::Date) {
                return (e, true); // Already properly cast, return as-is
            }
        }
        (e, false)
    };

    match target {
        DialectType::DuckDB => {
            // For Hive source, always CAST to DATE
            // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
            // cast_d0 is built from arg1 (the start date) because DuckDB's
            // DATE_DIFF takes (unit, start, end).
            let cast_d0 = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let (inner, was_to_date) = unwrap_to_date(arg1);
                if was_to_date {
                    // Already a date expression, use directly
                    if matches!(&inner, Expression::TryCast(_)) {
                        inner // Already TRY_CAST(x AS DATE)
                    } else {
                        Self::try_cast_date(inner)
                    }
                } else {
                    Self::force_cast_date(inner)
                }
            } else {
                Self::ensure_cast_date(arg1)
            };
            let cast_d1 = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let (inner, was_to_date) = unwrap_to_date(arg0);
                if was_to_date {
                    if matches!(&inner, Expression::TryCast(_)) {
                        inner
                    } else {
                        Self::try_cast_date(inner)
                    }
                } else {
                    Self::force_cast_date(inner)
                }
            } else {
                Self::ensure_cast_date(arg0)
            };
            // DATE_DIFF('DAY', start, end)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string("DAY"), cast_d0, cast_d1],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // For Hive/Spark source, apply double_cast_timestamp_date
            // For other sources (MySQL etc.), just swap args without casting
            if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                let cast_fn = |e: Expression| -> Expression {
                    let (inner, was_to_date) = unwrap_to_date(e);
                    if was_to_date {
                        // NOTE(review): when the arg was a TO_DATE wrapper,
                        // double_cast_timestamp_date is applied TWICE,
                        // producing a doubly-nested cast - confirm this
                        // matches the expected Presto output shape.
                        let first_cast =
                            Self::double_cast_timestamp_date(inner);
                        Self::double_cast_timestamp_date(first_cast)
                    } else {
                        Self::double_cast_timestamp_date(inner)
                    }
                };
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![
                        Expression::string("DAY"),
                        cast_fn(arg1),
                        cast_fn(arg0),
                    ],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string("DAY"), arg1, arg0],
                ))))
            }
        }
        DialectType::Redshift => {
            // Redshift: DATEDIFF(DAY, start, end) - args swapped from the
            // Hive/MySQL (end, start) order.
            let unit = Expression::Identifier(Identifier::new("DAY"));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, arg1, arg0],
            ))))
        }
        // All other targets: keep the original 2-arg (end, start) order.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![arg0, arg1],
        )))),
    }
}
14920 // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
14921 "DATE_DIFF" if f.args.len() == 3 => {
14922 let mut args = f.args;
14923 let arg0 = args.remove(0);
14924 let arg1 = args.remove(0);
14925 let arg2 = args.remove(0);
14926 let unit_str = Self::get_unit_str_static(&arg0);
14927
14928 match target {
14929 DialectType::DuckDB => {
14930 // DuckDB: DATE_DIFF('UNIT', start, end)
14931 Ok(Expression::Function(Box::new(Function::new(
14932 "DATE_DIFF".to_string(),
14933 vec![Expression::string(&unit_str), arg1, arg2],
14934 ))))
14935 }
14936 DialectType::Presto
14937 | DialectType::Trino
14938 | DialectType::Athena => {
14939 Ok(Expression::Function(Box::new(Function::new(
14940 "DATE_DIFF".to_string(),
14941 vec![Expression::string(&unit_str), arg1, arg2],
14942 ))))
14943 }
14944 DialectType::ClickHouse => {
14945 // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
14946 let unit =
14947 Expression::Identifier(Identifier::new(&unit_str));
14948 Ok(Expression::Function(Box::new(Function::new(
14949 "DATE_DIFF".to_string(),
14950 vec![unit, arg1, arg2],
14951 ))))
14952 }
14953 DialectType::Snowflake | DialectType::Redshift => {
14954 let unit =
14955 Expression::Identifier(Identifier::new(&unit_str));
14956 Ok(Expression::Function(Box::new(Function::new(
14957 "DATEDIFF".to_string(),
14958 vec![unit, arg1, arg2],
14959 ))))
14960 }
14961 _ => {
14962 let unit =
14963 Expression::Identifier(Identifier::new(&unit_str));
14964 Ok(Expression::Function(Box::new(Function::new(
14965 "DATEDIFF".to_string(),
14966 vec![unit, arg1, arg2],
14967 ))))
14968 }
14969 }
14970 }
// DATEADD(unit, val, date) - 3-arg form.
// Rewrites Snowflake/TSQL-style DATEADD into each target's native
// date-addition construct (function call, +INTERVAL arithmetic, or a
// dedicated AST node for MySQL).
"DATEADD" if f.args.len() == 3 => {
    let mut args = f.args;
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    let unit_str = Self::get_unit_str_static(&arg0);

    // Normalize TSQL unit abbreviations to standard names.
    // NOTE(review): "DY" is mapped to DAY here, but in TSQL "dy" is the
    // dayofyear abbreviation - confirm this mapping is intentional.
    let unit_str = match unit_str.as_str() {
        "YY" | "YYYY" => "YEAR".to_string(),
        "QQ" | "Q" => "QUARTER".to_string(),
        "MM" | "M" => "MONTH".to_string(),
        "WK" | "WW" => "WEEK".to_string(),
        "DD" | "D" | "DY" => "DAY".to_string(),
        "HH" => "HOUR".to_string(),
        "MI" | "N" => "MINUTE".to_string(),
        "SS" | "S" => "SECOND".to_string(),
        "MS" => "MILLISECOND".to_string(),
        "MCS" | "US" => "MICROSECOND".to_string(),
        _ => unit_str,
    };
    match target {
        DialectType::Snowflake => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Cast string literal to TIMESTAMP, but not for Snowflake source
            // (Snowflake natively accepts string literals in DATEADD)
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
            ) && !matches!(source, DialectType::Snowflake)
            {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::TSQL => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
            ) && !matches!(
                source,
                DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::Hive
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "DATETIME2".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Redshift => {
            // Redshift accepts DATEADD(unit, val, date) directly.
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Databricks => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
            // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
            let func_name = if matches!(
                source,
                DialectType::TSQL
                    | DialectType::Fabric
                    | DialectType::Databricks
                    | DialectType::Snowflake
            ) {
                "DATEADD"
            } else {
                "DATE_ADD"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::DuckDB => {
            // Special handling for NANOSECOND from Snowflake
            if unit_str == "NANOSECOND"
                && matches!(source, DialectType::Snowflake)
            {
                // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
                let cast_ts = Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "TIMESTAMP_NS".to_string(),
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                let epoch_ns =
                    Expression::Function(Box::new(Function::new(
                        "EPOCH_NS".to_string(),
                        vec![cast_ts],
                    )));
                let sum = Expression::Add(Box::new(BinaryOp::new(
                    epoch_ns, arg1,
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "MAKE_TIMESTAMP_NS".to_string(),
                    vec![sum],
                ))))
            } else {
                // DuckDB: convert to date + INTERVAL syntax with CAST
                let iu = Self::parse_interval_unit_static(&unit_str);
                let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
                }));
                // Cast string literal to TIMESTAMP
                let arg2 = if matches!(
                    &arg2,
                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
                ) {
                    Expression::Cast(Box::new(Cast {
                        this: arg2,
                        to: DataType::Timestamp {
                            precision: None,
                            timezone: false,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }))
                } else {
                    arg2
                };
                Ok(Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(arg2, interval),
                )))
            }
        }
        DialectType::Spark => {
            // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
            // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
            {
                // Local helper: multiply an amount expression by a constant
                // factor, folding the product when the amount is a numeric
                // literal (e.g. 2 years -> 24 months) and otherwise emitting
                // an explicit `expr * factor` multiplication.
                fn multiply_expr_spark(
                    expr: Expression,
                    factor: i64,
                ) -> Expression
                {
                    if let Expression::Literal(lit) = &expr
                    {
                        if let crate::expressions::Literal::Number(n) = lit.as_ref() {
                            if let Ok(val) = n.parse::<i64>() {
                                return Expression::Literal(Box::new(crate::expressions::Literal::Number(
                                    (val * factor).to_string(),
                                ),));
                            }
                        }
                    }
                    Expression::Mul(Box::new(
                        crate::expressions::BinaryOp::new(
                            expr,
                            Expression::Literal(Box::new(crate::expressions::Literal::Number(
                                factor.to_string(),
                            ),)),
                        ),
                    ))
                }
                // Defensive re-normalization; unit_str was already
                // normalized above, so the abbreviation alternatives here
                // are effectively dead but harmless.
                let normalized_unit = match unit_str.as_str() {
                    "YEAR" | "YY" | "YYYY" => "YEAR",
                    "QUARTER" | "QQ" | "Q" => "QUARTER",
                    "MONTH" | "MM" | "M" => "MONTH",
                    "WEEK" | "WK" | "WW" => "WEEK",
                    "DAY" | "DD" | "D" | "DY" => "DAY",
                    _ => &unit_str,
                };
                match normalized_unit {
                    // YEAR/QUARTER fold into month counts for ADD_MONTHS;
                    // WEEK folds into days for DATE_ADD.
                    "YEAR" => {
                        let months = multiply_expr_spark(arg1, 12);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ),
                        )))
                    }
                    "QUARTER" => {
                        let months = multiply_expr_spark(arg1, 3);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ),
                        )))
                    }
                    "MONTH" => Ok(Expression::Function(Box::new(
                        Function::new(
                            "ADD_MONTHS".to_string(),
                            vec![arg2, arg1],
                        ),
                    ))),
                    "WEEK" => {
                        let days = multiply_expr_spark(arg1, 7);
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_ADD".to_string(),
                                vec![arg2, days],
                            ),
                        )))
                    }
                    "DAY" => Ok(Expression::Function(Box::new(
                        Function::new(
                            "DATE_ADD".to_string(),
                            vec![arg2, arg1],
                        ),
                    ))),
                    // Sub-day units: fall back to 3-arg DATE_ADD.
                    _ => {
                        let unit = Expression::Identifier(
                            Identifier::new(&unit_str),
                        );
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "DATE_ADD".to_string(),
                                vec![unit, arg1, arg2],
                            ),
                        )))
                    }
                }
            } else {
                // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
                let unit =
                    Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
        }
        // Hive: ADD_MONTHS for MONTH, otherwise DATE_ADD(date, val).
        // NOTE(review): units other than MONTH/DAY collapse to a plain
        // day-granular DATE_ADD here - confirm that is the intended
        // lossy fallback for Hive.
        DialectType::Hive => match unit_str.as_str() {
            "MONTH" => {
                Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, arg1],
                ))))
            }
            _ => Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, arg1],
            )))),
        },
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Cast string literal date to TIMESTAMP
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            // Presto family: DATE_ADD('UNIT', val, date).
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string(&unit_str), arg1, arg2],
            ))))
        }
        DialectType::MySQL => {
            // MySQL has a dedicated AST node that renders as
            // DATE_ADD(date, INTERVAL val UNIT).
            let iu = Self::parse_interval_unit_static(&unit_str);
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: arg2,
                    interval: arg1,
                    unit: iu,
                },
            )))
        }
        DialectType::PostgreSQL => {
            // Cast string literal date to TIMESTAMP
            let arg2 = if matches!(
                &arg2,
                Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            // PostgreSQL: date + INTERVAL 'val UNIT'. The amount is
            // stringified into the interval literal, so non-literal
            // amounts rely on expr_to_string_static's rendering.
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string(&format!(
                        "{} {}",
                        Self::expr_to_string_static(&arg1),
                        unit_str
                    ))),
                    unit: None,
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(arg2, interval),
            )))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL val UNIT).
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        },
                    ),
                },
            ));
            // Non-TSQL sources: CAST string literal to DATETIME
            let arg2 = if !matches!(
                source,
                DialectType::TSQL | DialectType::Fabric
            ) && matches!(
                &arg2,
                Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
            ) {
                Expression::Cast(Box::new(Cast {
                    this: arg2,
                    to: DataType::Custom {
                        name: "DATETIME".to_string(),
                    },
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                arg2
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, interval],
            ))))
        }
        // Fallback: keep Snowflake/TSQL-style DATEADD(unit, val, date).
        _ => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
    }
}
// DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
// or (date, val, 'UNIT') from Generic canonical form.
// The two shapes are disambiguated by checking whether the THIRD argument
// is a string literal naming a known interval unit.
"DATE_ADD" if f.args.len() == 3 => {
    let mut args = f.args;
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
    // where arg2 is a string literal matching a unit name
    let arg2_unit = match &arg2 {
        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
            let Literal::String(s) = lit.as_ref() else { unreachable!() };
            let u = s.to_ascii_uppercase();
            if matches!(
                u.as_str(),
                "DAY"
                    | "MONTH"
                    | "YEAR"
                    | "HOUR"
                    | "MINUTE"
                    | "SECOND"
                    | "WEEK"
                    | "QUARTER"
                    | "MILLISECOND"
                    | "MICROSECOND"
            ) {
                Some(u)
            } else {
                None
            }
        }
        _ => None,
    };
    // Reorder: if arg2 is the unit, swap to (unit, val, date) form
    let (unit_str, val, date) = if let Some(u) = arg2_unit {
        (u, arg1, arg0)
    } else {
        (Self::get_unit_str_static(&arg0), arg1, arg2)
    };
    // Alias for backward compat with the rest of the match:
    // arg1 = amount, arg2 = date from here on.
    let arg1 = val;
    let arg2 = date;

    match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto family: DATE_ADD('UNIT', amount, date).
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string(&unit_str), arg1, arg2],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: date + INTERVAL amount UNIT.
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(arg2, interval),
            )))
        }
        DialectType::PostgreSQL
        | DialectType::Materialize
        | DialectType::RisingWave => {
            // PostgreSQL: x + INTERVAL '1 DAY'
            // The amount is stringified into the interval literal.
            let amount_str = Self::expr_to_string_static(&arg1);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string(&format!(
                        "{} {}",
                        amount_str, unit_str
                    ))),
                    unit: None,
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(arg2, interval),
            )))
        }
        DialectType::Snowflake
        | DialectType::TSQL
        | DialectType::Redshift => {
            // These targets use DATEADD(UNIT, amount, date).
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::BigQuery
        | DialectType::MySQL
        | DialectType::Doris
        | DialectType::StarRocks
        | DialectType::Drill => {
            // DATE_ADD(date, INTERVAL amount UNIT)
            let iu = Self::parse_interval_unit_static(&unit_str);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(arg1),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, interval],
            ))))
        }
        DialectType::SQLite => {
            // SQLite: DATE(x, '1 DAY')
            // Build the string '1 DAY' from amount and unit.
            // NOTE(review): non-numeric-literal amounts silently fall back
            // to "1" here - confirm that lossy default is intended.
            let amount_str = match &arg1 {
                Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => { let Literal::Number(n) = lit.as_ref() else { unreachable!() }; n.clone() },
                _ => "1".to_string(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATE".to_string(),
                vec![
                    arg2,
                    Expression::string(format!(
                        "{} {}",
                        amount_str, unit_str
                    )),
                ],
            ))))
        }
        DialectType::Dremio => {
            // Dremio: DATE_ADD(date, amount) - drops unit
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, arg1],
            ))))
        }
        DialectType::Spark => {
            // Spark: DATE_ADD(date, val) for DAY; other units keep the
            // 3-arg DATE_ADD(UNIT, val, date) form.
            if unit_str == "DAY" {
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![arg2, arg1],
                ))))
            } else {
                let unit =
                    Expression::Identifier(Identifier::new(&unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
        }
        DialectType::Databricks => {
            // Databricks: always 3-arg DATE_ADD(UNIT, val, date).
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
        DialectType::Hive => {
            // Hive: DATE_ADD(date, val) for DAY
            // NOTE(review): every unit collapses to day-granular
            // DATE_ADD here - confirm the lossy fallback is intended.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![arg2, arg1],
            ))))
        }
        // Default: 3-arg DATE_ADD(UNIT, val, date).
        _ => {
            let unit =
                Expression::Identifier(Identifier::new(&unit_str));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![unit, arg1, arg2],
            ))))
        }
    }
}
// DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days)
// Only rewrites when the SOURCE dialect uses the 2-arg day-based form;
// other sources fall through to other arms.
"DATE_ADD"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
                | DialectType::Generic
        ) =>
{
    let mut args = f.args;
    let date = args.remove(0);
    // After the first remove, index 0 is the original second argument.
    let days = args.remove(0);
    match target {
        DialectType::Hive | DialectType::Spark => {
            // Keep as DATE_ADD(date, days) for Hive/Spark
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, days],
            ))))
        }
        DialectType::Databricks => {
            // Databricks: DATEADD(DAY, days, date)
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    date,
                ],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
            let cast_date = Self::ensure_cast_date(date);
            // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
            // so `INTERVAL (a - b) DAY` keeps its precedence when regenerated.
            let interval_val = if matches!(
                days,
                Expression::Mul(_)
                    | Expression::Sub(_)
                    | Expression::Add(_)
            ) {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: days,
                    trailing_comments: vec![],
                }))
            } else {
                days
            };
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(interval_val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        DialectType::Snowflake => {
            // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
            // (Hive implicitly coerces date strings; Snowflake needs an explicit cast.)
            let cast_date = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                if matches!(
                    date,
                    Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
                ) {
                    Self::double_cast_timestamp_date(date)
                } else {
                    date
                }
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    cast_date,
                ],
            ))))
        }
        DialectType::Redshift => {
            // Redshift: DATEADD(DAY, days, date) — no cast wrapping needed.
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
            // But Databricks DATE_ADD doesn't need this wrapping for TSQL
            let cast_date = if matches!(
                source,
                DialectType::Hive | DialectType::Spark
            ) {
                if matches!(
                    date,
                    Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
                ) {
                    Self::double_cast_datetime2_date(date)
                } else {
                    date
                }
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    days,
                    cast_date,
                ],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
            let cast_date = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                if matches!(
                    date,
                    Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
                ) {
                    Self::double_cast_timestamp_date(date)
                } else {
                    date
                }
            } else {
                date
            };
            // Presto/Trino DATE_ADD takes the unit as a STRING literal,
            // unlike the identifier form used by Snowflake/TSQL above.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string("DAY"), days, cast_date],
            ))))
        }
        DialectType::BigQuery => {
            // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
            // Unlike the arms above this wraps unconditionally (not only
            // string literals) for these sources.
            let cast_date = if matches!(
                source,
                DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks
            ) {
                Self::double_cast_datetime_date(date)
            } else {
                date
            };
            // Wrap complex expressions in Paren for interval
            let interval_val = if matches!(
                days,
                Expression::Mul(_)
                    | Expression::Sub(_)
                    | Expression::Add(_)
            ) {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: days,
                    trailing_comments: vec![],
                }))
            } else {
                days
            };
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(interval_val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![cast_date, interval],
            ))))
        }
        DialectType::MySQL => {
            // MySQL gets a dedicated DateAdd AST node (renders as
            // DATE_ADD(date, INTERVAL days DAY)).
            let iu = crate::expressions::IntervalUnit::Day;
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: date,
                    interval: days,
                    unit: iu,
                },
            )))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: date + INTERVAL '<days> DAY' — the day count is
            // stringified into the interval literal.
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::string(&format!(
                        "{} DAY",
                        Self::expr_to_string_static(&days)
                    ))),
                    unit: None,
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        DialectType::Doris
        | DialectType::StarRocks
        | DialectType::Drill => {
            // DATE_ADD(date, INTERVAL days DAY)
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(days),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        // Any other target keeps the 2-arg form unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![date, days],
        )))),
    }
}
// DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days)
// Rewritten as an ADD of (days * -1) for targets without a native
// 2-arg DATE_SUB.
"DATE_SUB"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
        ) =>
{
    let mut args = f.args;
    let date = args.remove(0);
    let days = args.remove(0);
    // Helper to create days * -1
    let make_neg_days = |d: Expression| -> Expression {
        Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
            d,
            Expression::Literal(Box::new(Literal::Number("-1".to_string()))),
        )))
    };
    // Computed once up front: several target arms below cast string-literal
    // dates differently from column/expression dates.
    let is_string_literal =
        matches!(date, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)));
    match target {
        DialectType::Hive
        | DialectType::Spark
        | DialectType::Databricks => {
            // Keep as DATE_SUB(date, days) for Hive/Spark
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_SUB".to_string(),
                vec![date, days],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL (days * -1) DAY
            let cast_date = Self::ensure_cast_date(date);
            let neg = make_neg_days(days);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    // Parenthesized so the negation binds inside INTERVAL.
                    this: Some(Expression::Paren(Box::new(
                        crate::expressions::Paren {
                            this: neg,
                            trailing_comments: vec![],
                        },
                    ))),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        DialectType::Snowflake => {
            // Snowflake: DATEADD(DAY, days * -1, date), casting string
            // literal dates via CAST(CAST(x AS TIMESTAMP) AS DATE).
            let cast_date = if is_string_literal {
                Self::double_cast_timestamp_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    neg,
                    cast_date,
                ],
            ))))
        }
        DialectType::Redshift => {
            // Redshift: DATEADD(DAY, days * -1, date) — no cast wrapping.
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    neg,
                    date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL: DATEADD(DAY, days * -1, date) with DATETIME2 double cast
            // for string literal dates.
            let cast_date = if is_string_literal {
                Self::double_cast_datetime2_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("DAY")),
                    neg,
                    cast_date,
                ],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto family: DATE_ADD('DAY', days * -1, date) — unit is a
            // string literal here, not an identifier.
            let cast_date = if is_string_literal {
                Self::double_cast_timestamp_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string("DAY"), neg, cast_date],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL (days * -1) DAY) with a
            // DATETIME double cast for string literal dates.
            let cast_date = if is_string_literal {
                Self::double_cast_datetime_date(date)
            } else {
                date
            };
            let neg = make_neg_days(days);
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    // Parenthesized so the negation binds inside INTERVAL.
                    this: Some(Expression::Paren(Box::new(
                        crate::expressions::Paren {
                            this: neg,
                            trailing_comments: vec![],
                        },
                    ))),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Day,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![cast_date, interval],
            ))))
        }
        // Any other target keeps DATE_SUB(date, days) unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "DATE_SUB".to_string(),
            vec![date, days],
        )))),
    }
}
// ADD_MONTHS(date, val) -> target-specific
"ADD_MONTHS" if f.args.len() == 2 => {
    let mut args = f.args;
    let date = args.remove(0);
    let val = args.remove(0);
    match target {
        DialectType::TSQL => {
            // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2)).
            let cast_date = Self::ensure_cast_datetime2(date);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("MONTH")),
                    val,
                    cast_date,
                ],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: date + INTERVAL val MONTH.
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit:
                                crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        DialectType::Snowflake => {
            // Keep ADD_MONTHS when source is Snowflake
            if matches!(source, DialectType::Snowflake) {
                Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date, val],
                ))))
            } else {
                // Otherwise normalize to DATEADD(MONTH, val, date).
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(
                            "MONTH",
                        )),
                        val,
                        date,
                    ],
                ))))
            }
        }
        DialectType::Redshift => {
            // Redshift: DATEADD(MONTH, val, date).
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new("MONTH")),
                    val,
                    date,
                ],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto family: DATE_ADD('MONTH', val, date) — string unit.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![Expression::string("MONTH"), val, date],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL val MONTH).
            let interval = Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(val),
                    unit: Some(
                        crate::expressions::IntervalUnitSpec::Simple {
                            unit:
                                crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        },
                    ),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        // Any other target keeps ADD_MONTHS unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ADD_MONTHS".to_string(),
            vec![date, val],
        )))),
    }
}
// DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
"DATETRUNC" if f.args.len() == 2 => {
    let mut args = f.args;
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    // Normalized uppercase unit string extracted from the first argument.
    let unit_str = Self::get_unit_str_static(&arg0);
    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
            Ok(Expression::Function(Box::new(Function::new(
                "DATETRUNC".to_string(),
                vec![
                    Expression::Identifier(Identifier::new(&unit_str)),
                    arg1,
                ],
            ))))
        }
        DialectType::DuckDB => {
            // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
            let date = Self::ensure_cast_timestamp(arg1);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![Expression::string(&unit_str), date],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse: dateTrunc('UNIT', expr)
            Ok(Expression::Function(Box::new(Function::new(
                "dateTrunc".to_string(),
                vec![Expression::string(&unit_str), arg1],
            ))))
        }
        _ => {
            // Standard: DATE_TRUNC('UNIT', expr)
            let unit = Expression::string(&unit_str);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![unit, arg1],
            ))))
        }
    }
}
// GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
"GETDATE" if f.args.is_empty() => match target {
    // TSQL keeps the original function node untouched.
    DialectType::TSQL => Ok(Expression::Function(f)),
    // Redshift also supports GETDATE(); rebuilt with empty args.
    DialectType::Redshift => Ok(Expression::Function(Box::new(
        Function::new("GETDATE".to_string(), vec![]),
    ))),
    _ => Ok(Expression::CurrentTimestamp(
        crate::expressions::CurrentTimestamp {
            precision: None,
            sysdate: false,
        },
    )),
},
// TO_HEX(x) / HEX(x) -> target-specific hex function
"TO_HEX" | "HEX" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Presto | DialectType::Trino => "TO_HEX",
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "HEX",
        DialectType::DuckDB
        | DialectType::PostgreSQL
        | DialectType::Redshift => "TO_HEX",
        // Other targets keep whichever spelling the source used.
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
"FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
    match target {
        DialectType::BigQuery => {
            // BigQuery: UNHEX(x) -> FROM_HEX(x)
            // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
            // because BigQuery MD5 returns BYTES, not hex string
            let arg = &f.args[0];
            let wrapped_arg = match arg {
                Expression::Function(inner_f)
                    if inner_f.name.eq_ignore_ascii_case("MD5")
                        || inner_f.name.eq_ignore_ascii_case("SHA1")
                        || inner_f.name.eq_ignore_ascii_case("SHA256")
                        || inner_f.name.eq_ignore_ascii_case("SHA512") =>
                {
                    // Wrap hash function in TO_HEX for BigQuery
                    Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg.clone()],
                    )))
                }
                // Non-hash argument: move it out of f.args unchanged.
                _ => f.args.into_iter().next().unwrap(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_HEX".to_string(),
                vec![wrapped_arg],
            ))))
        }
        _ => {
            let name = match target {
                DialectType::Presto | DialectType::Trino => "FROM_HEX",
                DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive => "UNHEX",
                _ => &f.name,
            };
            Ok(Expression::Function(Box::new(Function::new(
                name.to_string(),
                f.args,
            ))))
        }
    }
}
// TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
"TO_UTF8" if f.args.len() == 1 => match target {
    DialectType::Spark | DialectType::Databricks => {
        let mut args = f.args;
        args.push(Expression::string("utf-8"));
        Ok(Expression::Function(Box::new(Function::new(
            "ENCODE".to_string(),
            args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
"FROM_UTF8" if f.args.len() == 1 => match target {
    DialectType::Spark | DialectType::Databricks => {
        let mut args = f.args;
        args.push(Expression::string("utf-8"));
        Ok(Expression::Function(Box::new(Function::new(
            "DECODE".to_string(),
            args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
"STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "STARTSWITH",
        DialectType::Presto | DialectType::Trino => "STARTS_WITH",
        DialectType::PostgreSQL | DialectType::Redshift => {
            "STARTS_WITH"
        }
        // Other targets keep whichever spelling the source used.
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
// NOTE(review): clippy would prefer !f.args.is_empty() over len() >= 1.
"APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
    let name = match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => "APPROX_DISTINCT",
        _ => "APPROX_COUNT_DISTINCT",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
// BigQuery sources are excluded: their JSON_EXTRACT path syntax differs
// and is presumably handled elsewhere — confirm.
"JSON_EXTRACT"
    if f.args.len() == 2
        && !matches!(source, DialectType::BigQuery)
        && matches!(
            target,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    Ok(Expression::Function(Box::new(Function::new(
        "GET_JSON_OBJECT".to_string(),
        f.args,
    ))))
}
// JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
"JSON_EXTRACT"
    if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
{
    let mut args = f.args;
    // Remove index 1 first so index 0 still refers to the original arg.
    let path = args.remove(1);
    let this = args.remove(0);
    Ok(Expression::JsonExtract(Box::new(
        crate::expressions::JsonExtractFunc {
            this,
            path,
            returning: None,
            // Render as `x -> path` rather than a function call.
            arrow_syntax: true,
            hash_arrow_syntax: false,
            wrapper_option: None,
            quotes_option: None,
            on_scalar_string: false,
            on_error: None,
        },
    )))
}
// JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
"JSON_FORMAT" if f.args.len() == 1 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
            // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
            // The payload is wrapped in [ ] so SCHEMA_OF_JSON sees an array,
            // then the regex strips the added brackets from the result.
            if matches!(
                source,
                DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena
            ) {
                if let Some(Expression::ParseJson(pj)) = f.args.first()
                {
                    if let Expression::Literal(lit) =
                        &pj.this
                    {
                        if let Literal::String(s) = lit.as_ref() {
                            let wrapped = Expression::Literal(Box::new(Literal::String(format!("[{}]", s)),));
                            let schema_of_json = Expression::Function(
                                Box::new(Function::new(
                                    "SCHEMA_OF_JSON".to_string(),
                                    vec![wrapped.clone()],
                                )),
                            );
                            let from_json = Expression::Function(Box::new(
                                Function::new(
                                    "FROM_JSON".to_string(),
                                    vec![wrapped, schema_of_json],
                                ),
                            ));
                            let to_json = Expression::Function(Box::new(
                                Function::new(
                                    "TO_JSON".to_string(),
                                    vec![from_json],
                                ),
                            ));
                            // Early return: this path bypasses the generic
                            // TO_JSON rewrite further below.
                            return Ok(Expression::Function(Box::new(
                                Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![
                                        to_json,
                                        Expression::Literal(Box::new(Literal::String(
                                            "^.(.*).$".to_string(),
                                        ),)),
                                        Expression::Literal(Box::new(Literal::Number(
                                            "1".to_string(),
                                        ),)),
                                    ],
                                ),
                            )));
                        }
                    }
                }
            }

            // Strip inner CAST(... AS JSON) or TO_JSON() if present
            // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
            let mut args = f.args;
            if let Some(Expression::Cast(ref c)) = args.first() {
                if matches!(&c.to, DataType::Json | DataType::JsonB) {
                    args = vec![c.this.clone()];
                }
            } else if let Some(Expression::Function(ref inner_f)) =
                args.first()
            {
                if inner_f.name.eq_ignore_ascii_case("TO_JSON")
                    && inner_f.args.len() == 1
                {
                    // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
                    args = inner_f.args.clone();
                }
            }
            Ok(Expression::Function(Box::new(Function::new(
                "TO_JSON".to_string(),
                args,
            ))))
        }
        DialectType::BigQuery => Ok(Expression::Function(Box::new(
            Function::new("TO_JSON_STRING".to_string(), f.args),
        ))),
        DialectType::DuckDB => {
            // CAST(TO_JSON(x) AS TEXT)
            let to_json = Expression::Function(Box::new(
                Function::new("TO_JSON".to_string(), f.args),
            ));
            Ok(Expression::Cast(Box::new(Cast {
                this: to_json,
                to: DataType::Text,
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        // Any other target keeps JSON_FORMAT unchanged.
        _ => Ok(Expression::Function(f)),
    }
}
// SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
"SYSDATE" if f.args.is_empty() => {
    match target {
        // Oracle and Redshift keep bare SYSDATE as-is.
        DialectType::Oracle | DialectType::Redshift => {
            Ok(Expression::Function(f))
        }
        DialectType::Snowflake => {
            // Snowflake uses SYSDATE() with parens
            let mut f = *f;
            f.no_parens = false;
            Ok(Expression::Function(Box::new(f)))
        }
        DialectType::DuckDB => {
            // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
            Ok(Expression::AtTimeZone(Box::new(
                crate::expressions::AtTimeZone {
                    this: Expression::CurrentTimestamp(
                        crate::expressions::CurrentTimestamp {
                            precision: None,
                            sysdate: false,
                        },
                    ),
                    zone: Expression::Literal(Box::new(Literal::String(
                        "UTC".to_string(),
                    ))),
                },
            )))
        }
        // Elsewhere: CURRENT_TIMESTAMP, tagged sysdate so generators can
        // round-trip the original spelling if they support it.
        _ => Ok(Expression::CurrentTimestamp(
            crate::expressions::CurrentTimestamp {
                precision: None,
                sysdate: true,
            },
        )),
    }
}
// LOGICAL_OR(x) -> BOOL_OR(x)
"LOGICAL_OR" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "BOOL_OR",
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// LOGICAL_AND(x) -> BOOL_AND(x)
"LOGICAL_AND" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "BOOL_AND",
        _ => &f.name,
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
"MONTHS_ADD" if f.args.len() == 2 => match target {
    DialectType::Oracle => Ok(Expression::Function(Box::new(
        Function::new("ADD_MONTHS".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
"ARRAY_JOIN" if f.args.len() >= 2 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
            Ok(Expression::Function(f))
        }
        DialectType::Hive => {
            // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
            let mut args = f.args;
            let arr = args.remove(0);
            let sep = args.remove(0);
            // Drop any remaining args (null_replacement)
            Ok(Expression::Function(Box::new(Function::new(
                "CONCAT_WS".to_string(),
                vec![sep, arr],
            ))))
        }
        // NOTE(review): this arm is identical to the `_` fallback below and
        // could be folded into it; kept explicit, presumably for clarity.
        DialectType::Presto | DialectType::Trino => {
            Ok(Expression::Function(f))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// LOCATE(substr, str, pos) 3-arg -> target-specific
// For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
"LOCATE"
    if f.args.len() == 3
        && matches!(
            target,
            DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
                | DialectType::DuckDB
        ) =>
{
    let mut args = f.args;
    let substr = args.remove(0);
    let string = args.remove(0);
    let pos = args.remove(0);
    // STRPOS(SUBSTRING(string, pos), substr)
    // Search the suffix starting at `pos`, then shift the hit back into
    // full-string coordinates below.
    let substring_call = Expression::Function(Box::new(Function::new(
        "SUBSTRING".to_string(),
        vec![string.clone(), pos.clone()],
    )));
    let strpos_call = Expression::Function(Box::new(Function::new(
        "STRPOS".to_string(),
        vec![substring_call, substr.clone()],
    )));
    // STRPOS(...) + pos - 1
    let pos_adjusted =
        Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
            Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(
                    strpos_call.clone(),
                    pos.clone(),
                ),
            )),
            Expression::number(1),
        )));
    // STRPOS(...) = 0  — "not found" guard; without it the +pos-1 shift
    // would turn 0 (not found) into a bogus positive index.
    let is_zero =
        Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
            strpos_call.clone(),
            Expression::number(0),
        )));

    match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
            Ok(Expression::Function(Box::new(Function::new(
                "IF".to_string(),
                vec![is_zero, Expression::number(0), pos_adjusted],
            ))))
        }
        DialectType::DuckDB => {
            // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
            Ok(Expression::Case(Box::new(crate::expressions::Case {
                operand: None,
                whens: vec![(is_zero, Expression::number(0))],
                else_: Some(pos_adjusted),
                comments: Vec::new(),
                inferred_type: None,
            })))
        }
        // Unreachable given the outer guard, but kept for exhaustiveness.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "LOCATE".to_string(),
            vec![substr, string, pos],
        )))),
    }
}
// STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
"STRPOS"
    if f.args.len() == 3
        && matches!(
            target,
            DialectType::BigQuery
                | DialectType::Oracle
                | DialectType::Teradata
        ) =>
{
    let mut args = f.args;
    let haystack = args.remove(0);
    let needle = args.remove(0);
    let occurrence = args.remove(0);
    // INSTR's third argument is the search start position, fixed at 1 here.
    Ok(Expression::Function(Box::new(Function::new(
        "INSTR".to_string(),
        vec![haystack, needle, Expression::number(1), occurrence],
    ))))
}
// SCHEMA_NAME(id) -> target-specific
"SCHEMA_NAME" if f.args.len() <= 1 => match target {
    DialectType::MySQL | DialectType::SingleStore => {
        // MySQL: SCHEMA() takes no arguments; any id argument is dropped.
        Ok(Expression::Function(Box::new(Function::new(
            "SCHEMA".to_string(),
            vec![],
        ))))
    }
    DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
        crate::expressions::CurrentSchema { this: None },
    ))),
    // SQLite's only schema is 'main'; fold the call to a constant.
    DialectType::SQLite => Ok(Expression::string("main")),
    _ => Ok(Expression::Function(f)),
},
// STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
"STRTOL" if f.args.len() == 2 => match target {
    DialectType::Presto | DialectType::Trino => {
        Ok(Expression::Function(Box::new(Function::new(
            "FROM_BASE".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
"EDITDIST3" if f.args.len() == 2 => match target {
    DialectType::Spark | DialectType::Databricks => {
        Ok(Expression::Function(Box::new(Function::new(
            "LEVENSHTEIN".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
"FORMAT"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::MySQL | DialectType::SingleStore
        )
        && matches!(target, DialectType::DuckDB) =>
{
    let mut args = f.args;
    let num_expr = args.remove(0);
    let decimals_expr = args.remove(0);
    // Extract decimal count
    // NOTE(review): a non-literal decimals expression silently defaults to
    // "0" decimal places — confirm that dynamic precision is out of scope.
    let dec_count = match &decimals_expr {
        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => { let Literal::Number(n) = lit.as_ref() else { unreachable!() }; n.clone() },
        _ => "0".to_string(),
    };
    // Doubled braces escape to literal { } around the fmt spec,
    // producing e.g. "{:,.2f}" for the DuckDB format string.
    let fmt_str = format!("{{:,.{}f}}", dec_count);
    Ok(Expression::Function(Box::new(Function::new(
        "FORMAT".to_string(),
        vec![Expression::string(&fmt_str), num_expr],
    ))))
}
16593 // FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
16594 "FORMAT"
16595 if f.args.len() == 2
16596 && matches!(
16597 source,
16598 DialectType::TSQL | DialectType::Fabric
16599 ) =>
16600 {
16601 let val_expr = f.args[0].clone();
16602 let fmt_expr = f.args[1].clone();
16603 // Expand unambiguous .NET single-char date format shortcodes to full patterns.
16604 // Only expand shortcodes that are NOT also valid numeric format specifiers.
16605 // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
16606 // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
16607 let (expanded_fmt, is_shortcode) = match &fmt_expr {
16608 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
16609 let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
16610 match s.as_str() {
16611 "m" | "M" => (Expression::string("MMMM d"), true),
16612 "t" => (Expression::string("h:mm tt"), true),
16613 "T" => (Expression::string("h:mm:ss tt"), true),
16614 "y" | "Y" => (Expression::string("MMMM yyyy"), true),
16615 _ => (fmt_expr.clone(), false),
16616 }
16617 }
16618 _ => (fmt_expr.clone(), false),
16619 };
16620 // Check if the format looks like a date format
16621 let is_date_format = is_shortcode
16622 || match &expanded_fmt {
16623 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
16624 let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
16625 // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
16626 s.contains("yyyy")
16627 || s.contains("YYYY")
16628 || s.contains("MM")
16629 || s.contains("dd")
16630 || s.contains("MMMM")
16631 || s.contains("HH")
16632 || s.contains("hh")
16633 || s.contains("ss")
16634 }
16635 _ => false,
16636 };
16637 match target {
16638 DialectType::Spark | DialectType::Databricks => {
16639 let func_name = if is_date_format {
16640 "DATE_FORMAT"
16641 } else {
16642 "FORMAT_NUMBER"
16643 };
16644 Ok(Expression::Function(Box::new(Function::new(
16645 func_name.to_string(),
16646 vec![val_expr, expanded_fmt],
16647 ))))
16648 }
16649 _ => {
16650 // For TSQL and other targets, expand shortcodes but keep FORMAT
16651 if is_shortcode {
16652 Ok(Expression::Function(Box::new(Function::new(
16653 "FORMAT".to_string(),
16654 vec![val_expr, expanded_fmt],
16655 ))))
16656 } else {
16657 Ok(Expression::Function(f))
16658 }
16659 }
16660 }
16661 }
16662 // FORMAT('%s', x) from Trino/Presto -> target-specific
16663 "FORMAT"
16664 if f.args.len() >= 2
16665 && matches!(
16666 source,
16667 DialectType::Trino
16668 | DialectType::Presto
16669 | DialectType::Athena
16670 ) =>
16671 {
16672 let fmt_expr = f.args[0].clone();
16673 let value_args: Vec<Expression> = f.args[1..].to_vec();
16674 match target {
16675 // DuckDB: replace %s with {} in format string
16676 DialectType::DuckDB => {
16677 let new_fmt = match &fmt_expr {
16678 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
16679 let Literal::String(s) = lit.as_ref() else { unreachable!() };
16680 Expression::Literal(Box::new(Literal::String(
16681 s.replace("%s", "{}"),
16682 )))
16683 }
16684 _ => fmt_expr,
16685 };
16686 let mut args = vec![new_fmt];
16687 args.extend(value_args);
16688 Ok(Expression::Function(Box::new(Function::new(
16689 "FORMAT".to_string(),
16690 args,
16691 ))))
16692 }
16693 // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
16694 DialectType::Snowflake => match &fmt_expr {
16695 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "%s" && value_args.len() == 1)
16696 =>
16697 {
16698 let Literal::String(_) = lit.as_ref() else { unreachable!() };
16699 Ok(Expression::Function(Box::new(Function::new(
16700 "TO_CHAR".to_string(),
16701 value_args,
16702 ))))
16703 }
16704 _ => Ok(Expression::Function(f)),
16705 },
16706 // Default: keep FORMAT as-is
16707 _ => Ok(Expression::Function(f)),
16708 }
16709 }
16710 // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
16711 "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
16712 if f.args.len() == 2 =>
16713 {
16714 // When coming from Snowflake source: ARRAY_CONTAINS(value, array)
16715 // args[0]=value, args[1]=array. For DuckDB target, swap and add NULL-aware CASE.
16716 if matches!(target, DialectType::DuckDB)
16717 && matches!(source, DialectType::Snowflake)
16718 && f.name.eq_ignore_ascii_case("ARRAY_CONTAINS")
16719 {
16720 let value = f.args[0].clone();
16721 let array = f.args[1].clone();
16722
16723 // value IS NULL
16724 let value_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
16725 this: value.clone(),
16726 not: false,
16727 postfix_form: false,
16728 }));
16729
16730 // ARRAY_LENGTH(array)
16731 let array_length = Expression::Function(Box::new(Function::new(
16732 "ARRAY_LENGTH".to_string(),
16733 vec![array.clone()],
16734 )));
16735 // LIST_COUNT(array)
16736 let list_count = Expression::Function(Box::new(Function::new(
16737 "LIST_COUNT".to_string(),
16738 vec![array.clone()],
16739 )));
16740 // ARRAY_LENGTH(array) <> LIST_COUNT(array)
16741 let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
16742 left: array_length,
16743 right: list_count,
16744 left_comments: vec![],
16745 operator_comments: vec![],
16746 trailing_comments: vec![],
16747 inferred_type: None,
16748 }));
16749 // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
16750 let nullif = Expression::Nullif(Box::new(crate::expressions::Nullif {
16751 this: Box::new(neq),
16752 expression: Box::new(Expression::Boolean(crate::expressions::BooleanLiteral { value: false })),
16753 }));
16754
16755 // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
16756 let array_contains = Expression::Function(Box::new(Function::new(
16757 "ARRAY_CONTAINS".to_string(),
16758 vec![array, value],
16759 )));
16760
16761 // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
16762 return Ok(Expression::Case(Box::new(Case {
16763 operand: None,
16764 whens: vec![(value_is_null, nullif)],
16765 else_: Some(array_contains),
16766 comments: Vec::new(),
16767 inferred_type: None,
16768 })));
16769 }
16770 match target {
16771 DialectType::PostgreSQL | DialectType::Redshift => {
16772 // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
16773 let arr = f.args[0].clone();
16774 let needle = f.args[1].clone();
16775 // Convert [] to ARRAY[] for PostgreSQL
16776 let pg_arr = match arr {
16777 Expression::Array(a) => Expression::ArrayFunc(
16778 Box::new(crate::expressions::ArrayConstructor {
16779 expressions: a.expressions,
16780 bracket_notation: false,
16781 use_list_keyword: false,
16782 }),
16783 ),
16784 _ => arr,
16785 };
16786 // needle = ANY(arr) using the Any quantified expression
16787 let any_expr = Expression::Any(Box::new(
16788 crate::expressions::QuantifiedExpr {
16789 this: needle.clone(),
16790 subquery: pg_arr,
16791 op: Some(crate::expressions::QuantifiedOp::Eq),
16792 },
16793 ));
16794 let coalesce = Expression::Coalesce(Box::new(
16795 crate::expressions::VarArgFunc {
16796 expressions: vec![
16797 any_expr,
16798 Expression::Boolean(
16799 crate::expressions::BooleanLiteral {
16800 value: false,
16801 },
16802 ),
16803 ],
16804 original_name: None,
16805 inferred_type: None,
16806 },
16807 ));
16808 let is_null_check = Expression::IsNull(Box::new(
16809 crate::expressions::IsNull {
16810 this: needle,
16811 not: false,
16812 postfix_form: false,
16813 },
16814 ));
16815 Ok(Expression::Case(Box::new(Case {
16816 operand: None,
16817 whens: vec![(
16818 is_null_check,
16819 Expression::Null(crate::expressions::Null),
16820 )],
16821 else_: Some(coalesce),
16822 comments: Vec::new(),
16823 inferred_type: None,
16824 })))
16825 }
16826 _ => Ok(Expression::Function(Box::new(Function::new(
16827 "ARRAY_CONTAINS".to_string(),
16828 f.args,
16829 )))),
16830 }
16831 }
16832 // LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
16833 "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
16834 match target {
16835 DialectType::PostgreSQL | DialectType::Redshift => {
16836 // arr1 && arr2 with ARRAY[] syntax
16837 let mut args = f.args;
16838 let arr1 = args.remove(0);
16839 let arr2 = args.remove(0);
16840 let pg_arr1 = match arr1 {
16841 Expression::Array(a) => Expression::ArrayFunc(
16842 Box::new(crate::expressions::ArrayConstructor {
16843 expressions: a.expressions,
16844 bracket_notation: false,
16845 use_list_keyword: false,
16846 }),
16847 ),
16848 _ => arr1,
16849 };
16850 let pg_arr2 = match arr2 {
16851 Expression::Array(a) => Expression::ArrayFunc(
16852 Box::new(crate::expressions::ArrayConstructor {
16853 expressions: a.expressions,
16854 bracket_notation: false,
16855 use_list_keyword: false,
16856 }),
16857 ),
16858 _ => arr2,
16859 };
16860 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
16861 pg_arr1, pg_arr2,
16862 ))))
16863 }
16864 DialectType::DuckDB => {
16865 // DuckDB: arr1 && arr2 (native support)
16866 let mut args = f.args;
16867 let arr1 = args.remove(0);
16868 let arr2 = args.remove(0);
16869 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
16870 arr1, arr2,
16871 ))))
16872 }
16873 _ => Ok(Expression::Function(Box::new(Function::new(
16874 "LIST_HAS_ANY".to_string(),
16875 f.args,
16876 )))),
16877 }
16878 }
16879 // APPROX_QUANTILE(x, q) -> target-specific
16880 "APPROX_QUANTILE" if f.args.len() == 2 => match target {
16881 DialectType::Snowflake => Ok(Expression::Function(Box::new(
16882 Function::new("APPROX_PERCENTILE".to_string(), f.args),
16883 ))),
16884 DialectType::DuckDB => Ok(Expression::Function(f)),
16885 _ => Ok(Expression::Function(f)),
16886 },
16887 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
16888 "MAKE_DATE" if f.args.len() == 3 => match target {
16889 DialectType::BigQuery => Ok(Expression::Function(Box::new(
16890 Function::new("DATE".to_string(), f.args),
16891 ))),
16892 _ => Ok(Expression::Function(f)),
16893 },
16894 // RANGE(start, end[, step]) -> target-specific
16895 "RANGE"
16896 if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
16897 {
16898 let start = f.args[0].clone();
16899 let end = f.args[1].clone();
16900 let step = f.args.get(2).cloned();
16901 match target {
16902 // Snowflake ARRAY_GENERATE_RANGE uses exclusive end (same as DuckDB RANGE),
16903 // so just rename without adjusting the end argument.
16904 DialectType::Snowflake => {
16905 let mut args = vec![start, end];
16906 if let Some(s) = step {
16907 args.push(s);
16908 }
16909 Ok(Expression::Function(Box::new(Function::new(
16910 "ARRAY_GENERATE_RANGE".to_string(),
16911 args,
16912 ))))
16913 }
16914 DialectType::Spark | DialectType::Databricks => {
16915 // RANGE(start, end) -> SEQUENCE(start, end-1)
16916 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
16917 // RANGE(start, start) -> ARRAY() (empty)
16918 // RANGE(start, end, 0) -> ARRAY() (empty)
16919 // When end is variable: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
16920
16921 // Check for constant args
16922 fn extract_i64(e: &Expression) -> Option<i64> {
16923 match e {
16924 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
16925 let Literal::Number(n) = lit.as_ref() else { unreachable!() };
16926 n.parse::<i64>().ok()
16927 }
16928 Expression::Neg(u) => {
16929 if let Expression::Literal(lit) =
16930 &u.this
16931 {
16932 if let Literal::Number(n) = lit.as_ref() {
16933 n.parse::<i64>().ok().map(|v| -v)
16934 } else { None }
16935 } else {
16936 None
16937 }
16938 }
16939 _ => None,
16940 }
16941 }
16942 let start_val = extract_i64(&start);
16943 let end_val = extract_i64(&end);
16944 let step_val = step.as_ref().and_then(|s| extract_i64(s));
16945
16946 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
16947 if step_val == Some(0) {
16948 return Ok(Expression::Function(Box::new(
16949 Function::new("ARRAY".to_string(), vec![]),
16950 )));
16951 }
16952 if let (Some(s), Some(e_val)) = (start_val, end_val) {
16953 if s == e_val {
16954 return Ok(Expression::Function(Box::new(
16955 Function::new("ARRAY".to_string(), vec![]),
16956 )));
16957 }
16958 }
16959
16960 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
16961 // All constants - compute new end = end - step (if step provided) or end - 1
16962 match step_val {
16963 Some(st) if st < 0 => {
16964 // Negative step: SEQUENCE(start, end - step, step)
16965 let new_end = e_val - st; // end - step (= end + |step|)
16966 let mut args =
16967 vec![start, Expression::number(new_end)];
16968 if let Some(s) = step {
16969 args.push(s);
16970 }
16971 Ok(Expression::Function(Box::new(
16972 Function::new("SEQUENCE".to_string(), args),
16973 )))
16974 }
16975 Some(st) => {
16976 let new_end = e_val - st;
16977 let mut args =
16978 vec![start, Expression::number(new_end)];
16979 if let Some(s) = step {
16980 args.push(s);
16981 }
16982 Ok(Expression::Function(Box::new(
16983 Function::new("SEQUENCE".to_string(), args),
16984 )))
16985 }
16986 None => {
16987 // No step: SEQUENCE(start, end - 1)
16988 let new_end = e_val - 1;
16989 Ok(Expression::Function(Box::new(
16990 Function::new(
16991 "SEQUENCE".to_string(),
16992 vec![
16993 start,
16994 Expression::number(new_end),
16995 ],
16996 ),
16997 )))
16998 }
16999 }
17000 } else {
17001 // Variable end: IF((end - 1) < start, ARRAY(), SEQUENCE(start, (end - 1)))
17002 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
17003 end.clone(),
17004 Expression::number(1),
17005 )));
17006 let cond = Expression::Lt(Box::new(BinaryOp::new(
17007 Expression::Paren(Box::new(Paren {
17008 this: end_m1.clone(),
17009 trailing_comments: Vec::new(),
17010 })),
17011 start.clone(),
17012 )));
17013 let empty = Expression::Function(Box::new(
17014 Function::new("ARRAY".to_string(), vec![]),
17015 ));
17016 let mut seq_args = vec![
17017 start,
17018 Expression::Paren(Box::new(Paren {
17019 this: end_m1,
17020 trailing_comments: Vec::new(),
17021 })),
17022 ];
17023 if let Some(s) = step {
17024 seq_args.push(s);
17025 }
17026 let seq = Expression::Function(Box::new(
17027 Function::new("SEQUENCE".to_string(), seq_args),
17028 ));
17029 Ok(Expression::IfFunc(Box::new(
17030 crate::expressions::IfFunc {
17031 condition: cond,
17032 true_value: empty,
17033 false_value: Some(seq),
17034 original_name: None,
17035 inferred_type: None,
17036 },
17037 )))
17038 }
17039 }
17040 DialectType::SQLite => {
17041 // RANGE(start, end) -> GENERATE_SERIES(start, end)
17042 // The subquery wrapping is handled at the Alias level
17043 let mut args = vec![start, end];
17044 if let Some(s) = step {
17045 args.push(s);
17046 }
17047 Ok(Expression::Function(Box::new(Function::new(
17048 "GENERATE_SERIES".to_string(),
17049 args,
17050 ))))
17051 }
17052 _ => Ok(Expression::Function(f)),
17053 }
17054 }
17055 // ARRAY_REVERSE_SORT -> target-specific
17056 // (handled above as well, but also need DuckDB self-normalization)
17057 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
17058 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
17059 DialectType::Snowflake => Ok(Expression::Function(Box::new(
17060 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
17061 ))),
17062 DialectType::Spark | DialectType::Databricks => {
17063 Ok(Expression::Function(Box::new(Function::new(
17064 "MAP_FROM_ARRAYS".to_string(),
17065 f.args,
17066 ))))
17067 }
17068 _ => Ok(Expression::Function(Box::new(Function::new(
17069 "MAP".to_string(),
17070 f.args,
17071 )))),
17072 },
17073 // VARIANCE(x) -> varSamp(x) for ClickHouse
17074 "VARIANCE" if f.args.len() == 1 => match target {
17075 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
17076 Function::new("varSamp".to_string(), f.args),
17077 ))),
17078 _ => Ok(Expression::Function(f)),
17079 },
17080 // STDDEV(x) -> stddevSamp(x) for ClickHouse
17081 "STDDEV" if f.args.len() == 1 => match target {
17082 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
17083 Function::new("stddevSamp".to_string(), f.args),
17084 ))),
17085 _ => Ok(Expression::Function(f)),
17086 },
17087 // ISINF(x) -> IS_INF(x) for BigQuery
17088 "ISINF" if f.args.len() == 1 => match target {
17089 DialectType::BigQuery => Ok(Expression::Function(Box::new(
17090 Function::new("IS_INF".to_string(), f.args),
17091 ))),
17092 _ => Ok(Expression::Function(f)),
17093 },
17094 // CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
17095 "CONTAINS" if f.args.len() == 2 => match target {
17096 DialectType::Spark
17097 | DialectType::Databricks
17098 | DialectType::Hive => Ok(Expression::Function(Box::new(
17099 Function::new("ARRAY_CONTAINS".to_string(), f.args),
17100 ))),
17101 _ => Ok(Expression::Function(f)),
17102 },
17103 // ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto
17104 "ARRAY_CONTAINS" if f.args.len() == 2 => match target {
17105 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
17106 Ok(Expression::Function(Box::new(Function::new(
17107 "CONTAINS".to_string(),
17108 f.args,
17109 ))))
17110 }
17111 DialectType::DuckDB => Ok(Expression::Function(Box::new(
17112 Function::new("ARRAY_CONTAINS".to_string(), f.args),
17113 ))),
17114 _ => Ok(Expression::Function(f)),
17115 },
17116 // TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
17117 "TO_UNIXTIME" if f.args.len() == 1 => match target {
17118 DialectType::Hive
17119 | DialectType::Spark
17120 | DialectType::Databricks => Ok(Expression::Function(Box::new(
17121 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
17122 ))),
17123 _ => Ok(Expression::Function(f)),
17124 },
17125 // FROM_UNIXTIME(x) -> target-specific
17126 "FROM_UNIXTIME" if f.args.len() == 1 => {
17127 match target {
17128 DialectType::Hive
17129 | DialectType::Spark
17130 | DialectType::Databricks
17131 | DialectType::Presto
17132 | DialectType::Trino => Ok(Expression::Function(f)),
17133 DialectType::DuckDB => {
17134 // DuckDB: TO_TIMESTAMP(x)
17135 let arg = f.args.into_iter().next().unwrap();
17136 Ok(Expression::Function(Box::new(Function::new(
17137 "TO_TIMESTAMP".to_string(),
17138 vec![arg],
17139 ))))
17140 }
17141 DialectType::PostgreSQL => {
17142 // PG: TO_TIMESTAMP(col)
17143 let arg = f.args.into_iter().next().unwrap();
17144 Ok(Expression::Function(Box::new(Function::new(
17145 "TO_TIMESTAMP".to_string(),
17146 vec![arg],
17147 ))))
17148 }
17149 DialectType::Redshift => {
17150 // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
17151 let arg = f.args.into_iter().next().unwrap();
17152 let epoch_ts = Expression::Literal(Box::new(Literal::Timestamp(
17153 "epoch".to_string(),
17154 )));
17155 let interval = Expression::Interval(Box::new(
17156 crate::expressions::Interval {
17157 this: Some(Expression::string("1 SECOND")),
17158 unit: None,
17159 },
17160 ));
17161 let mul =
17162 Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
17163 let add =
17164 Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
17165 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
17166 this: add,
17167 trailing_comments: Vec::new(),
17168 })))
17169 }
17170 _ => Ok(Expression::Function(f)),
17171 }
17172 }
17173 // FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
17174 "FROM_UNIXTIME"
17175 if f.args.len() == 2
17176 && matches!(
17177 source,
17178 DialectType::Hive
17179 | DialectType::Spark
17180 | DialectType::Databricks
17181 ) =>
17182 {
17183 let mut args = f.args;
17184 let unix_ts = args.remove(0);
17185 let fmt_expr = args.remove(0);
17186 match target {
17187 DialectType::DuckDB => {
17188 // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
17189 let to_ts = Expression::Function(Box::new(Function::new(
17190 "TO_TIMESTAMP".to_string(),
17191 vec![unix_ts],
17192 )));
17193 if let Expression::Literal(lit) = &fmt_expr
17194 {
17195 if let crate::expressions::Literal::String(s) = lit.as_ref() {
17196 let c_fmt = Self::hive_format_to_c_format(s);
17197 Ok(Expression::Function(Box::new(Function::new(
17198 "STRFTIME".to_string(),
17199 vec![to_ts, Expression::string(&c_fmt)],
17200 ))))
17201 } else {
17202 Ok(Expression::Function(Box::new(Function::new(
17203 "STRFTIME".to_string(),
17204 vec![to_ts, fmt_expr],
17205 ))))
17206 }
17207 } else {
17208 Ok(Expression::Function(Box::new(Function::new(
17209 "STRFTIME".to_string(),
17210 vec![to_ts, fmt_expr],
17211 ))))
17212 }
17213 }
17214 DialectType::Presto
17215 | DialectType::Trino
17216 | DialectType::Athena => {
17217 // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
17218 let from_unix =
17219 Expression::Function(Box::new(Function::new(
17220 "FROM_UNIXTIME".to_string(),
17221 vec![unix_ts],
17222 )));
17223 if let Expression::Literal(lit) = &fmt_expr
17224 {
17225 if let crate::expressions::Literal::String(s) = lit.as_ref() {
17226 let p_fmt = Self::hive_format_to_presto_format(s);
17227 Ok(Expression::Function(Box::new(Function::new(
17228 "DATE_FORMAT".to_string(),
17229 vec![from_unix, Expression::string(&p_fmt)],
17230 ))))
17231 } else {
17232 Ok(Expression::Function(Box::new(Function::new(
17233 "DATE_FORMAT".to_string(),
17234 vec![from_unix, fmt_expr],
17235 ))))
17236 }
17237 } else {
17238 Ok(Expression::Function(Box::new(Function::new(
17239 "DATE_FORMAT".to_string(),
17240 vec![from_unix, fmt_expr],
17241 ))))
17242 }
17243 }
17244 _ => {
17245 // Keep as FROM_UNIXTIME(x, fmt) for other targets
17246 Ok(Expression::Function(Box::new(Function::new(
17247 "FROM_UNIXTIME".to_string(),
17248 vec![unix_ts, fmt_expr],
17249 ))))
17250 }
17251 }
17252 }
17253 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
17254 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
17255 let unit_str = Self::get_unit_str_static(&f.args[0]);
17256 // Get the raw unit text preserving original case
17257 let raw_unit = match &f.args[0] {
17258 Expression::Identifier(id) => id.name.clone(),
17259 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
17260 let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
17261 s.clone()
17262 }
17263 Expression::Column(col) => col.name.name.clone(),
17264 _ => unit_str.clone(),
17265 };
17266 match target {
17267 DialectType::TSQL | DialectType::Fabric => {
17268 // Preserve original case of unit for TSQL
17269 let unit_name = match unit_str.as_str() {
17270 "YY" | "YYYY" => "YEAR".to_string(),
17271 "QQ" | "Q" => "QUARTER".to_string(),
17272 "MM" | "M" => "MONTH".to_string(),
17273 "WK" | "WW" => "WEEK".to_string(),
17274 "DD" | "D" | "DY" => "DAY".to_string(),
17275 "HH" => "HOUR".to_string(),
17276 "MI" | "N" => "MINUTE".to_string(),
17277 "SS" | "S" => "SECOND".to_string(),
17278 _ => raw_unit.clone(), // preserve original case
17279 };
17280 let mut args = f.args;
17281 args[0] =
17282 Expression::Identifier(Identifier::new(&unit_name));
17283 Ok(Expression::Function(Box::new(Function::new(
17284 "DATEPART".to_string(),
17285 args,
17286 ))))
17287 }
17288 DialectType::Spark | DialectType::Databricks => {
17289 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
17290 // Preserve original case for non-abbreviation units
17291 let unit = match unit_str.as_str() {
17292 "YY" | "YYYY" => "YEAR".to_string(),
17293 "QQ" | "Q" => "QUARTER".to_string(),
17294 "MM" | "M" => "MONTH".to_string(),
17295 "WK" | "WW" => "WEEK".to_string(),
17296 "DD" | "D" | "DY" => "DAY".to_string(),
17297 "HH" => "HOUR".to_string(),
17298 "MI" | "N" => "MINUTE".to_string(),
17299 "SS" | "S" => "SECOND".to_string(),
17300 _ => raw_unit, // preserve original case
17301 };
17302 Ok(Expression::Extract(Box::new(
17303 crate::expressions::ExtractFunc {
17304 this: f.args[1].clone(),
17305 field: crate::expressions::DateTimeField::Custom(
17306 unit,
17307 ),
17308 },
17309 )))
17310 }
17311 _ => Ok(Expression::Function(Box::new(Function::new(
17312 "DATE_PART".to_string(),
17313 f.args,
17314 )))),
17315 }
17316 }
17317 // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
17318 // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
17319 // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
17320 // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
17321 "DATENAME" if f.args.len() == 2 => {
17322 let unit_str = Self::get_unit_str_static(&f.args[0]);
17323 let date_expr = f.args[1].clone();
17324 match unit_str.as_str() {
17325 "MM" | "M" | "MONTH" => match target {
17326 DialectType::TSQL => {
17327 let cast_date = Expression::Cast(Box::new(
17328 crate::expressions::Cast {
17329 this: date_expr,
17330 to: DataType::Custom {
17331 name: "DATETIME2".to_string(),
17332 },
17333 trailing_comments: Vec::new(),
17334 double_colon_syntax: false,
17335 format: None,
17336 default: None,
17337 inferred_type: None,
17338 },
17339 ));
17340 Ok(Expression::Function(Box::new(Function::new(
17341 "FORMAT".to_string(),
17342 vec![cast_date, Expression::string("MMMM")],
17343 ))))
17344 }
17345 DialectType::Spark | DialectType::Databricks => {
17346 let cast_date = Expression::Cast(Box::new(
17347 crate::expressions::Cast {
17348 this: date_expr,
17349 to: DataType::Timestamp {
17350 timezone: false,
17351 precision: None,
17352 },
17353 trailing_comments: Vec::new(),
17354 double_colon_syntax: false,
17355 format: None,
17356 default: None,
17357 inferred_type: None,
17358 },
17359 ));
17360 Ok(Expression::Function(Box::new(Function::new(
17361 "DATE_FORMAT".to_string(),
17362 vec![cast_date, Expression::string("MMMM")],
17363 ))))
17364 }
17365 _ => Ok(Expression::Function(f)),
17366 },
17367 "DW" | "WEEKDAY" => match target {
17368 DialectType::TSQL => {
17369 let cast_date = Expression::Cast(Box::new(
17370 crate::expressions::Cast {
17371 this: date_expr,
17372 to: DataType::Custom {
17373 name: "DATETIME2".to_string(),
17374 },
17375 trailing_comments: Vec::new(),
17376 double_colon_syntax: false,
17377 format: None,
17378 default: None,
17379 inferred_type: None,
17380 },
17381 ));
17382 Ok(Expression::Function(Box::new(Function::new(
17383 "FORMAT".to_string(),
17384 vec![cast_date, Expression::string("dddd")],
17385 ))))
17386 }
17387 DialectType::Spark | DialectType::Databricks => {
17388 let cast_date = Expression::Cast(Box::new(
17389 crate::expressions::Cast {
17390 this: date_expr,
17391 to: DataType::Timestamp {
17392 timezone: false,
17393 precision: None,
17394 },
17395 trailing_comments: Vec::new(),
17396 double_colon_syntax: false,
17397 format: None,
17398 default: None,
17399 inferred_type: None,
17400 },
17401 ));
17402 Ok(Expression::Function(Box::new(Function::new(
17403 "DATE_FORMAT".to_string(),
17404 vec![cast_date, Expression::string("EEEE")],
17405 ))))
17406 }
17407 _ => Ok(Expression::Function(f)),
17408 },
17409 _ => Ok(Expression::Function(f)),
17410 }
17411 }
17412 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
17413 "STRING_AGG" if f.args.len() >= 2 => {
17414 let x = f.args[0].clone();
17415 let sep = f.args[1].clone();
17416 match target {
17417 DialectType::MySQL
17418 | DialectType::SingleStore
17419 | DialectType::Doris
17420 | DialectType::StarRocks => Ok(Expression::GroupConcat(
17421 Box::new(crate::expressions::GroupConcatFunc {
17422 this: x,
17423 separator: Some(sep),
17424 order_by: None,
17425 distinct: false,
17426 filter: None,
17427 inferred_type: None,
17428 }),
17429 )),
17430 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
17431 crate::expressions::GroupConcatFunc {
17432 this: x,
17433 separator: Some(sep),
17434 order_by: None,
17435 distinct: false,
17436 filter: None,
17437 inferred_type: None,
17438 },
17439 ))),
17440 DialectType::PostgreSQL | DialectType::Redshift => {
17441 Ok(Expression::StringAgg(Box::new(
17442 crate::expressions::StringAggFunc {
17443 this: x,
17444 separator: Some(sep),
17445 order_by: None,
17446 distinct: false,
17447 filter: None,
17448 limit: None,
17449 inferred_type: None,
17450 },
17451 )))
17452 }
17453 _ => Ok(Expression::Function(f)),
17454 }
17455 }
17456 // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
17457 "JSON_ARRAYAGG" => match target {
17458 DialectType::PostgreSQL => {
17459 Ok(Expression::Function(Box::new(Function {
17460 name: "JSON_AGG".to_string(),
17461 ..(*f)
17462 })))
17463 }
17464 _ => Ok(Expression::Function(f)),
17465 },
17466 // SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
17467 "SCHEMA_NAME" => match target {
17468 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
17469 crate::expressions::CurrentSchema { this: None },
17470 ))),
17471 DialectType::SQLite => Ok(Expression::string("main")),
17472 _ => Ok(Expression::Function(f)),
17473 },
17474 // TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
17475 "TO_TIMESTAMP"
17476 if f.args.len() == 2
17477 && matches!(
17478 source,
17479 DialectType::Spark
17480 | DialectType::Databricks
17481 | DialectType::Hive
17482 )
17483 && matches!(target, DialectType::DuckDB) =>
17484 {
17485 let mut args = f.args;
17486 let val = args.remove(0);
17487 let fmt_expr = args.remove(0);
17488 if let Expression::Literal(ref lit) = fmt_expr {
17489 if let Literal::String(ref s) = lit.as_ref() {
// Convert a Java/Spark datetime format string (e.g. "yyyy-MM-dd HH:mm:ss")
// into a C strptime/strftime format string as understood by DuckDB.
//
// Multi-character tokens are rewritten longest-first ("yyyy" before "yy",
// "SSSSSS" before "ss" would matter if they overlapped) so that shorter
// tokens never consume part of a longer one. After the token pass, a
// character scan translates the timezone specifiers: Java 'z' (zone name)
// -> "%Z" and Java 'Z' (numeric offset) -> "%z". The scan copies any
// already-emitted "%x" pair verbatim so that the 'z'/'Z' produced by this
// very translation (or by the token pass) is never re-translated.
fn java_to_c_fmt(fmt: &str) -> String {
    let result = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        // Java "EEEE" is the full weekday name; in C strptime that is %A.
        // (%W is the week-of-year number, which would silently produce
        // wrong output here.)
        .replace("EEEE", "%A")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    let mut out = String::with_capacity(result.len());
    let chars: Vec<char> = result.chars().collect();
    let mut i = 0;
    while i < chars.len() {
        if chars[i] == '%' && i + 1 < chars.len() {
            // Existing "%x" specifier: copy both characters untouched.
            out.push(chars[i]);
            out.push(chars[i + 1]);
            i += 2;
        } else if chars[i] == 'z' {
            out.push_str("%Z");
            i += 1;
        } else if chars[i] == 'Z' {
            out.push_str("%z");
            i += 1;
        } else {
            out.push(chars[i]);
            i += 1;
        }
    }
    out
}
17523 let c_fmt = java_to_c_fmt(s);
17524 Ok(Expression::Function(Box::new(Function::new(
17525 "STRPTIME".to_string(),
17526 vec![val, Expression::string(&c_fmt)],
17527 ))))
17528 } else {
17529 Ok(Expression::Function(Box::new(Function::new(
17530 "STRPTIME".to_string(),
17531 vec![val, fmt_expr],
17532 ))))
17533 }
17534 } else {
17535 Ok(Expression::Function(Box::new(Function::new(
17536 "STRPTIME".to_string(),
17537 vec![val, fmt_expr],
17538 ))))
17539 }
17540 }
17541 // TO_DATE(x) 1-arg from Doris: date conversion
17542 "TO_DATE"
17543 if f.args.len() == 1
17544 && matches!(
17545 source,
17546 DialectType::Doris | DialectType::StarRocks
17547 ) =>
17548 {
17549 let arg = f.args.into_iter().next().unwrap();
17550 match target {
17551 DialectType::Oracle
17552 | DialectType::DuckDB
17553 | DialectType::TSQL => {
17554 // CAST(x AS DATE)
17555 Ok(Expression::Cast(Box::new(Cast {
17556 this: arg,
17557 to: DataType::Date,
17558 double_colon_syntax: false,
17559 trailing_comments: vec![],
17560 format: None,
17561 default: None,
17562 inferred_type: None,
17563 })))
17564 }
17565 DialectType::MySQL | DialectType::SingleStore => {
17566 // DATE(x)
17567 Ok(Expression::Function(Box::new(Function::new(
17568 "DATE".to_string(),
17569 vec![arg],
17570 ))))
17571 }
17572 _ => {
17573 // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
17574 Ok(Expression::Function(Box::new(Function::new(
17575 "TO_DATE".to_string(),
17576 vec![arg],
17577 ))))
17578 }
17579 }
17580 }
// TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
// Spark's TO_DATE yields NULL on unparseable input, so targets get a
// "try"-style equivalent where one exists.
"TO_DATE"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    let arg = f.args.into_iter().next().unwrap();
    match target {
        DialectType::DuckDB => {
            // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
            Ok(Expression::TryCast(Box::new(Cast {
                this: arg,
                to: DataType::Date,
                double_colon_syntax: false,
                trailing_comments: vec![],
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // CAST(CAST(x AS TIMESTAMP) AS DATE)
            Ok(Self::double_cast_timestamp_date(arg))
        }
        DialectType::Snowflake => {
            // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
            // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
            // NOTE(review): presumably relies on Snowflake format elements being
            // case-insensitive — confirm against the Snowflake generator.
            Ok(Expression::Function(Box::new(Function::new(
                "TRY_TO_DATE".to_string(),
                vec![arg, Expression::string("yyyy-mm-DD")],
            ))))
        }
        _ => {
            // Default: keep as TO_DATE(x)
            Ok(Expression::Function(Box::new(Function::new(
                "TO_DATE".to_string(),
                vec![arg],
            ))))
        }
    }
}
// TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
"TO_DATE"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    let mut args = f.args;
    let val = args.remove(0);
    let fmt_expr = args.remove(0);
    // Spark's default pattern gets the cheap 1-arg treatment below.
    let is_default_format = matches!(&fmt_expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "yyyy-MM-dd"));

    if is_default_format {
        // Default format: same as 1-arg form
        match target {
            DialectType::DuckDB => {
                // Safe conversion: TRY_CAST(x AS DATE)
                Ok(Expression::TryCast(Box::new(Cast {
                    this: val,
                    to: DataType::Date,
                    double_colon_syntax: false,
                    trailing_comments: vec![],
                    format: None,
                    default: None,
                    inferred_type: None,
                })))
            }
            DialectType::Presto
            | DialectType::Trino
            | DialectType::Athena => {
                Ok(Self::double_cast_timestamp_date(val))
            }
            DialectType::Snowflake => {
                // TRY_TO_DATE(x, format) with Snowflake format mapping
                // NOTE(review): this replace chain runs on the literal
                // "yyyy-MM-dd" and always yields "yyyy-mm-DD"; it could be a
                // plain constant (the "yyyy"->"yyyy" step is a no-op).
                let sf_fmt = "yyyy-MM-dd"
                    .replace("yyyy", "yyyy")
                    .replace("MM", "mm")
                    .replace("dd", "DD");
                Ok(Expression::Function(Box::new(Function::new(
                    "TRY_TO_DATE".to_string(),
                    vec![val, Expression::string(&sf_fmt)],
                ))))
            }
            _ => Ok(Expression::Function(Box::new(Function::new(
                "TO_DATE".to_string(),
                vec![val],
            )))),
        }
    } else {
        // Non-default format: use format-based parsing
        if let Expression::Literal(ref lit) = fmt_expr {
            if let Literal::String(ref s) = lit.as_ref() {
                match target {
                    DialectType::DuckDB => {
                        // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
// Convert a Java SimpleDateFormat pattern into a C strptime-style pattern.
// Multi-letter tokens are substituted first (longest-first where prefixes
// overlap: "yyyy" before "yy", "MM" before "mm"), then a second pass keeps
// the produced "%X" escapes intact and maps bare timezone letters
// (Java 'z' = zone name -> %Z, Java 'Z' = numeric offset -> %z).
fn java_to_c_fmt_todate(fmt: &str) -> String {
    // Ordered substitutions; order matters for overlapping tokens.
    const SUBS: [(&str, &str); 9] = [
        ("yyyy", "%Y"),
        ("SSSSSS", "%f"),
        ("EEEE", "%W"),
        ("MM", "%m"),
        ("dd", "%d"),
        ("HH", "%H"),
        ("mm", "%M"),
        ("ss", "%S"),
        ("yy", "%y"),
    ];
    let replaced = SUBS
        .iter()
        .fold(fmt.to_string(), |acc, (from, to)| acc.replace(from, to));
    let mut out = String::with_capacity(replaced.len());
    let mut it = replaced.chars().peekable();
    while let Some(c) = it.next() {
        match c {
            // Copy an existing "%X" escape verbatim so its letter is not
            // re-interpreted as a timezone token below.
            '%' if it.peek().is_some() => {
                out.push('%');
                out.push(it.next().unwrap());
            }
            'z' => out.push_str("%Z"),
            'Z' => out.push_str("%z"),
            other => out.push(other),
        }
    }
    out
}
let c_fmt = java_to_c_fmt_todate(s);
// CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
// TRY_STRPTIME preserves the NULL-on-failure semantics of
// Spark's TO_DATE.
let try_strptime =
    Expression::Function(Box::new(Function::new(
        "TRY_STRPTIME".to_string(),
        vec![val, Expression::string(&c_fmt)],
    )));
let cast_ts = Expression::Cast(Box::new(Cast {
    this: try_strptime,
    to: DataType::Timestamp {
        precision: None,
        timezone: false,
    },
    double_colon_syntax: false,
    trailing_comments: vec![],
    format: None,
    default: None,
    inferred_type: None,
}));
Ok(Expression::Cast(Box::new(Cast {
    this: cast_ts,
    to: DataType::Date,
    double_colon_syntax: false,
    trailing_comments: vec![],
    format: None,
    default: None,
    inferred_type: None,
})))
}
DialectType::Presto
| DialectType::Trino
| DialectType::Athena => {
    // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
    // DATE_PARSE takes MySQL-style %-tokens; map the Java pattern.
    let p_fmt = s
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");
    let date_parse =
        Expression::Function(Box::new(Function::new(
            "DATE_PARSE".to_string(),
            vec![val, Expression::string(&p_fmt)],
        )));
    Ok(Expression::Cast(Box::new(Cast {
        this: date_parse,
        to: DataType::Date,
        double_colon_syntax: false,
        trailing_comments: vec![],
        format: None,
        default: None,
        inferred_type: None,
    })))
}
DialectType::Snowflake => {
    // TRY_TO_DATE(x, snowflake_fmt)
    // NOTE(review): the Java-style pattern is passed through to
    // Snowflake unconverted — verify Snowflake accepts these
    // format elements.
    Ok(Expression::Function(Box::new(Function::new(
        "TRY_TO_DATE".to_string(),
        vec![val, Expression::string(s)],
    ))))
}
// Other targets: keep TO_DATE(x, fmt) untouched.
_ => Ok(Expression::Function(Box::new(Function::new(
    "TO_DATE".to_string(),
    vec![val, fmt_expr],
)))),
}
} else {
    // Format is a non-string literal: pass through unchanged.
    Ok(Expression::Function(Box::new(Function::new(
        "TO_DATE".to_string(),
        vec![val, fmt_expr],
    ))))
}
} else {
    // Format is not a literal at all: pass through unchanged.
    Ok(Expression::Function(Box::new(Function::new(
        "TO_DATE".to_string(),
        vec![val, fmt_expr],
    ))))
}
}
}
// TO_TIMESTAMP(x) 1-arg: epoch conversion
// DuckDB's 1-arg TO_TIMESTAMP(n) treats n as Unix epoch seconds; map it to
// the target's epoch-seconds constructor.
"TO_TIMESTAMP"
    if f.args.len() == 1
        && matches!(source, DialectType::DuckDB)
        && matches!(
            target,
            DialectType::BigQuery
                | DialectType::Presto
                | DialectType::Trino
                | DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
                | DialectType::Athena
        ) =>
{
    let arg = f.args.into_iter().next().unwrap();
    let func_name = match target {
        DialectType::BigQuery => "TIMESTAMP_SECONDS",
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena
        | DialectType::Hive
        | DialectType::Spark
        | DialectType::Databricks => "FROM_UNIXTIME",
        // Unreachable given the target guard above; kept as a safe fallback.
        _ => "TO_TIMESTAMP",
    };
    Ok(Expression::Function(Box::new(Function::new(
        func_name.to_string(),
        vec![arg],
    ))))
}
// CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
"CONCAT" if f.args.len() == 1 => {
    let arg = f.args.into_iter().next().unwrap();
    match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // CONCAT(a) -> CAST(a AS VARCHAR)
            Ok(Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::VarChar {
                    length: None,
                    parenthesized_length: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::TSQL => {
            // CONCAT(a) -> a (single-operand concatenation is the identity)
            Ok(arg)
        }
        DialectType::DuckDB => {
            // Keep CONCAT(a) for DuckDB (native support)
            Ok(Expression::Function(Box::new(Function::new(
                "CONCAT".to_string(),
                vec![arg],
            ))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // NULL-safe single-arg concat: CONCAT(COALESCE(a, ''))
            let coalesced = Expression::Coalesce(Box::new(
                crate::expressions::VarArgFunc {
                    expressions: vec![arg, Expression::string("")],
                    original_name: None,
                    inferred_type: None,
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "CONCAT".to_string(),
                vec![coalesced],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "CONCAT".to_string(),
            vec![arg],
        )))),
    }
}
// REGEXP_EXTRACT(a, p, group) 3-arg for BigQuery: BigQuery's default group
// is 0 (the whole match), so an explicit literal group index of 0 is
// redundant and can be dropped.
"REGEXP_EXTRACT"
    if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
{
    // If group_index is 0, drop it
    let drop_group = match &f.args[2] {
        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => { let Literal::Number(n) = lit.as_ref() else { unreachable!() }; n == "0" },
        _ => false,
    };
    if drop_group {
        let mut args = f.args;
        args.truncate(2);
        Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT".to_string(),
            args,
        ))))
    } else {
        // Non-zero (or non-literal) group index: pass through unchanged.
        Ok(Expression::Function(f))
    }
}
// REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
"REGEXP_EXTRACT"
    if f.args.len() == 4
        && matches!(target, DialectType::Snowflake) =>
{
    // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
    // Snowflake's signature is (subject, pattern, position, occurrence,
    // parameters, group_num): position/occurrence default to 1 and the
    // flags/group arguments are reordered.
    let mut args = f.args;
    let this = args.remove(0);
    let pattern = args.remove(0);
    let group = args.remove(0);
    let flags = args.remove(0);
    Ok(Expression::Function(Box::new(Function::new(
        "REGEXP_SUBSTR".to_string(),
        vec![
            this,
            pattern,
            Expression::number(1),
            Expression::number(1),
            flags,
            group,
        ],
    ))))
}
17925 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
17926 "REGEXP_SUBSTR"
17927 if f.args.len() == 3
17928 && matches!(
17929 target,
17930 DialectType::DuckDB
17931 | DialectType::Presto
17932 | DialectType::Trino
17933 | DialectType::Spark
17934 | DialectType::Databricks
17935 ) =>
17936 {
17937 let mut args = f.args;
17938 let this = args.remove(0);
17939 let pattern = args.remove(0);
17940 let position = args.remove(0);
17941 // Wrap subject in SUBSTRING(this, position) to apply the offset
17942 let substring_expr = Expression::Function(Box::new(Function::new(
17943 "SUBSTRING".to_string(),
17944 vec![this, position],
17945 )));
17946 let target_name = match target {
17947 DialectType::DuckDB => "REGEXP_EXTRACT",
17948 _ => "REGEXP_EXTRACT",
17949 };
17950 Ok(Expression::Function(Box::new(Function::new(
17951 target_name.to_string(),
17952 vec![substring_expr, pattern],
17953 ))))
17954 }
// TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
// Emulates MySQL's day-number-since-year-0 as the day difference from
// '0000-01-01' plus one, parenthesized for safe embedding.
"TO_DAYS" if f.args.len() == 1 => {
    let x = f.args.into_iter().next().unwrap();
    let epoch = Expression::string("0000-01-01");
    // Build the final target-specific expression directly
    let datediff_expr = match target {
        DialectType::MySQL | DialectType::SingleStore => {
            // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
            Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![x, epoch],
            )))
        }
        DialectType::DuckDB => {
            // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
            let cast_epoch = Expression::Cast(Box::new(Cast {
                this: epoch,
                to: DataType::Date,
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let cast_x = Expression::Cast(Box::new(Cast {
                this: x,
                to: DataType::Date,
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string("DAY"), cast_epoch, cast_x],
            )))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
            let cast_epoch = Self::double_cast_timestamp_date(epoch);
            let cast_x = Self::double_cast_timestamp_date(x);
            Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string("DAY"), cast_epoch, cast_x],
            )))
        }
        _ => {
            // Default: (DATEDIFF(x, '0000-01-01') + 1)
            Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![x, epoch],
            )))
        }
    };
    // Add 1 and wrap in parentheses so precedence is preserved when the
    // result is embedded in a larger expression.
    let add_one = Expression::Add(Box::new(BinaryOp::new(
        datediff_expr,
        Expression::number(1),
    )));
    Ok(Expression::Paren(Box::new(crate::expressions::Paren {
        this: add_one,
        trailing_comments: Vec::new(),
    })))
}
// STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
"STR_TO_DATE"
    if f.args.len() == 2
        && matches!(
            target,
            DialectType::Presto | DialectType::Trino
        ) =>
{
    let mut args = f.args;
    let x = args.remove(0);
    let format_expr = args.remove(0);
    // Check if the format contains time components
    // NOTE(review): '%M' is treated as a time component here, but in MySQL
    // STR_TO_DATE formats '%M' is the month name and '%i' is minutes —
    // confirm which convention these format strings follow.
    let has_time =
        if let Expression::Literal(ref lit) =
            format_expr
        {
            if let Literal::String(ref fmt) = lit.as_ref() {
                fmt.contains("%H")
                    || fmt.contains("%T")
                    || fmt.contains("%M")
                    || fmt.contains("%S")
                    || fmt.contains("%I")
                    || fmt.contains("%p")
            } else { false }
        } else {
            false
        };
    let date_parse = Expression::Function(Box::new(Function::new(
        "DATE_PARSE".to_string(),
        vec![x, format_expr],
    )));
    if has_time {
        // Has time components: just DATE_PARSE
        Ok(date_parse)
    } else {
        // Date-only: CAST(DATE_PARSE(...) AS DATE)
        Ok(Expression::Cast(Box::new(Cast {
            this: date_parse,
            to: DataType::Date,
            trailing_comments: Vec::new(),
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        })))
    }
}
// STR_TO_DATE(x, fmt) for PostgreSQL/Redshift: rewrite %-tokens into
// PostgreSQL format elements, then wrap TO_DATE in a TIMESTAMP cast.
"STR_TO_DATE"
    if f.args.len() == 2
        && matches!(
            target,
            DialectType::PostgreSQL | DialectType::Redshift
        ) =>
{
    let mut args = f.args;
    let x = args.remove(0);
    let fmt = args.remove(0);
    // Literal formats are rewritten token-by-token; any non-literal format
    // expression is passed through unchanged.
    let pg_fmt = match fmt {
        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => { let Literal::String(s) = lit.as_ref() else { unreachable!() }; Expression::string(
            &s.replace("%Y", "YYYY")
                .replace("%m", "MM")
                .replace("%d", "DD")
                .replace("%H", "HH24")
                .replace("%M", "MI")
                .replace("%S", "SS"),
        ) },
        other => other,
    };
    let to_date = Expression::Function(Box::new(Function::new(
        "TO_DATE".to_string(),
        vec![x, pg_fmt],
    )));
    Ok(Expression::Cast(Box::new(Cast {
        this: to_date,
        to: DataType::Timestamp {
            timezone: false,
            precision: None,
        },
        trailing_comments: Vec::new(),
        double_colon_syntax: false,
        format: None,
        default: None,
        inferred_type: None,
    })))
}
// RANGE(start, end) -> GENERATE_SERIES for SQLite
"RANGE"
    if (f.args.len() == 1 || f.args.len() == 2)
        && matches!(target, DialectType::SQLite) =>
{
    if f.args.len() == 2 {
        // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
        // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
        // NOTE(review): despite the comment above, no end-adjustment is
        // applied here, so the series would include `end` — confirm whether
        // the off-by-one is corrected elsewhere in the pipeline.
        let mut args = f.args;
        let start = args.remove(0);
        let end = args.remove(0);
        Ok(Expression::Function(Box::new(Function::new(
            "GENERATE_SERIES".to_string(),
            vec![start, end],
        ))))
    } else {
        // 1-arg RANGE: pass through unchanged.
        Ok(Expression::Function(f))
    }
}
// UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
// When source is Snowflake, keep as-is (args already in correct form)
"UNIFORM"
    if matches!(target, DialectType::Snowflake)
        && (f.args.len() == 2 || f.args.len() == 3) =>
{
    if matches!(source, DialectType::Snowflake) {
        // Snowflake -> Snowflake: keep as-is
        Ok(Expression::Function(f))
    } else {
        // Snowflake's UNIFORM needs a generator as the third argument:
        // wrap an optional seed into RANDOM(seed), defaulting to RANDOM().
        let mut args = f.args;
        let low = args.remove(0);
        let high = args.remove(0);
        let random = if !args.is_empty() {
            let seed = args.remove(0);
            Expression::Function(Box::new(Function::new(
                "RANDOM".to_string(),
                vec![seed],
            )))
        } else {
            Expression::Function(Box::new(Function::new(
                "RANDOM".to_string(),
                vec![],
            )))
        };
        Ok(Expression::Function(Box::new(Function::new(
            "UNIFORM".to_string(),
            vec![low, high, random],
        ))))
    }
}
18156 // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
18157 "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
18158 let mut args = f.args;
18159 let ts_arg = args.remove(0);
18160 let tz_arg = args.remove(0);
18161 // Cast string literal to TIMESTAMP for all targets
18162 let ts_cast =
18163 if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))) {
18164 Expression::Cast(Box::new(Cast {
18165 this: ts_arg,
18166 to: DataType::Timestamp {
18167 timezone: false,
18168 precision: None,
18169 },
18170 trailing_comments: vec![],
18171 double_colon_syntax: false,
18172 format: None,
18173 default: None,
18174 inferred_type: None,
18175 }))
18176 } else {
18177 ts_arg
18178 };
18179 match target {
18180 DialectType::Spark | DialectType::Databricks => {
18181 Ok(Expression::Function(Box::new(Function::new(
18182 "TO_UTC_TIMESTAMP".to_string(),
18183 vec![ts_cast, tz_arg],
18184 ))))
18185 }
18186 DialectType::Snowflake => {
18187 // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
18188 Ok(Expression::Function(Box::new(Function::new(
18189 "CONVERT_TIMEZONE".to_string(),
18190 vec![tz_arg, Expression::string("UTC"), ts_cast],
18191 ))))
18192 }
18193 DialectType::Presto
18194 | DialectType::Trino
18195 | DialectType::Athena => {
18196 // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
18197 let wtz = Expression::Function(Box::new(Function::new(
18198 "WITH_TIMEZONE".to_string(),
18199 vec![ts_cast, tz_arg],
18200 )));
18201 Ok(Expression::AtTimeZone(Box::new(
18202 crate::expressions::AtTimeZone {
18203 this: wtz,
18204 zone: Expression::string("UTC"),
18205 },
18206 )))
18207 }
18208 DialectType::BigQuery => {
18209 // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
18210 let cast_dt = Expression::Cast(Box::new(Cast {
18211 this: if let Expression::Cast(c) = ts_cast {
18212 c.this
18213 } else {
18214 ts_cast.clone()
18215 },
18216 to: DataType::Custom {
18217 name: "DATETIME".to_string(),
18218 },
18219 trailing_comments: vec![],
18220 double_colon_syntax: false,
18221 format: None,
18222 default: None,
18223 inferred_type: None,
18224 }));
18225 let ts_func =
18226 Expression::Function(Box::new(Function::new(
18227 "TIMESTAMP".to_string(),
18228 vec![cast_dt, tz_arg],
18229 )));
18230 Ok(Expression::Function(Box::new(Function::new(
18231 "DATETIME".to_string(),
18232 vec![ts_func, Expression::string("UTC")],
18233 ))))
18234 }
18235 _ => {
18236 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
18237 let atz1 = Expression::AtTimeZone(Box::new(
18238 crate::expressions::AtTimeZone {
18239 this: ts_cast,
18240 zone: tz_arg,
18241 },
18242 ));
18243 Ok(Expression::AtTimeZone(Box::new(
18244 crate::expressions::AtTimeZone {
18245 this: atz1,
18246 zone: Expression::string("UTC"),
18247 },
18248 )))
18249 }
18250 }
18251 }
// FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
// Inverse of TO_UTC_TIMESTAMP: interprets ts as UTC and renders it in tz.
"FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
    let mut args = f.args;
    let ts_arg = args.remove(0);
    let tz_arg = args.remove(0);
    // Cast string literal to TIMESTAMP
    let ts_cast =
        if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))) {
            Expression::Cast(Box::new(Cast {
                this: ts_arg,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }))
        } else {
            ts_arg
        };
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Native function: keep FROM_UTC_TIMESTAMP.
            Ok(Expression::Function(Box::new(Function::new(
                "FROM_UTC_TIMESTAMP".to_string(),
                vec![ts_cast, tz_arg],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
            Ok(Expression::Function(Box::new(Function::new(
                "AT_TIMEZONE".to_string(),
                vec![ts_cast, tz_arg],
            ))))
        }
        DialectType::Snowflake => {
            // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
            Ok(Expression::Function(Box::new(Function::new(
                "CONVERT_TIMEZONE".to_string(),
                vec![Expression::string("UTC"), tz_arg, ts_cast],
            ))))
        }
        _ => {
            // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
            Ok(Expression::AtTimeZone(Box::new(
                crate::expressions::AtTimeZone {
                    this: ts_cast,
                    zone: tz_arg,
                },
            )))
        }
    }
}
18309 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
18310 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
18311 let name = match target {
18312 DialectType::Snowflake => "OBJECT_CONSTRUCT",
18313 _ => "MAP",
18314 };
18315 Ok(Expression::Function(Box::new(Function::new(
18316 name.to_string(),
18317 f.args,
18318 ))))
18319 }
// STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
// The Presto family spells this function SPLIT_TO_MAP; the argument list is
// forwarded as-is. All other targets pass through unchanged.
"STR_TO_MAP" if f.args.len() >= 1 => match target {
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        Ok(Expression::Function(Box::new(Function::new(
            "SPLIT_TO_MAP".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// The arms below lower named pseudo-functions into dedicated Expression
// variants so the per-dialect generators can render them with the correct
// syntax instead of emitting a generic function call.

// TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
"TIME_TO_STR" if f.args.len() == 2 => {
    let mut args = f.args;
    let this = args.remove(0);
    let fmt_expr = args.remove(0);
    // Only literal string formats are honored; a non-literal expression
    // falls back to the default "%Y-%m-%d %H:%M:%S", while a non-string
    // literal yields an empty format.
    let format =
        if let Expression::Literal(lit) = fmt_expr {
            if let Literal::String(s) = lit.as_ref() {
                s.clone()
            } else { String::new() }
        } else {
            "%Y-%m-%d %H:%M:%S".to_string()
        };
    Ok(Expression::TimeToStr(Box::new(
        crate::expressions::TimeToStr {
            this: Box::new(this),
            format,
            culture: None,
            zone: None,
        },
    )))
}
// STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation
"STR_TO_TIME" if f.args.len() == 2 => {
    let mut args = f.args;
    let this = args.remove(0);
    let fmt_expr = args.remove(0);
    // Same format-extraction rules as TIME_TO_STR above.
    let format =
        if let Expression::Literal(lit) = fmt_expr {
            if let Literal::String(s) = lit.as_ref() {
                s.clone()
            } else { String::new() }
        } else {
            "%Y-%m-%d %H:%M:%S".to_string()
        };
    Ok(Expression::StrToTime(Box::new(
        crate::expressions::StrToTime {
            this: Box::new(this),
            format,
            zone: None,
            safe: None,
            target_type: None,
        },
    )))
}
// STR_TO_UNIX(x, fmt) -> Expression::StrToUnix for proper generation
"STR_TO_UNIX" if f.args.len() >= 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    // Optional second argument: only a literal string format is kept.
    let format = if !args.is_empty() {
        if let Expression::Literal(lit) = args.remove(0)
        {
            if let Literal::String(s) = lit.as_ref() {
                Some(s.clone())
            } else { None }
        } else {
            None
        }
    } else {
        None
    };
    Ok(Expression::StrToUnix(Box::new(
        crate::expressions::StrToUnix {
            this: Some(Box::new(this)),
            format,
        },
    )))
}
// TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation
"TIME_TO_UNIX" if f.args.len() == 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    Ok(Expression::TimeToUnix(Box::new(
        crate::expressions::UnaryFunc {
            this,
            original_name: None,
            inferred_type: None,
        },
    )))
}
// UNIX_TO_STR(x, fmt) -> Expression::UnixToStr for proper generation
"UNIX_TO_STR" if f.args.len() >= 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    // Optional format handled like STR_TO_UNIX above.
    let format = if !args.is_empty() {
        if let Expression::Literal(lit) = args.remove(0)
        {
            if let Literal::String(s) = lit.as_ref() {
                Some(s.clone())
            } else { None }
        } else {
            None
        }
    } else {
        None
    };
    Ok(Expression::UnixToStr(Box::new(
        crate::expressions::UnixToStr {
            this: Box::new(this),
            format,
        },
    )))
}
// UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation
"UNIX_TO_TIME" if f.args.len() == 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    Ok(Expression::UnixToTime(Box::new(
        crate::expressions::UnixToTime {
            this: Box::new(this),
            scale: None,
            zone: None,
            hours: None,
            minutes: None,
            format: None,
            target_type: None,
        },
    )))
}
// TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation
"TIME_STR_TO_DATE" if f.args.len() == 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    Ok(Expression::TimeStrToDate(Box::new(
        crate::expressions::UnaryFunc {
            this,
            original_name: None,
            inferred_type: None,
        },
    )))
}
// TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation
"TIME_STR_TO_TIME" if f.args.len() == 1 => {
    let mut args = f.args;
    let this = args.remove(0);
    Ok(Expression::TimeStrToTime(Box::new(
        crate::expressions::TimeStrToTime {
            this: Box::new(this),
            zone: None,
        },
    )))
}
// MONTHS_BETWEEN(end, start) -> DuckDB complex expansion
"MONTHS_BETWEEN" if f.args.len() == 2 => {
    match target {
        DialectType::DuckDB => {
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            let cast_end = Self::ensure_cast_date(end_date);
            let cast_start = Self::ensure_cast_date(start_date);
            // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
            // Whole months when both dates are month-ends, otherwise a
            // 31-day fractional component is added.
            let dd = Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![
                    Expression::string("MONTH"),
                    cast_start.clone(),
                    cast_end.clone(),
                ],
            )));
            let day_end =
                Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_end.clone()],
                )));
            let day_start =
                Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_start.clone()],
                )));
            let last_day_end =
                Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_end.clone()],
                )));
            let last_day_start =
                Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_start.clone()],
                )));
            let day_last_end = Expression::Function(Box::new(
                Function::new("DAY".to_string(), vec![last_day_end]),
            ));
            let day_last_start = Expression::Function(Box::new(
                Function::new("DAY".to_string(), vec![last_day_start]),
            ));
            let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                day_end.clone(),
                day_last_end,
            )));
            let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                day_start.clone(),
                day_last_start,
            )));
            let both_cond =
                Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
            let day_diff = Expression::Sub(Box::new(BinaryOp::new(
                day_end, day_start,
            )));
            // Parenthesize (DAY(end) - DAY(start)) before the division.
            let day_diff_paren = Expression::Paren(Box::new(
                crate::expressions::Paren {
                    this: day_diff,
                    trailing_comments: Vec::new(),
                },
            ));
            let frac = Expression::Div(Box::new(BinaryOp::new(
                day_diff_paren,
                Expression::Literal(Box::new(Literal::Number(
                    "31.0".to_string(),
                ))),
            )));
            let case_expr = Expression::Case(Box::new(Case {
                operand: None,
                whens: vec![(both_cond, Expression::number(0))],
                else_: Some(frac),
                comments: Vec::new(),
                inferred_type: None,
            }));
            Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
        }
        DialectType::Snowflake | DialectType::Redshift => {
            // DATEDIFF(MONTH, start, end) — whole-month difference only.
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            let unit = Expression::Identifier(Identifier::new("MONTH"));
            Ok(Expression::Function(Box::new(Function::new(
                "DATEDIFF".to_string(),
                vec![unit, start_date, end_date],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // DATE_DIFF('MONTH', start, end)
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![Expression::string("MONTH"), start_date, end_date],
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
// Drop the roundOff arg for non-Spark targets, keep it for Spark
"MONTHS_BETWEEN" if f.args.len() == 3 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            Ok(Expression::Function(f))
        }
        _ => {
            // Drop the 3rd arg and delegate to the 2-arg logic
            let mut args = f.args;
            let end_date = args.remove(0);
            let start_date = args.remove(0);
            // Re-create as 2-arg and process
            let f2 = Function::new(
                "MONTHS_BETWEEN".to_string(),
                vec![end_date, start_date],
            );
            let e2 = Expression::Function(Box::new(f2));
            // Recurse so the rebuilt 2-arg call hits the arm above.
            Self::cross_dialect_normalize(e2, source, target)
        }
    }
}
// TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP) for most targets
// Spark's 1-arg TO_TIMESTAMP is a plain conversion, unlike DuckDB's
// epoch-seconds variant handled by an earlier arm.
"TO_TIMESTAMP"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    let arg = f.args.into_iter().next().unwrap();
    Ok(Expression::Cast(Box::new(Cast {
        this: arg,
        to: DataType::Timestamp {
            timezone: false,
            precision: None,
        },
        trailing_comments: vec![],
        double_colon_syntax: false,
        format: None,
        default: None,
        inferred_type: None,
    })))
}
// STRING(x) -> CAST(x AS STRING) for Spark target
"STRING"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Spark | DialectType::Databricks
        ) =>
{
    let arg = f.args.into_iter().next().unwrap();
    // STRING is a native type only in the Hive/Spark family; every other
    // target gets a TEXT cast instead.
    let dt = match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => DataType::Custom {
            name: "STRING".to_string(),
        },
        _ => DataType::Text,
    };
    Ok(Expression::Cast(Box::new(Cast {
        this: arg,
        to: dt,
        trailing_comments: vec![],
        double_colon_syntax: false,
        format: None,
        default: None,
        inferred_type: None,
    })))
}
// LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
"LOGICAL_OR" if f.args.len() == 1 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "BOOL_OR",
        _ => "LOGICAL_OR",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
// Spark's SPLIT pattern is a regular expression, so targets must use their
// regex-splitting function rather than a plain delimiter split.
"SPLIT"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    let name = match target {
        DialectType::DuckDB => "STR_SPLIT_REGEX",
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => "REGEXP_SPLIT",
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => "SPLIT",
        // Other targets keep the name; their generators handle the rest.
        _ => "SPLIT",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
// TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
"TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        Ok(Expression::Function(Box::new(Function::new(
            "ELEMENT_AT".to_string(),
            f.args,
        ))))
    }
    DialectType::DuckDB => {
        // Render as a subscript expression: arr[idx].
        let mut args = f.args;
        let arr = args.remove(0);
        let idx = args.remove(0);
        Ok(Expression::Subscript(Box::new(
            crate::expressions::Subscript {
                this: arr,
                index: idx,
            },
        )))
    }
    _ => Ok(Expression::Function(f)),
},
// ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, LIST_FILTER for DuckDB
"ARRAY_FILTER" if f.args.len() == 2 => {
    let name = match target {
        DialectType::DuckDB => "LIST_FILTER",
        DialectType::StarRocks => "ARRAY_FILTER",
        _ => "FILTER",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        f.args,
    ))))
}
18716 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
18717 "FILTER" if f.args.len() == 2 => {
18718 let name = match target {
18719 DialectType::DuckDB => "LIST_FILTER",
18720 DialectType::StarRocks => "ARRAY_FILTER",
18721 _ => "FILTER",
18722 };
18723 Ok(Expression::Function(Box::new(Function::new(
18724 name.to_string(),
18725 f.args,
18726 ))))
18727 }
            // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
            //
            // Spark names the array-fold function AGGREGATE; the final "finish" lambda
            // is optional, hence the >= 3 arity guard.
            "REDUCE" if f.args.len() >= 3 => {
                let name = match target {
                    DialectType::Spark | DialectType::Databricks => "AGGREGATE",
                    _ => "REDUCE",
                };
                Ok(Expression::Function(Box::new(Function::new(
                    name.to_string(),
                    f.args,
                ))))
            }
            // CURRENT_SCHEMA() -> dialect-specific
            "CURRENT_SCHEMA" => {
                match target {
                    DialectType::PostgreSQL => {
                        // PostgreSQL: CURRENT_SCHEMA (no parens)
                        // Built by hand (instead of Function::new) to set no_parens.
                        Ok(Expression::Function(Box::new(Function {
                            name: "CURRENT_SCHEMA".to_string(),
                            args: vec![],
                            distinct: false,
                            trailing_comments: vec![],
                            use_bracket_syntax: false,
                            no_parens: true,
                            quoted: false,
                            span: None,
                            inferred_type: None,
                        })))
                    }
                    // MySQL-family: SCHEMA() is a synonym for DATABASE().
                    DialectType::MySQL
                    | DialectType::Doris
                    | DialectType::StarRocks => Ok(Expression::Function(Box::new(
                        Function::new("SCHEMA".to_string(), vec![]),
                    ))),
                    DialectType::TSQL => Ok(Expression::Function(Box::new(
                        Function::new("SCHEMA_NAME".to_string(), vec![]),
                    ))),
                    DialectType::SQLite => {
                        // SQLite has no schemas; the primary database is always "main".
                        Ok(Expression::Literal(Box::new(Literal::String("main".to_string()))))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for Spark/Hive/Databricks/ClickHouse
            //
            // These targets lack (or discourage) the 2-arg LTRIM form, so the call is
            // normalized to the SQL-standard TRIM(LEADING ... FROM ...) syntax.
            "LTRIM" if f.args.len() == 2 => match target {
                DialectType::Spark
                | DialectType::Hive
                | DialectType::Databricks
                | DialectType::ClickHouse => {
                    let mut args = f.args;
                    // Argument order is LTRIM(string, trim_chars).
                    let str_expr = args.remove(0);
                    let chars = args.remove(0);
                    Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
                        this: str_expr,
                        characters: Some(chars),
                        position: crate::expressions::TrimPosition::Leading,
                        sql_standard_syntax: true,
                        position_explicit: true,
                    })))
                }
                _ => Ok(Expression::Function(f)),
            },
            // RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str) for Spark/Hive/Databricks/ClickHouse
            //
            // Mirror of the LTRIM arm above with TRAILING position.
            "RTRIM" if f.args.len() == 2 => match target {
                DialectType::Spark
                | DialectType::Hive
                | DialectType::Databricks
                | DialectType::ClickHouse => {
                    let mut args = f.args;
                    let str_expr = args.remove(0);
                    let chars = args.remove(0);
                    Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
                        this: str_expr,
                        characters: Some(chars),
                        position: crate::expressions::TrimPosition::Trailing,
                        sql_standard_syntax: true,
                        position_explicit: true,
                    })))
                }
                _ => Ok(Expression::Function(f)),
            },
            // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
            // (1-arg case; a guard-less ARRAY_REVERSE arm further below catches
            // other arities.)
            "ARRAY_REVERSE" if f.args.len() == 1 => match target {
                DialectType::ClickHouse => {
                    // Rename in place, preserving all other Function fields.
                    let mut new_f = *f;
                    new_f.name = "arrayReverse".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
            // UUID() -> NEWID() for TSQL
            "UUID" if f.args.is_empty() => match target {
                DialectType::TSQL | DialectType::Fabric => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "NEWID".to_string(),
                        vec![],
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            },
            // FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse, FARMFINGERPRINT64(x) for Redshift
            //
            // Pure renames; argument is passed through untouched.
            "FARM_FINGERPRINT" if f.args.len() == 1 => match target {
                DialectType::ClickHouse => {
                    let mut new_f = *f;
                    new_f.name = "farmFingerprint64".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                DialectType::Redshift => {
                    let mut new_f = *f;
                    new_f.name = "FARMFINGERPRINT64".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
            // JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark, OBJECT_KEYS(x) for Snowflake
            "JSON_KEYS" => match target {
                DialectType::Databricks | DialectType::Spark => {
                    let mut new_f = *f;
                    new_f.name = "JSON_OBJECT_KEYS".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                DialectType::Snowflake => {
                    let mut new_f = *f;
                    new_f.name = "OBJECT_KEYS".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
            // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake
            "WEEKOFYEAR" => match target {
                DialectType::Snowflake => {
                    let mut new_f = *f;
                    new_f.name = "WEEKISO".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
            // FORMAT(fmt, args...) -> FORMAT_STRING(fmt, args...) for Databricks
            //
            // Restricted to the Generic source dialect: other dialects' FORMAT has
            // conflicting semantics (e.g. TSQL FORMAT(value, fmt)), so only a neutral
            // source is safe to rename here.
            "FORMAT"
                if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
            {
                match target {
                    DialectType::Databricks | DialectType::Spark => {
                        let mut new_f = *f;
                        new_f.name = "FORMAT_STRING".to_string();
                        Ok(Expression::Function(Box::new(new_f)))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // CONCAT_WS('-', args...) -> CONCAT_WS('-', CAST(arg AS VARCHAR), ...) for Presto/Trino
            //
            // Presto/Trino CONCAT_WS requires all value arguments to be VARCHAR, so
            // each argument after the separator is wrapped in a cast. The separator
            // itself is left uncast.
            "CONCAT_WS" if f.args.len() >= 2 => match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    let mut args = f.args;
                    let sep = args.remove(0);
                    let cast_args: Vec<Expression> = args
                        .into_iter()
                        .map(|a| {
                            Expression::Cast(Box::new(Cast {
                                this: a,
                                to: DataType::VarChar {
                                    length: None,
                                    parenthesized_length: false,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        })
                        .collect();
                    // Rebuild the argument list: separator first, then casted values.
                    let mut new_args = vec![sep];
                    new_args.extend(cast_args);
                    Ok(Expression::Function(Box::new(Function::new(
                        "CONCAT_WS".to_string(),
                        new_args,
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            },
            // ARRAY_SLICE(x, start, end) -> SLICE(x, start, end) for Presto/Trino/Databricks, arraySlice for ClickHouse
            "ARRAY_SLICE" if f.args.len() >= 2 => match target {
                // Special case checked first (guarded sub-pattern): Snowflake -> DuckDB
                // needs index-base translation, not just a rename.
                DialectType::DuckDB
                    if f.args.len() == 3
                        && matches!(source, DialectType::Snowflake) =>
                {
                    // Snowflake ARRAY_SLICE (0-indexed, exclusive end)
                    // -> DuckDB ARRAY_SLICE (1-indexed, inclusive end)
                    let mut args = f.args;
                    let arr = args.remove(0);
                    let start = args.remove(0);
                    let end = args.remove(0);

                    // CASE WHEN start >= 0 THEN start + 1 ELSE start END
                    // Non-negative 0-based starts shift up by one; negative
                    // (from-the-end) starts are passed through unchanged.
                    let adjusted_start = Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![(
                            Expression::Gte(Box::new(BinaryOp {
                                left: start.clone(),
                                right: Expression::number(0),
                                left_comments: vec![],
                                operator_comments: vec![],
                                trailing_comments: vec![],
                                inferred_type: None,
                            })),
                            Expression::Add(Box::new(BinaryOp {
                                left: start.clone(),
                                right: Expression::number(1),
                                left_comments: vec![],
                                operator_comments: vec![],
                                trailing_comments: vec![],
                                inferred_type: None,
                            })),
                        )],
                        else_: Some(start),
                        comments: vec![],
                        inferred_type: None,
                    }));

                    // CASE WHEN end < 0 THEN end - 1 ELSE end END
                    // A 0-based exclusive end equals the 1-based inclusive end for
                    // non-negative values, so those pass through; negative ends are
                    // shifted down by one.
                    let adjusted_end = Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![(
                            Expression::Lt(Box::new(BinaryOp {
                                left: end.clone(),
                                right: Expression::number(0),
                                left_comments: vec![],
                                operator_comments: vec![],
                                trailing_comments: vec![],
                                inferred_type: None,
                            })),
                            Expression::Sub(Box::new(BinaryOp {
                                left: end.clone(),
                                right: Expression::number(1),
                                left_comments: vec![],
                                operator_comments: vec![],
                                trailing_comments: vec![],
                                inferred_type: None,
                            })),
                        )],
                        else_: Some(end),
                        comments: vec![],
                        inferred_type: None,
                    }));

                    Ok(Expression::Function(Box::new(Function::new(
                        "ARRAY_SLICE".to_string(),
                        vec![arr, adjusted_start, adjusted_end],
                    ))))
                }
                DialectType::Presto
                | DialectType::Trino
                | DialectType::Athena
                | DialectType::Databricks
                | DialectType::Spark => {
                    let mut new_f = *f;
                    new_f.name = "SLICE".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                DialectType::ClickHouse => {
                    let mut new_f = *f;
                    new_f.name = "arraySlice".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
            // ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB (swap args)
            //
            // Note the argument order flips: DuckDB's LIST_PREPEND takes the value
            // first and the list second.
            "ARRAY_PREPEND" if f.args.len() == 2 => match target {
                DialectType::DuckDB => {
                    let mut args = f.args;
                    let arr = args.remove(0);
                    let val = args.remove(0);
                    Ok(Expression::Function(Box::new(Function::new(
                        "LIST_PREPEND".to_string(),
                        vec![val, arr],
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            },
            // ARRAY_REMOVE(arr, target) -> dialect-specific
            //
            // No direct equivalent in these targets, so the removal is expressed as a
            // filter keeping every element <> target. Note: with `<>` semantics, NULL
            // elements are dropped as well (NULL <> x is not TRUE) in all three
            // rewrites below.
            "ARRAY_REMOVE" if f.args.len() == 2 => {
                match target {
                    DialectType::DuckDB => {
                        let mut args = f.args;
                        let arr = args.remove(0);
                        let target_val = args.remove(0);
                        // "_u" is a synthetic lambda parameter name; chosen to be
                        // unlikely to collide with user identifiers.
                        let u_id = crate::expressions::Identifier::new("_u");
                        // LIST_FILTER(arr, _u -> _u <> target)
                        let lambda = Expression::Lambda(Box::new(
                            crate::expressions::LambdaExpr {
                                parameters: vec![u_id.clone()],
                                body: Expression::Neq(Box::new(BinaryOp {
                                    left: Expression::Identifier(u_id),
                                    right: target_val,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                })),
                                colon: false,
                                parameter_types: Vec::new(),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "LIST_FILTER".to_string(),
                            vec![arr, lambda],
                        ))))
                    }
                    DialectType::ClickHouse => {
                        let mut args = f.args;
                        let arr = args.remove(0);
                        let target_val = args.remove(0);
                        let u_id = crate::expressions::Identifier::new("_u");
                        // arrayFilter(_u -> _u <> target, arr)
                        // ClickHouse puts the lambda first, array second.
                        let lambda = Expression::Lambda(Box::new(
                            crate::expressions::LambdaExpr {
                                parameters: vec![u_id.clone()],
                                body: Expression::Neq(Box::new(BinaryOp {
                                    left: Expression::Identifier(u_id),
                                    right: target_val,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                })),
                                colon: false,
                                parameter_types: Vec::new(),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "arrayFilter".to_string(),
                            vec![lambda, arr],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
                        // BigQuery has no array-filter function, so the removal is
                        // built as an ARRAY(...) subquery over UNNEST.
                        let mut args = f.args;
                        let arr = args.remove(0);
                        let target_val = args.remove(0);
                        let u_id = crate::expressions::Identifier::new("_u");
                        let u_col =
                            Expression::Column(Box::new(crate::expressions::Column {
                                name: u_id.clone(),
                                table: None,
                                join_mark: false,
                                trailing_comments: Vec::new(),
                                span: None,
                                inferred_type: None,
                            }));
                        // UNNEST(the_array) AS _u
                        let unnest_expr = Expression::Unnest(Box::new(
                            crate::expressions::UnnestFunc {
                                this: arr,
                                expressions: Vec::new(),
                                with_ordinality: false,
                                alias: None,
                                offset_alias: None,
                            },
                        ));
                        let aliased_unnest = Expression::Alias(Box::new(
                            crate::expressions::Alias {
                                this: unnest_expr,
                                alias: u_id.clone(),
                                column_aliases: Vec::new(),
                                pre_alias_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            },
                        ));
                        // _u <> target
                        let where_cond = Expression::Neq(Box::new(BinaryOp {
                            left: u_col.clone(),
                            right: target_val,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
                        let subquery = Expression::Select(Box::new(
                            crate::expressions::Select::new()
                                .column(u_col)
                                .from(aliased_unnest)
                                .where_(where_cond),
                        ));
                        // ARRAY(subquery) -- use ArrayFunc with subquery as single element
                        Ok(Expression::ArrayFunc(Box::new(
                            crate::expressions::ArrayConstructor {
                                expressions: vec![subquery],
                                bracket_notation: false,
                                use_list_keyword: false,
                            },
                        )))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // PARSE_JSON(str) -> remove for SQLite/Doris (just use the string literal)
            //
            // These targets accept JSON text directly where Snowflake-style code uses
            // an explicit PARSE_JSON wrapper, so the wrapper is simply peeled off.
            "PARSE_JSON" if f.args.len() == 1 => {
                match target {
                    DialectType::SQLite
                    | DialectType::Doris
                    | DialectType::MySQL
                    | DialectType::StarRocks => {
                        // Strip PARSE_JSON, return the inner argument
                        // (guard guarantees exactly one argument, so unwrap is safe).
                        Ok(f.args.into_iter().next().unwrap())
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON
            // This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through
            // (the nested PARSE_JSON argument is normalized on its own visit).
            "JSON_REMOVE" => Ok(Expression::Function(f)),
            // JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON
            // This is handled by PARSE_JSON stripping above; JSON_SET is passed through
            "JSON_SET" => Ok(Expression::Function(f)),
            // DECODE(x, search1, result1, ..., default) -> CASE WHEN
            // Behavior per search value type:
            //   NULL literal -> CASE WHEN x IS NULL THEN result
            //   Literal (number, string, bool) -> CASE WHEN x = literal THEN result
            //   Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
            //
            // The non-literal branch reproduces Oracle DECODE's NULL-equals-NULL
            // matching, which plain `=` does not provide.
            "DECODE" if f.args.len() >= 3 => {
                // Keep as DECODE for targets that support it natively
                let keep_as_decode = matches!(
                    target,
                    DialectType::Oracle
                        | DialectType::Snowflake
                        | DialectType::Redshift
                        | DialectType::Teradata
                        | DialectType::Spark
                        | DialectType::Databricks
                );
                if keep_as_decode {
                    return Ok(Expression::Function(f));
                }

                let mut args = f.args;
                // First argument is the value being compared.
                let this_expr = args.remove(0);
                // Remaining arguments pair up as (search, result); an odd leftover
                // argument, if any, is the ELSE default.
                let mut pairs = Vec::new();
                let mut default = None;
                let mut i = 0;
                while i + 1 < args.len() {
                    pairs.push((args[i].clone(), args[i + 1].clone()));
                    i += 2;
                }
                if i < args.len() {
                    default = Some(args[i].clone());
                }
                // Helper: check if expression is a literal value
                // (Neg covers negative numeric literals like -1).
                fn is_literal(e: &Expression) -> bool {
                    matches!(
                        e,
                        Expression::Literal(_)
                            | Expression::Boolean(_)
                            | Expression::Neg(_)
                    )
                }
                let whens: Vec<(Expression, Expression)> = pairs
                    .into_iter()
                    .map(|(search, result)| {
                        if matches!(&search, Expression::Null(_)) {
                            // NULL search -> IS NULL
                            let condition = Expression::Is(Box::new(BinaryOp {
                                left: this_expr.clone(),
                                right: Expression::Null(crate::expressions::Null),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            (condition, result)
                        } else if is_literal(&search) {
                            // Literal search -> simple equality
                            // (literals are never NULL, so no null-safe check needed).
                            let eq = Expression::Eq(Box::new(BinaryOp {
                                left: this_expr.clone(),
                                right: search,
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            (eq, result)
                        } else {
                            // Non-literal (column ref, expression) -> null-safe comparison
                            // Comparison expressions must be parenthesized before being
                            // embedded on the right of `=` / `IS` to keep precedence.
                            let needs_paren = matches!(
                                &search,
                                Expression::Eq(_)
                                    | Expression::Neq(_)
                                    | Expression::Gt(_)
                                    | Expression::Gte(_)
                                    | Expression::Lt(_)
                                    | Expression::Lte(_)
                            );
                            let search_for_eq = if needs_paren {
                                Expression::Paren(Box::new(
                                    crate::expressions::Paren {
                                        this: search.clone(),
                                        trailing_comments: Vec::new(),
                                    },
                                ))
                            } else {
                                search.clone()
                            };
                            // x = search
                            let eq = Expression::Eq(Box::new(BinaryOp {
                                left: this_expr.clone(),
                                right: search_for_eq,
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            let search_for_null = if needs_paren {
                                Expression::Paren(Box::new(
                                    crate::expressions::Paren {
                                        this: search.clone(),
                                        trailing_comments: Vec::new(),
                                    },
                                ))
                            } else {
                                search.clone()
                            };
                            // x IS NULL
                            let x_is_null = Expression::Is(Box::new(BinaryOp {
                                left: this_expr.clone(),
                                right: Expression::Null(crate::expressions::Null),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            // search IS NULL
                            let s_is_null = Expression::Is(Box::new(BinaryOp {
                                left: search_for_null,
                                right: Expression::Null(crate::expressions::Null),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            // x IS NULL AND search IS NULL
                            let both_null = Expression::And(Box::new(BinaryOp {
                                left: x_is_null,
                                right: s_is_null,
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            // x = search OR (x IS NULL AND search IS NULL)
                            let condition = Expression::Or(Box::new(BinaryOp {
                                left: eq,
                                right: Expression::Paren(Box::new(
                                    crate::expressions::Paren {
                                        this: both_null,
                                        trailing_comments: Vec::new(),
                                    },
                                )),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            (condition, result)
                        }
                    })
                    .collect();
                Ok(Expression::Case(Box::new(Case {
                    operand: None,
                    whens,
                    else_: default,
                    comments: Vec::new(),
                    inferred_type: None,
                })))
            }
            // LEVENSHTEIN(a, b, ...) -> dialect-specific
            "LEVENSHTEIN" => {
                match target {
                    DialectType::BigQuery => {
                        let mut new_f = *f;
                        new_f.name = "EDIT_DISTANCE".to_string();
                        Ok(Expression::Function(Box::new(new_f)))
                    }
                    DialectType::Drill => {
                        let mut new_f = *f;
                        new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
                        Ok(Expression::Function(Box::new(new_f)))
                    }
                    DialectType::PostgreSQL if f.args.len() == 6 => {
                        // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
                        // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
                        // NOTE(review): Postgres also has a 3-arg
                        // levenshtein_less_equal(src, tgt, max_d); a 3-arg call falls
                        // through unchanged here — confirm whether that is intended.
                        let mut new_f = *f;
                        new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
                        Ok(Expression::Function(Box::new(new_f)))
                    }
                    _ => Ok(Expression::Function(f)),
                }
            }
            // ARRAY_MAX(x) -> arrayMax(x) for ClickHouse, LIST_MAX(x) for DuckDB
            "ARRAY_MAX" => {
                let name = match target {
                    DialectType::ClickHouse => "arrayMax",
                    DialectType::DuckDB => "LIST_MAX",
                    _ => "ARRAY_MAX",
                };
                let mut new_f = *f;
                new_f.name = name.to_string();
                Ok(Expression::Function(Box::new(new_f)))
            }
            // ARRAY_MIN(x) -> arrayMin(x) for ClickHouse, LIST_MIN(x) for DuckDB
            "ARRAY_MIN" => {
                let name = match target {
                    DialectType::ClickHouse => "arrayMin",
                    DialectType::DuckDB => "LIST_MIN",
                    _ => "ARRAY_MIN",
                };
                let mut new_f = *f;
                new_f.name = name.to_string();
                Ok(Expression::Function(Box::new(new_f)))
            }
            // JAROWINKLER_SIMILARITY(a, b) -> jaroWinklerSimilarity(UPPER(a), UPPER(b)) for ClickHouse
            // -> JARO_WINKLER_SIMILARITY(UPPER(a), UPPER(b)) for DuckDB
            //
            // Both arguments are upper-cased because the source function (presumably
            // Snowflake's, which is case-insensitive) must match the case-sensitive
            // target implementations — NOTE(review): confirm source semantics.
            "JAROWINKLER_SIMILARITY" if f.args.len() == 2 => {
                let mut args = f.args;
                // pop() yields the last element first, so b comes before a.
                let b = args.pop().unwrap();
                let a = args.pop().unwrap();
                match target {
                    DialectType::ClickHouse => {
                        let upper_a = Expression::Upper(Box::new(crate::expressions::UnaryFunc::new(a)));
                        let upper_b = Expression::Upper(Box::new(crate::expressions::UnaryFunc::new(b)));
                        Ok(Expression::Function(Box::new(Function::new(
                            "jaroWinklerSimilarity".to_string(),
                            vec![upper_a, upper_b],
                        ))))
                    }
                    DialectType::DuckDB => {
                        let upper_a = Expression::Upper(Box::new(crate::expressions::UnaryFunc::new(a)));
                        let upper_b = Expression::Upper(Box::new(crate::expressions::UnaryFunc::new(b)));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JARO_WINKLER_SIMILARITY".to_string(),
                            vec![upper_a, upper_b],
                        ))))
                    }
                    _ => {
                        // Rebuild the call unchanged for every other target.
                        Ok(Expression::Function(Box::new(Function::new(
                            "JAROWINKLER_SIMILARITY".to_string(),
                            vec![a, b],
                        ))))
                    }
                }
            }
            // CURRENT_SCHEMAS(x) -> CURRENT_SCHEMAS() for Snowflake (drop arg)
            //
            // Snowflake's CURRENT_SCHEMAS takes no arguments (unlike PostgreSQL's
            // boolean include-implicit flag), so any arguments are discarded.
            "CURRENT_SCHEMAS" => match target {
                DialectType::Snowflake => {
                    Ok(Expression::Function(Box::new(Function::new(
                        "CURRENT_SCHEMAS".to_string(),
                        vec![],
                    ))))
                }
                _ => Ok(Expression::Function(f)),
            },
            // TRUNC/TRUNCATE (numeric) -> dialect-specific
            //
            // Handles the numeric truncation form only (<= 2 args: value and optional
            // decimal count).
            "TRUNC" | "TRUNCATE" if f.args.len() <= 2 => {
                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        // ROUND(x, decimals, 1) - the 1 flag means truncation
                        let mut args = f.args;
                        let this = if args.is_empty() {
                            // Zero-arg call: nothing to rewrite, emit TRUNC() as-is.
                            return Ok(Expression::Function(Box::new(Function::new(
                                "TRUNC".to_string(), args,
                            ))));
                        } else {
                            args.remove(0)
                        };
                        // Missing decimals defaults to 0 (truncate to integer).
                        let decimals = if args.is_empty() {
                            Expression::Literal(Box::new(Literal::Number("0".to_string())))
                        } else {
                            args.remove(0)
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "ROUND".to_string(),
                            vec![this, decimals, Expression::Literal(Box::new(Literal::Number("1".to_string())))],
                        ))))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // TRUNCATE(x, decimals)
                        let mut new_f = *f;
                        new_f.name = "TRUNCATE".to_string();
                        Ok(Expression::Function(Box::new(new_f)))
                    }
                    DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
                        // TRUNCATE(x, decimals)
                        let mut new_f = *f;
                        new_f.name = "TRUNCATE".to_string();
                        Ok(Expression::Function(Box::new(new_f)))
                    }
                    DialectType::DuckDB => {
                        // TRUNC(x) - drop decimals
                        // NOTE(review): a non-zero decimals argument is silently
                        // discarded here, changing results for e.g. TRUNC(x, 2) —
                        // confirm this lossy mapping is intended for DuckDB.
                        let this = f.args.into_iter().next().unwrap_or(
                            Expression::Literal(Box::new(Literal::Number("0".to_string())))
                        );
                        Ok(Expression::Function(Box::new(Function::new(
                            "TRUNC".to_string(),
                            vec![this],
                        ))))
                    }
                    DialectType::ClickHouse => {
                        // trunc(x, decimals) - lowercase
                        let mut new_f = *f;
                        new_f.name = "trunc".to_string();
                        Ok(Expression::Function(Box::new(new_f)))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark: TRUNC is date-only; numeric TRUNC → CAST(x AS BIGINT)
                        // NOTE(review): like the DuckDB branch, this drops any decimals
                        // argument (and fractional truncation precision) — verify.
                        let this = f.args.into_iter().next().unwrap_or(
                            Expression::Literal(Box::new(Literal::Number("0".to_string())))
                        );
                        Ok(Expression::Cast(Box::new(crate::expressions::Cast {
                            this,
                            to: crate::expressions::DataType::BigInt { length: None },
                            double_colon_syntax: false,
                            trailing_comments: Vec::new(),
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    _ => {
                        // TRUNC(x, decimals) for PostgreSQL, Oracle, Snowflake, etc.
                        let mut new_f = *f;
                        new_f.name = "TRUNC".to_string();
                        Ok(Expression::Function(Box::new(new_f)))
                    }
                }
            }
            // CURRENT_VERSION() -> VERSION() for most dialects
            "CURRENT_VERSION" => match target {
                // These targets support CURRENT_VERSION natively; keep as-is.
                DialectType::Snowflake
                | DialectType::Databricks
                | DialectType::StarRocks => {
                    Ok(Expression::Function(f))
                }
                DialectType::SQLite => {
                    let mut new_f = *f;
                    new_f.name = "SQLITE_VERSION".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => {
                    let mut new_f = *f;
                    new_f.name = "VERSION".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
            },
            // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
            // (catch-all arity; the 1-arg case is already matched by the guarded
            // ARRAY_REVERSE arm earlier, so this arm only sees other arities.)
            "ARRAY_REVERSE" => match target {
                DialectType::ClickHouse => {
                    let mut new_f = *f;
                    new_f.name = "arrayReverse".to_string();
                    Ok(Expression::Function(Box::new(new_f)))
                }
                _ => Ok(Expression::Function(f)),
            },
            // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
            //
            // The step, when absent, defaults to INTERVAL '1' DAY in every branch.
            // Branch summary:
            //   BigQuery/Snowflake  -> keep name, ensure explicit 3rd interval arg
            //   DuckDB              -> CAST(GENERATE_SERIES(...) AS DATE[])
            //   Presto/Trino/Athena -> GenerateSeries node (rendered as SEQUENCE)
            //   Spark/Databricks    -> GenerateSeries node (rendered as SEQUENCE)
            //   MySQL/TSQL/Fabric/Redshift -> keep name for a later recursive-CTE pass
            //   everything else     -> GenerateSeries node
            "GENERATE_DATE_ARRAY" => {
                let mut args = f.args;
                if matches!(target, DialectType::BigQuery) {
                    // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
                    if args.len() == 2 {
                        let default_interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(Literal::String(
                                    "1".to_string(),
                                )))),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        args.push(default_interval);
                    }
                    Ok(Expression::Function(Box::new(Function::new(
                        "GENERATE_DATE_ARRAY".to_string(),
                        args,
                    ))))
                } else if matches!(target, DialectType::DuckDB) {
                    // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
                    let start = args.get(0).cloned();
                    let end = args.get(1).cloned();
                    let step = args.get(2).cloned().or_else(|| {
                        // Default step: INTERVAL '1' DAY.
                        Some(Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(Literal::String(
                                    "1".to_string(),
                                )))),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        )))
                    });
                    let gen_series = Expression::GenerateSeries(Box::new(
                        crate::expressions::GenerateSeries {
                            start: start.map(Box::new),
                            end: end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        },
                    ));
                    // Cast the series (of timestamps/dates) to DATE[] so the result
                    // type matches GENERATE_DATE_ARRAY's.
                    Ok(Expression::Cast(Box::new(Cast {
                        this: gen_series,
                        to: DataType::Array {
                            element_type: Box::new(DataType::Date),
                            dimension: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else if matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
                    let start = args.get(0).cloned();
                    let end = args.get(1).cloned();
                    let step = args.get(2).cloned().or_else(|| {
                        Some(Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(Literal::String(
                                    "1".to_string(),
                                )))),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        )))
                    });
                    let gen_series = Expression::GenerateSeries(Box::new(
                        crate::expressions::GenerateSeries {
                            start: start.map(Box::new),
                            end: end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        },
                    ));
                    Ok(gen_series)
                } else if matches!(
                    target,
                    DialectType::Spark | DialectType::Databricks
                ) {
                    // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
                    // NOTE(review): this branch is currently byte-identical to the
                    // Presto/Trino branch above; candidate for consolidation.
                    let start = args.get(0).cloned();
                    let end = args.get(1).cloned();
                    let step = args.get(2).cloned().or_else(|| {
                        Some(Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(Literal::String(
                                    "1".to_string(),
                                )))),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        )))
                    });
                    let gen_series = Expression::GenerateSeries(Box::new(
                        crate::expressions::GenerateSeries {
                            start: start.map(Box::new),
                            end: end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        },
                    ));
                    Ok(gen_series)
                } else if matches!(target, DialectType::Snowflake) {
                    // Snowflake: keep as GENERATE_DATE_ARRAY for later transform
                    if args.len() == 2 {
                        let default_interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(Literal::String(
                                    "1".to_string(),
                                )))),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        args.push(default_interval);
                    }
                    Ok(Expression::Function(Box::new(Function::new(
                        "GENERATE_DATE_ARRAY".to_string(),
                        args,
                    ))))
                } else if matches!(
                    target,
                    DialectType::MySQL
                        | DialectType::TSQL
                        | DialectType::Fabric
                        | DialectType::Redshift
                ) {
                    // MySQL/TSQL/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
                    // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
                    Ok(Expression::Function(Box::new(Function::new(
                        "GENERATE_DATE_ARRAY".to_string(),
                        args,
                    ))))
                } else {
                    // PostgreSQL/others: convert to GenerateSeries
                    let start = args.get(0).cloned();
                    let end = args.get(1).cloned();
                    let step = args.get(2).cloned().or_else(|| {
                        Some(Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(Literal::String(
                                    "1".to_string(),
                                )))),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        )))
                    });
                    Ok(Expression::GenerateSeries(Box::new(
                        crate::expressions::GenerateSeries {
                            start: start.map(Box::new),
                            end: end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        },
                    )))
                }
            }
// ARRAYS_OVERLAP(arr1, arr2) from Snowflake -> DuckDB:
// (arr1 && arr2) OR (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
//
// DuckDB's `&&` (list overlap) ignores NULL elements, whereas Snowflake
// treats two arrays that each contain a NULL as overlapping. The second
// disjunct detects "contains a NULL" on each side.
// NOTE(review): relies on ARRAY_LENGTH counting all elements while
// LIST_COUNT counts only non-NULL ones — confirm against DuckDB docs.
"ARRAYS_OVERLAP"
    if f.args.len() == 2
        && matches!(source, DialectType::Snowflake)
        && matches!(target, DialectType::DuckDB) =>
{
    let mut args = f.args;
    let arr1 = args.remove(0);
    let arr2 = args.remove(0);

    // (arr1 && arr2)
    let overlap = Expression::Paren(Box::new(Paren {
        this: Expression::ArrayOverlaps(Box::new(BinaryOp {
            left: arr1.clone(),
            right: arr2.clone(),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        })),
        trailing_comments: vec![],
    }));

    // ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1)
    // True exactly when arr1 holds at least one NULL element.
    let arr1_has_null = Expression::Neq(Box::new(BinaryOp {
        left: Expression::Function(Box::new(Function::new(
            "ARRAY_LENGTH".to_string(),
            vec![arr1.clone()],
        ))),
        right: Expression::Function(Box::new(Function::new(
            "LIST_COUNT".to_string(),
            vec![arr1],
        ))),
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
        inferred_type: None,
    }));

    // ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2)
    let arr2_has_null = Expression::Neq(Box::new(BinaryOp {
        left: Expression::Function(Box::new(Function::new(
            "ARRAY_LENGTH".to_string(),
            vec![arr2.clone()],
        ))),
        right: Expression::Function(Box::new(Function::new(
            "LIST_COUNT".to_string(),
            vec![arr2],
        ))),
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
        inferred_type: None,
    }));

    // (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
    let null_check = Expression::Paren(Box::new(Paren {
        this: Expression::And(Box::new(BinaryOp {
            left: arr1_has_null,
            right: arr2_has_null,
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        })),
        trailing_comments: vec![],
    }));

    // (arr1 && arr2) OR (null_check)
    Ok(Expression::Or(Box::new(BinaryOp {
        left: overlap,
        right: null_check,
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
        inferred_type: None,
    })))
}
19754 // ARRAY_INTERSECTION([1, 2], [2, 3]) from Snowflake -> DuckDB:
19755 // Bag semantics using LIST_TRANSFORM/LIST_FILTER with GENERATE_SERIES
19756 "ARRAY_INTERSECTION"
19757 if f.args.len() == 2
19758 && matches!(source, DialectType::Snowflake)
19759 && matches!(target, DialectType::DuckDB) =>
19760 {
19761 let mut args = f.args;
19762 let arr1 = args.remove(0);
19763 let arr2 = args.remove(0);
19764
19765 // Build: arr1 IS NULL
19766 let arr1_is_null = Expression::IsNull(Box::new(IsNull {
19767 this: arr1.clone(),
19768 not: false,
19769 postfix_form: false,
19770 }));
19771 let arr2_is_null = Expression::IsNull(Box::new(IsNull {
19772 this: arr2.clone(),
19773 not: false,
19774 postfix_form: false,
19775 }));
19776 let null_check = Expression::Or(Box::new(BinaryOp {
19777 left: arr1_is_null,
19778 right: arr2_is_null,
19779 left_comments: vec![],
19780 operator_comments: vec![],
19781 trailing_comments: vec![],
19782 inferred_type: None,
19783 }));
19784
19785 // GENERATE_SERIES(1, LENGTH(arr1))
19786 let gen_series = Expression::Function(Box::new(Function::new(
19787 "GENERATE_SERIES".to_string(),
19788 vec![
19789 Expression::number(1),
19790 Expression::Function(Box::new(Function::new(
19791 "LENGTH".to_string(),
19792 vec![arr1.clone()],
19793 ))),
19794 ],
19795 )));
19796
19797 // LIST_ZIP(arr1, GENERATE_SERIES(1, LENGTH(arr1)))
19798 let list_zip = Expression::Function(Box::new(Function::new(
19799 "LIST_ZIP".to_string(),
19800 vec![arr1.clone(), gen_series],
19801 )));
19802
19803 // pair[1] and pair[2]
19804 let pair_col = Expression::column("pair");
19805 let pair_1 = Expression::Subscript(Box::new(crate::expressions::Subscript {
19806 this: pair_col.clone(),
19807 index: Expression::number(1),
19808 }));
19809 let pair_2 = Expression::Subscript(Box::new(crate::expressions::Subscript {
19810 this: pair_col.clone(),
19811 index: Expression::number(2),
19812 }));
19813
19814 // arr1[1:pair[2]]
19815 let arr1_slice = Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
19816 this: arr1.clone(),
19817 start: Some(Expression::number(1)),
19818 end: Some(pair_2),
19819 }));
19820
19821 // e IS NOT DISTINCT FROM pair[1]
19822 let e_col = Expression::column("e");
19823 let is_not_distinct = Expression::NullSafeEq(Box::new(BinaryOp {
19824 left: e_col.clone(),
19825 right: pair_1.clone(),
19826 left_comments: vec![],
19827 operator_comments: vec![],
19828 trailing_comments: vec![],
19829 inferred_type: None,
19830 }));
19831
19832 // e -> e IS NOT DISTINCT FROM pair[1]
19833 let inner_lambda1 = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
19834 parameters: vec![crate::expressions::Identifier::new("e")],
19835 body: is_not_distinct,
19836 colon: false,
19837 parameter_types: vec![],
19838 }));
19839
19840 // LIST_FILTER(arr1[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
19841 let inner_filter1 = Expression::Function(Box::new(Function::new(
19842 "LIST_FILTER".to_string(),
19843 vec![arr1_slice, inner_lambda1],
19844 )));
19845
19846 // LENGTH(LIST_FILTER(arr1[1:pair[2]], ...))
19847 let len1 = Expression::Function(Box::new(Function::new(
19848 "LENGTH".to_string(),
19849 vec![inner_filter1],
19850 )));
19851
19852 // e -> e IS NOT DISTINCT FROM pair[1]
19853 let inner_lambda2 = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
19854 parameters: vec![crate::expressions::Identifier::new("e")],
19855 body: Expression::NullSafeEq(Box::new(BinaryOp {
19856 left: e_col,
19857 right: pair_1.clone(),
19858 left_comments: vec![],
19859 operator_comments: vec![],
19860 trailing_comments: vec![],
19861 inferred_type: None,
19862 })),
19863 colon: false,
19864 parameter_types: vec![],
19865 }));
19866
19867 // LIST_FILTER(arr2, e -> e IS NOT DISTINCT FROM pair[1])
19868 let inner_filter2 = Expression::Function(Box::new(Function::new(
19869 "LIST_FILTER".to_string(),
19870 vec![arr2.clone(), inner_lambda2],
19871 )));
19872
19873 // LENGTH(LIST_FILTER(arr2, ...))
19874 let len2 = Expression::Function(Box::new(Function::new(
19875 "LENGTH".to_string(),
19876 vec![inner_filter2],
19877 )));
19878
19879 // LENGTH(...) <= LENGTH(...)
19880 let cond = Expression::Paren(Box::new(Paren {
19881 this: Expression::Lte(Box::new(BinaryOp {
19882 left: len1,
19883 right: len2,
19884 left_comments: vec![],
19885 operator_comments: vec![],
19886 trailing_comments: vec![],
19887 inferred_type: None,
19888 })),
19889 trailing_comments: vec![],
19890 }));
19891
19892 // pair -> (condition)
19893 let filter_lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
19894 parameters: vec![crate::expressions::Identifier::new("pair")],
19895 body: cond,
19896 colon: false,
19897 parameter_types: vec![],
19898 }));
19899
19900 // LIST_FILTER(LIST_ZIP(...), pair -> ...)
19901 let outer_filter = Expression::Function(Box::new(Function::new(
19902 "LIST_FILTER".to_string(),
19903 vec![list_zip, filter_lambda],
19904 )));
19905
19906 // pair -> pair[1]
19907 let transform_lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
19908 parameters: vec![crate::expressions::Identifier::new("pair")],
19909 body: pair_1,
19910 colon: false,
19911 parameter_types: vec![],
19912 }));
19913
19914 // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
19915 let list_transform = Expression::Function(Box::new(Function::new(
19916 "LIST_TRANSFORM".to_string(),
19917 vec![outer_filter, transform_lambda],
19918 )));
19919
19920 // CASE WHEN arr1 IS NULL OR arr2 IS NULL THEN NULL
19921 // ELSE LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
19922 // END
19923 Ok(Expression::Case(Box::new(Case {
19924 operand: None,
19925 whens: vec![(null_check, Expression::Null(Null))],
19926 else_: Some(list_transform),
19927 comments: vec![],
19928 inferred_type: None,
19929 })))
19930 }
19931 // ARRAY_CONSTRUCT(args) -> Expression::Array for all targets
19932 "ARRAY_CONSTRUCT" => {
19933 Ok(Expression::Array(Box::new(crate::expressions::Array {
19934 expressions: f.args,
19935 })))
19936 }
19937 // ARRAY(args) function -> Expression::Array for DuckDB/Snowflake/Presto/Trino/Athena
19938 "ARRAY" if !f.args.iter().any(|a| matches!(a, Expression::Select(_) | Expression::Subquery(_))) => {
19939 match target {
19940 DialectType::DuckDB
19941 | DialectType::Snowflake
19942 | DialectType::Presto
19943 | DialectType::Trino
19944 | DialectType::Athena => {
19945 Ok(Expression::Array(Box::new(crate::expressions::Array {
19946 expressions: f.args,
19947 })))
19948 }
19949 _ => Ok(Expression::Function(f)),
19950 }
19951 }
19952 _ => Ok(Expression::Function(f)),
19953 }
} else if let Expression::AggregateFunction(mut af) = e {
    // Name-keyed rewrites for aggregate function calls.
    let name = af.name.to_ascii_uppercase();
    match name.as_str() {
        // ARBITRARY(x) -> the target's "pick any value" aggregate
        // (delegated to convert_arbitrary).
        "ARBITRARY" if af.args.len() == 1 => {
            let arg = af.args.into_iter().next().unwrap();
            Ok(convert_arbitrary(arg, target))
        }
        "JSON_ARRAYAGG" => {
            match target {
                DialectType::PostgreSQL => {
                    // PostgreSQL spells this aggregate JSON_AGG.
                    af.name = "JSON_AGG".to_string();
                    // Add NULLS FIRST to ORDER BY items for PostgreSQL
                    // NOTE(review): presumably this pins the source
                    // dialect's implicit null placement — confirm.
                    for ordered in af.order_by.iter_mut() {
                        if ordered.nulls_first.is_none() {
                            ordered.nulls_first = Some(true);
                        }
                    }
                    Ok(Expression::AggregateFunction(af))
                }
                _ => Ok(Expression::AggregateFunction(af)),
            }
        }
        _ => Ok(Expression::AggregateFunction(af)),
    }
} else if let Expression::JSONArrayAgg(ja) = e {
    // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
    match target {
        DialectType::PostgreSQL => {
            // Hoist the node's ORDER BY clause into the aggregate's own
            // order_by list, defaulting each item to NULLS FIRST.
            let mut order_by = Vec::new();
            if let Some(order_expr) = ja.order {
                if let Expression::OrderBy(ob) = *order_expr {
                    for mut ordered in ob.expressions {
                        if ordered.nulls_first.is_none() {
                            ordered.nulls_first = Some(true);
                        }
                        order_by.push(ordered);
                    }
                }
            }
            Ok(Expression::AggregateFunction(Box::new(
                crate::expressions::AggregateFunction {
                    name: "JSON_AGG".to_string(),
                    args: vec![*ja.this],
                    distinct: false,
                    filter: None,
                    order_by,
                    limit: None,
                    ignore_nulls: None,
                    inferred_type: None,
                },
            )))
        }
        _ => Ok(Expression::JSONArrayAgg(ja)),
    }
} else if let Expression::ToNumber(tn) = e {
    // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
    // NOTE(review): nothing here actually checks for format/precision/scale
    // — assumes the ToNumber node only reaches this action in its bare
    // single-argument form; confirm upstream.
    let arg = *tn.this;
    Ok(Expression::Cast(Box::new(crate::expressions::Cast {
        this: arg,
        to: crate::expressions::DataType::Double {
            precision: None,
            scale: None,
        },
        double_colon_syntax: false,
        trailing_comments: Vec::new(),
        format: None,
        default: None,
        inferred_type: None,
    })))
} else {
    // No rewrite applies: pass the expression through unchanged.
    Ok(e)
}
20026 }
20027
20028 Action::RegexpLikeToDuckDB => {
20029 if let Expression::RegexpLike(f) = e {
20030 let mut args = vec![f.this, f.pattern];
20031 if let Some(flags) = f.flags {
20032 args.push(flags);
20033 }
20034 Ok(Expression::Function(Box::new(Function::new(
20035 "REGEXP_MATCHES".to_string(),
20036 args,
20037 ))))
20038 } else {
20039 Ok(e)
20040 }
20041 }
20042 Action::EpochConvert => {
20043 if let Expression::Epoch(f) = e {
20044 let arg = f.this;
20045 let name = match target {
20046 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
20047 "UNIX_TIMESTAMP"
20048 }
20049 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
20050 DialectType::BigQuery => "TIME_TO_UNIX",
20051 _ => "EPOCH",
20052 };
20053 Ok(Expression::Function(Box::new(Function::new(
20054 name.to_string(),
20055 vec![arg],
20056 ))))
20057 } else {
20058 Ok(e)
20059 }
20060 }
20061 Action::EpochMsConvert => {
20062 use crate::expressions::{BinaryOp, Cast};
20063 if let Expression::EpochMs(f) = e {
20064 let arg = f.this;
20065 match target {
20066 DialectType::Spark | DialectType::Databricks => {
20067 Ok(Expression::Function(Box::new(Function::new(
20068 "TIMESTAMP_MILLIS".to_string(),
20069 vec![arg],
20070 ))))
20071 }
20072 DialectType::BigQuery => Ok(Expression::Function(Box::new(
20073 Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
20074 ))),
20075 DialectType::Presto | DialectType::Trino => {
20076 // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
20077 let cast_arg = Expression::Cast(Box::new(Cast {
20078 this: arg,
20079 to: DataType::Double {
20080 precision: None,
20081 scale: None,
20082 },
20083 trailing_comments: Vec::new(),
20084 double_colon_syntax: false,
20085 format: None,
20086 default: None,
20087 inferred_type: None,
20088 }));
20089 let div = Expression::Div(Box::new(BinaryOp::new(
20090 cast_arg,
20091 Expression::Function(Box::new(Function::new(
20092 "POW".to_string(),
20093 vec![Expression::number(10), Expression::number(3)],
20094 ))),
20095 )));
20096 Ok(Expression::Function(Box::new(Function::new(
20097 "FROM_UNIXTIME".to_string(),
20098 vec![div],
20099 ))))
20100 }
20101 DialectType::MySQL => {
20102 // FROM_UNIXTIME(x / POWER(10, 3))
20103 let div = Expression::Div(Box::new(BinaryOp::new(
20104 arg,
20105 Expression::Function(Box::new(Function::new(
20106 "POWER".to_string(),
20107 vec![Expression::number(10), Expression::number(3)],
20108 ))),
20109 )));
20110 Ok(Expression::Function(Box::new(Function::new(
20111 "FROM_UNIXTIME".to_string(),
20112 vec![div],
20113 ))))
20114 }
20115 DialectType::PostgreSQL | DialectType::Redshift => {
20116 // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
20117 let cast_arg = Expression::Cast(Box::new(Cast {
20118 this: arg,
20119 to: DataType::Custom {
20120 name: "DOUBLE PRECISION".to_string(),
20121 },
20122 trailing_comments: Vec::new(),
20123 double_colon_syntax: false,
20124 format: None,
20125 default: None,
20126 inferred_type: None,
20127 }));
20128 let div = Expression::Div(Box::new(BinaryOp::new(
20129 cast_arg,
20130 Expression::Function(Box::new(Function::new(
20131 "POWER".to_string(),
20132 vec![Expression::number(10), Expression::number(3)],
20133 ))),
20134 )));
20135 Ok(Expression::Function(Box::new(Function::new(
20136 "TO_TIMESTAMP".to_string(),
20137 vec![div],
20138 ))))
20139 }
20140 DialectType::ClickHouse => {
20141 // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
20142 let cast_arg = Expression::Cast(Box::new(Cast {
20143 this: arg,
20144 to: DataType::Nullable {
20145 inner: Box::new(DataType::BigInt { length: None }),
20146 },
20147 trailing_comments: Vec::new(),
20148 double_colon_syntax: false,
20149 format: None,
20150 default: None,
20151 inferred_type: None,
20152 }));
20153 Ok(Expression::Function(Box::new(Function::new(
20154 "fromUnixTimestamp64Milli".to_string(),
20155 vec![cast_arg],
20156 ))))
20157 }
20158 _ => Ok(Expression::Function(Box::new(Function::new(
20159 "EPOCH_MS".to_string(),
20160 vec![arg],
20161 )))),
20162 }
20163 } else {
20164 Ok(e)
20165 }
20166 }
20167 Action::TSQLTypeNormalize => {
20168 if let Expression::DataType(dt) = e {
20169 let new_dt = match &dt {
20170 DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
20171 DataType::Decimal {
20172 precision: Some(15),
20173 scale: Some(4),
20174 }
20175 }
20176 DataType::Custom { name }
20177 if name.eq_ignore_ascii_case("SMALLMONEY") =>
20178 {
20179 DataType::Decimal {
20180 precision: Some(6),
20181 scale: Some(4),
20182 }
20183 }
20184 DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
20185 DataType::Timestamp {
20186 timezone: false,
20187 precision: None,
20188 }
20189 }
20190 DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
20191 DataType::Float {
20192 precision: None,
20193 scale: None,
20194 real_spelling: false,
20195 }
20196 }
20197 DataType::Float {
20198 real_spelling: true,
20199 ..
20200 } => DataType::Float {
20201 precision: None,
20202 scale: None,
20203 real_spelling: false,
20204 },
20205 DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
20206 DataType::Custom {
20207 name: "BLOB".to_string(),
20208 }
20209 }
20210 DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
20211 DataType::Boolean
20212 }
20213 DataType::Custom { name }
20214 if name.eq_ignore_ascii_case("ROWVERSION") =>
20215 {
20216 DataType::Custom {
20217 name: "BINARY".to_string(),
20218 }
20219 }
20220 DataType::Custom { name }
20221 if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
20222 {
20223 match target {
20224 DialectType::Spark
20225 | DialectType::Databricks
20226 | DialectType::Hive => DataType::Custom {
20227 name: "STRING".to_string(),
20228 },
20229 _ => DataType::VarChar {
20230 length: Some(36),
20231 parenthesized_length: true,
20232 },
20233 }
20234 }
20235 DataType::Custom { name }
20236 if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
20237 {
20238 match target {
20239 DialectType::Spark
20240 | DialectType::Databricks
20241 | DialectType::Hive => DataType::Timestamp {
20242 timezone: false,
20243 precision: None,
20244 },
20245 _ => DataType::Timestamp {
20246 timezone: true,
20247 precision: None,
20248 },
20249 }
20250 }
20251 DataType::Custom { ref name }
20252 if name.len() >= 10 && name[..10].eq_ignore_ascii_case("DATETIME2(") =>
20253 {
20254 // DATETIME2(n) -> TIMESTAMP
20255 DataType::Timestamp {
20256 timezone: false,
20257 precision: None,
20258 }
20259 }
20260 DataType::Custom { ref name }
20261 if name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME(") =>
20262 {
20263 // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
20264 match target {
20265 DialectType::Spark
20266 | DialectType::Databricks
20267 | DialectType::Hive => DataType::Timestamp {
20268 timezone: false,
20269 precision: None,
20270 },
20271 _ => return Ok(Expression::DataType(dt)),
20272 }
20273 }
20274 DataType::Custom { ref name }
20275 if name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC") =>
20276 {
20277 // Parse NUMERIC(p,s) back to Decimal(p,s)
20278 let upper = name.to_ascii_uppercase();
20279 if let Some(inner) = upper
20280 .strip_prefix("NUMERIC(")
20281 .and_then(|s| s.strip_suffix(')'))
20282 {
20283 let parts: Vec<&str> = inner.split(',').collect();
20284 let precision =
20285 parts.first().and_then(|s| s.trim().parse::<u32>().ok());
20286 let scale =
20287 parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
20288 DataType::Decimal { precision, scale }
20289 } else if upper == "NUMERIC" {
20290 DataType::Decimal {
20291 precision: None,
20292 scale: None,
20293 }
20294 } else {
20295 return Ok(Expression::DataType(dt));
20296 }
20297 }
20298 DataType::Float {
20299 precision: Some(p), ..
20300 } => {
20301 // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
20302 // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
20303 let boundary = match target {
20304 DialectType::Hive
20305 | DialectType::Spark
20306 | DialectType::Databricks => 32,
20307 _ => 24,
20308 };
20309 if *p <= boundary {
20310 DataType::Float {
20311 precision: None,
20312 scale: None,
20313 real_spelling: false,
20314 }
20315 } else {
20316 DataType::Double {
20317 precision: None,
20318 scale: None,
20319 }
20320 }
20321 }
20322 DataType::TinyInt { .. } => match target {
20323 DialectType::DuckDB => DataType::Custom {
20324 name: "UTINYINT".to_string(),
20325 },
20326 DialectType::Hive
20327 | DialectType::Spark
20328 | DialectType::Databricks => DataType::SmallInt { length: None },
20329 _ => return Ok(Expression::DataType(dt)),
20330 },
20331 // INTEGER -> INT for Spark/Databricks
20332 DataType::Int {
20333 length,
20334 integer_spelling: true,
20335 } => DataType::Int {
20336 length: *length,
20337 integer_spelling: false,
20338 },
20339 _ => return Ok(Expression::DataType(dt)),
20340 };
20341 Ok(Expression::DataType(new_dt))
20342 } else {
20343 Ok(e)
20344 }
20345 }
Action::MySQLSafeDivide => {
    use crate::expressions::{BinaryOp, Cast};
    // Rewrite `a / b` so division by zero yields NULL instead of an error
    // (NULLIF on the divisor), and cast the dividend where the target would
    // otherwise perform truncating integer division.
    // NOTE(review): despite the action's name, this runs for many targets,
    // not only MySQL — the match below picks the per-target cast style.
    if let Expression::Div(op) = e {
        let left = op.left;
        let right = op.right;
        // For SQLite: CAST left as REAL but NO NULLIF wrapping
        if matches!(target, DialectType::SQLite) {
            let new_left = Expression::Cast(Box::new(Cast {
                this: left,
                to: DataType::Float {
                    precision: None,
                    scale: None,
                    real_spelling: true,
                },
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
        }
        // Wrap right in NULLIF(right, 0)
        let nullif_right = Expression::Function(Box::new(Function::new(
            "NULLIF".to_string(),
            vec![right, Expression::number(0)],
        )));
        // For some dialects, also CAST the left side
        let new_left = match target {
            // PostgreSQL family: DOUBLE PRECISION spelling.
            DialectType::PostgreSQL
            | DialectType::Redshift
            | DialectType::Teradata
            | DialectType::Materialize
            | DialectType::RisingWave => Expression::Cast(Box::new(Cast {
                this: left,
                to: DataType::Custom {
                    name: "DOUBLE PRECISION".to_string(),
                },
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })),
            // Presto family and Drill: plain DOUBLE.
            DialectType::Drill
            | DialectType::Trino
            | DialectType::Presto
            | DialectType::Athena => Expression::Cast(Box::new(Cast {
                this: left,
                to: DataType::Double {
                    precision: None,
                    scale: None,
                },
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })),
            // TSQL: FLOAT (not the REAL spelling).
            DialectType::TSQL => Expression::Cast(Box::new(Cast {
                this: left,
                to: DataType::Float {
                    precision: None,
                    scale: None,
                    real_spelling: false,
                },
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })),
            // Other targets need no cast on the dividend.
            _ => left,
        };
        Ok(Expression::Div(Box::new(BinaryOp::new(
            new_left,
            nullif_right,
        ))))
    } else {
        Ok(e)
    }
}
20428 Action::AlterTableRenameStripSchema => {
20429 if let Expression::AlterTable(mut at) = e {
20430 if let Some(crate::expressions::AlterTableAction::RenameTable(
20431 ref mut new_tbl,
20432 )) = at.actions.first_mut()
20433 {
20434 new_tbl.schema = None;
20435 new_tbl.catalog = None;
20436 }
20437 Ok(Expression::AlterTable(at))
20438 } else {
20439 Ok(e)
20440 }
20441 }
20442 Action::NullsOrdering => {
20443 // Fill in the source dialect's implied null ordering default.
20444 // This makes implicit null ordering explicit so the target generator
20445 // can correctly strip or keep it.
20446 //
20447 // Dialect null ordering categories:
20448 // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
20449 // ASC -> NULLS LAST, DESC -> NULLS FIRST
20450 // nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, ClickHouse, etc.):
20451 // ASC -> NULLS FIRST, DESC -> NULLS LAST
20452 // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena):
20453 // NULLS LAST always (both ASC and DESC)
20454 if let Expression::Ordered(mut o) = e {
20455 let is_asc = !o.desc;
20456
20457 let is_source_nulls_large = matches!(
20458 source,
20459 DialectType::Oracle
20460 | DialectType::PostgreSQL
20461 | DialectType::Redshift
20462 | DialectType::Snowflake
20463 );
20464 let is_source_nulls_last = matches!(
20465 source,
20466 DialectType::DuckDB
20467 | DialectType::Presto
20468 | DialectType::Trino
20469 | DialectType::Dremio
20470 | DialectType::Athena
20471 | DialectType::ClickHouse
20472 | DialectType::Drill
20473 | DialectType::Exasol
20474 | DialectType::DataFusion
20475 );
20476
20477 // Determine target category to check if default matches
20478 let is_target_nulls_large = matches!(
20479 target,
20480 DialectType::Oracle
20481 | DialectType::PostgreSQL
20482 | DialectType::Redshift
20483 | DialectType::Snowflake
20484 );
20485 let is_target_nulls_last = matches!(
20486 target,
20487 DialectType::DuckDB
20488 | DialectType::Presto
20489 | DialectType::Trino
20490 | DialectType::Dremio
20491 | DialectType::Athena
20492 | DialectType::ClickHouse
20493 | DialectType::Drill
20494 | DialectType::Exasol
20495 | DialectType::DataFusion
20496 );
20497
20498 // Compute the implied nulls_first for source
20499 let source_nulls_first = if is_source_nulls_large {
20500 !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
20501 } else if is_source_nulls_last {
20502 false // NULLS LAST always
20503 } else {
20504 is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
20505 };
20506
20507 // Compute the target's default
20508 let target_nulls_first = if is_target_nulls_large {
20509 !is_asc
20510 } else if is_target_nulls_last {
20511 false
20512 } else {
20513 is_asc
20514 };
20515
20516 // Only add explicit nulls ordering if source and target defaults differ
20517 if source_nulls_first != target_nulls_first {
20518 o.nulls_first = Some(source_nulls_first);
20519 }
20520 // If they match, leave nulls_first as None so the generator won't output it
20521
20522 Ok(Expression::Ordered(o))
20523 } else {
20524 Ok(e)
20525 }
20526 }
Action::StringAggConvert => {
    // Convert STRING_AGG (with or without a WITHIN GROUP wrapper) to each
    // target family's string-aggregation spelling.
    match e {
        Expression::WithinGroup(wg) => {
            // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
            // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
            let (x_opt, sep_opt, distinct) = match wg.this {
                Expression::AggregateFunction(ref af)
                    if af.name.eq_ignore_ascii_case("STRING_AGG")
                        && af.args.len() >= 2 =>
                {
                    (
                        Some(af.args[0].clone()),
                        Some(af.args[1].clone()),
                        af.distinct,
                    )
                }
                Expression::Function(ref f)
                    if f.name.eq_ignore_ascii_case("STRING_AGG")
                        && f.args.len() >= 2 =>
                {
                    // Plain Function nodes carry no DISTINCT flag.
                    (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
                }
                Expression::StringAgg(ref sa) => {
                    (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
                }
                _ => (None, None, false),
            };
            if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
                let order_by = wg.order_by;

                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        // Keep as WithinGroup(StringAgg) for TSQL
                        Ok(Expression::WithinGroup(Box::new(
                            crate::expressions::WithinGroup {
                                this: Expression::StringAgg(Box::new(
                                    crate::expressions::StringAggFunc {
                                        this: x,
                                        separator: Some(sep),
                                        order_by: None, // order_by goes in WithinGroup, not StringAgg
                                        distinct,
                                        filter: None,
                                        limit: None,
                                        inferred_type: None,
                                    },
                                )),
                                order_by,
                            },
                        )))
                    }
                    DialectType::MySQL
                    | DialectType::SingleStore
                    | DialectType::Doris
                    | DialectType::StarRocks => {
                        // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
                        Ok(Expression::GroupConcat(Box::new(
                            crate::expressions::GroupConcatFunc {
                                this: x,
                                separator: Some(sep),
                                order_by: Some(order_by),
                                distinct,
                                filter: None,
                                inferred_type: None,
                            },
                        )))
                    }
                    DialectType::SQLite => {
                        // GROUP_CONCAT(x, sep) - no ORDER BY support
                        // NOTE(review): the WITHIN GROUP ordering is silently
                        // dropped here — confirm that is intended for SQLite.
                        Ok(Expression::GroupConcat(Box::new(
                            crate::expressions::GroupConcatFunc {
                                this: x,
                                separator: Some(sep),
                                order_by: None,
                                distinct,
                                filter: None,
                                inferred_type: None,
                            },
                        )))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // STRING_AGG(x, sep ORDER BY z)
                        Ok(Expression::StringAgg(Box::new(
                            crate::expressions::StringAggFunc {
                                this: x,
                                separator: Some(sep),
                                order_by: Some(order_by),
                                distinct,
                                filter: None,
                                limit: None,
                                inferred_type: None,
                            },
                        )))
                    }
                    _ => {
                        // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
                        Ok(Expression::StringAgg(Box::new(
                            crate::expressions::StringAggFunc {
                                this: x,
                                separator: Some(sep),
                                order_by: Some(order_by),
                                distinct,
                                filter: None,
                                limit: None,
                                inferred_type: None,
                            },
                        )))
                    }
                }
            } else {
                // Not a STRING_AGG under WITHIN GROUP — leave untouched.
                Ok(Expression::WithinGroup(wg))
            }
        }
        Expression::StringAgg(sa) => {
            // Bare STRING_AGG node (no WITHIN GROUP wrapper).
            match target {
                DialectType::MySQL
                | DialectType::SingleStore
                | DialectType::Doris
                | DialectType::StarRocks => {
                    // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
                    Ok(Expression::GroupConcat(Box::new(
                        crate::expressions::GroupConcatFunc {
                            this: sa.this,
                            separator: sa.separator,
                            order_by: sa.order_by,
                            distinct: sa.distinct,
                            filter: sa.filter,
                            inferred_type: None,
                        },
                    )))
                }
                DialectType::SQLite => {
                    // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
                    Ok(Expression::GroupConcat(Box::new(
                        crate::expressions::GroupConcatFunc {
                            this: sa.this,
                            separator: sa.separator,
                            order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                            distinct: sa.distinct,
                            filter: sa.filter,
                            inferred_type: None,
                        },
                    )))
                }
                DialectType::Spark | DialectType::Databricks => {
                    // STRING_AGG(x, sep) -> LISTAGG(x, sep)
                    Ok(Expression::ListAgg(Box::new(
                        crate::expressions::ListAggFunc {
                            this: sa.this,
                            separator: sa.separator,
                            on_overflow: None,
                            order_by: sa.order_by,
                            distinct: sa.distinct,
                            filter: None,
                            inferred_type: None,
                        },
                    )))
                }
                _ => Ok(Expression::StringAgg(sa)),
            }
        }
        _ => Ok(e),
    }
}
Action::GroupConcatConvert => {
    // Rewrite GROUP_CONCAT into the target dialect's string-aggregation
    // construct (ARRAY_JOIN/ARRAY_AGG, LISTAGG, STRING_AGG, or GROUP_CONCAT
    // with dialect-specific adjustments).
    //
    // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
    // or CONCAT(a, b, c) -> a + b + c (for TSQL)
    fn expand_concat_to_dpipe(expr: Expression) -> Expression {
        if let Expression::Function(ref f) = expr {
            if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
                // Left-fold the arguments into a chain of || (Concat) nodes.
                let mut result = f.args[0].clone();
                for arg in &f.args[1..] {
                    result = Expression::Concat(Box::new(BinaryOp {
                        left: result,
                        right: arg.clone(),
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    }));
                }
                return result;
            }
        }
        // Not a multi-arg CONCAT call: pass through unchanged.
        expr
    }
    // Same left-fold as above but using + (Add), TSQL's string concatenation.
    fn expand_concat_to_plus(expr: Expression) -> Expression {
        if let Expression::Function(ref f) = expr {
            if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
                let mut result = f.args[0].clone();
                for arg in &f.args[1..] {
                    result = Expression::Add(Box::new(BinaryOp {
                        left: result,
                        right: arg.clone(),
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    }));
                }
                return result;
            }
        }
        expr
    }
    // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
    fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
        if let Expression::Function(ref f) = expr {
            if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
                let new_args: Vec<Expression> = f
                    .args
                    .iter()
                    .map(|arg| {
                        Expression::Cast(Box::new(crate::expressions::Cast {
                            this: arg.clone(),
                            to: crate::expressions::DataType::VarChar {
                                length: None,
                                parenthesized_length: false,
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }))
                    })
                    .collect();
                return Expression::Function(Box::new(
                    crate::expressions::Function::new(
                        "CONCAT".to_string(),
                        new_args,
                    ),
                ));
            }
        }
        expr
    }
    if let Expression::GroupConcat(gc) = e {
        match target {
            DialectType::Presto => {
                // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
                // Missing separator defaults to ','.
                let sep = gc.separator.unwrap_or(Expression::string(","));
                // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
                let this = wrap_concat_args_in_varchar_cast(gc.this);
                let array_agg =
                    Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
                        this,
                        distinct: gc.distinct,
                        filter: gc.filter,
                        order_by: gc.order_by.unwrap_or_default(),
                        name: None,
                        ignore_nulls: None,
                        having_max: None,
                        limit: None,
                        inferred_type: None,
                    }));
                Ok(Expression::ArrayJoin(Box::new(
                    crate::expressions::ArrayJoinFunc {
                        this: array_agg,
                        separator: sep,
                        null_replacement: None,
                    },
                )))
            }
            DialectType::Trino => {
                // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
                let sep = gc.separator.unwrap_or(Expression::string(","));
                // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
                let this = wrap_concat_args_in_varchar_cast(gc.this);
                Ok(Expression::ListAgg(Box::new(
                    crate::expressions::ListAggFunc {
                        this,
                        separator: Some(sep),
                        on_overflow: None,
                        order_by: gc.order_by,
                        distinct: gc.distinct,
                        filter: gc.filter,
                        inferred_type: None,
                    },
                )))
            }
            DialectType::PostgreSQL
            | DialectType::Redshift
            | DialectType::Snowflake
            | DialectType::DuckDB
            | DialectType::Hive
            | DialectType::ClickHouse => {
                // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
                let sep = gc.separator.unwrap_or(Expression::string(","));
                // Expand CONCAT(a,b,c) -> a || b || c for || dialects
                let this = expand_concat_to_dpipe(gc.this);
                // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
                // (only when the original ORDER BY left nulls placement
                // unspecified — an explicit setting is preserved).
                let order_by = if target == DialectType::PostgreSQL {
                    gc.order_by.map(|ords| {
                        ords.into_iter()
                            .map(|mut o| {
                                if o.nulls_first.is_none() {
                                    if o.desc {
                                        o.nulls_first = Some(false);
                                        // NULLS LAST
                                    } else {
                                        o.nulls_first = Some(true);
                                        // NULLS FIRST
                                    }
                                }
                                o
                            })
                            .collect()
                    })
                } else {
                    gc.order_by
                };
                Ok(Expression::StringAgg(Box::new(
                    crate::expressions::StringAggFunc {
                        this,
                        separator: Some(sep),
                        order_by,
                        distinct: gc.distinct,
                        filter: gc.filter,
                        limit: None,
                        inferred_type: None,
                    },
                )))
            }
            DialectType::TSQL => {
                // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
                // TSQL doesn't support DISTINCT in STRING_AGG
                let sep = gc.separator.unwrap_or(Expression::string(","));
                // Expand CONCAT(a,b,c) -> a + b + c for TSQL
                let this = expand_concat_to_plus(gc.this);
                Ok(Expression::StringAgg(Box::new(
                    crate::expressions::StringAggFunc {
                        this,
                        separator: Some(sep),
                        order_by: gc.order_by,
                        distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
                        filter: gc.filter,
                        limit: None,
                        inferred_type: None,
                    },
                )))
            }
            DialectType::SQLite => {
                // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
                // SQLite GROUP_CONCAT doesn't support ORDER BY
                // Expand CONCAT(a,b,c) -> a || b || c
                let this = expand_concat_to_dpipe(gc.this);
                Ok(Expression::GroupConcat(Box::new(
                    crate::expressions::GroupConcatFunc {
                        this,
                        separator: gc.separator,
                        order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
                        distinct: gc.distinct,
                        filter: gc.filter,
                        inferred_type: None,
                    },
                )))
            }
            DialectType::Spark | DialectType::Databricks => {
                // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
                // NOTE(review): unlike the Trino arm, `filter` is dropped
                // here — confirm that is intended.
                let sep = gc.separator.unwrap_or(Expression::string(","));
                Ok(Expression::ListAgg(Box::new(
                    crate::expressions::ListAggFunc {
                        this: gc.this,
                        separator: Some(sep),
                        on_overflow: None,
                        order_by: gc.order_by,
                        distinct: gc.distinct,
                        filter: None,
                        inferred_type: None,
                    },
                )))
            }
            DialectType::MySQL
            | DialectType::SingleStore
            | DialectType::StarRocks => {
                // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
                if gc.separator.is_none() {
                    let mut gc = gc;
                    gc.separator = Some(Expression::string(","));
                    Ok(Expression::GroupConcat(gc))
                } else {
                    Ok(Expression::GroupConcat(gc))
                }
            }
            // All other targets keep GROUP_CONCAT untouched.
            _ => Ok(Expression::GroupConcat(gc)),
        }
    } else {
        // Not a GroupConcat node: pass through unchanged.
        Ok(e)
    }
}
20917 Action::TempTableHash => {
20918 match e {
20919 Expression::CreateTable(mut ct) => {
20920 // TSQL #table -> TEMPORARY TABLE with # stripped from name
20921 let name = &ct.name.name.name;
20922 if name.starts_with('#') {
20923 ct.name.name.name = name.trim_start_matches('#').to_string();
20924 }
20925 // Set temporary flag
20926 ct.temporary = true;
20927 Ok(Expression::CreateTable(ct))
20928 }
20929 Expression::Table(mut tr) => {
20930 // Strip # from table references
20931 let name = &tr.name.name;
20932 if name.starts_with('#') {
20933 tr.name.name = name.trim_start_matches('#').to_string();
20934 }
20935 Ok(Expression::Table(tr))
20936 }
20937 Expression::DropTable(mut dt) => {
20938 // Strip # from DROP TABLE names
20939 for table_ref in &mut dt.names {
20940 if table_ref.name.name.starts_with('#') {
20941 table_ref.name.name =
20942 table_ref.name.name.trim_start_matches('#').to_string();
20943 }
20944 }
20945 Ok(Expression::DropTable(dt))
20946 }
20947 _ => Ok(e),
20948 }
20949 }
20950 Action::NvlClearOriginal => {
20951 if let Expression::Nvl(mut f) = e {
20952 f.original_name = None;
20953 Ok(Expression::Nvl(f))
20954 } else {
20955 Ok(e)
20956 }
20957 }
Action::HiveCastToTryCast => {
    // Convert Hive/Spark CAST to TRY_CAST for targets that support it.
    // Along the way, adjust the cast target type for two specific
    // source/target combinations before re-wrapping as TryCast.
    if let Expression::Cast(mut c) = e {
        // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
        // (Spark's TIMESTAMP is always timezone-aware)
        if matches!(target, DialectType::DuckDB)
            && matches!(source, DialectType::Spark | DialectType::Databricks)
            && matches!(
                c.to,
                DataType::Timestamp {
                    timezone: false,
                    ..
                }
            )
        {
            // Emit as a custom type name rather than the Timestamp variant.
            c.to = DataType::Custom {
                name: "TIMESTAMPTZ".to_string(),
            };
        }
        // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
        // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
        if matches!(target, DialectType::Databricks | DialectType::Spark)
            && matches!(
                source,
                DialectType::Spark | DialectType::Databricks | DialectType::Hive
            )
            && Self::has_varchar_char_type(&c.to)
        {
            c.to = Self::normalize_varchar_to_string(c.to);
        }
        // The actual conversion: same payload, TryCast node instead of Cast.
        Ok(Expression::TryCast(c))
    } else {
        // Not a Cast: pass through unchanged.
        Ok(e)
    }
}
Action::XorExpand => {
    // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
    // Snowflake: use BOOLXOR(a, b) instead
    if let Expression::Xor(xor) = e {
        // Collect all XOR operands. The Xor node can carry up to two named
        // operands (`this`, `expression`) plus a list of extras, so XOR is
        // effectively n-ary here.
        let mut operands = Vec::new();
        if let Some(this) = xor.this {
            operands.push(*this);
        }
        if let Some(expr) = xor.expression {
            operands.push(*expr);
        }
        operands.extend(xor.expressions);

        // Snowflake: use BOOLXOR(a, b) — only for the binary case; an n-ary
        // XOR falls through to the generic AND/OR expansion below.
        if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
            let a = operands.remove(0);
            let b = operands.remove(0);
            return Ok(Expression::Function(Box::new(Function::new(
                "BOOLXOR".to_string(),
                vec![a, b],
            ))));
        }

        // Helper to build (a AND NOT b) OR (NOT a AND b)
        // Each sub-expression is parenthesized so precedence survives
        // round-tripping through the generator.
        let make_xor = |a: Expression, b: Expression| -> Expression {
            let not_b = Expression::Not(Box::new(
                crate::expressions::UnaryOp::new(b.clone()),
            ));
            let not_a = Expression::Not(Box::new(
                crate::expressions::UnaryOp::new(a.clone()),
            ));
            let left_and = Expression::And(Box::new(BinaryOp {
                left: a,
                right: Expression::Paren(Box::new(Paren {
                    this: not_b,
                    trailing_comments: Vec::new(),
                })),
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));
            let right_and = Expression::And(Box::new(BinaryOp {
                left: Expression::Paren(Box::new(Paren {
                    this: not_a,
                    trailing_comments: Vec::new(),
                })),
                right: b,
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));
            Expression::Or(Box::new(BinaryOp {
                left: Expression::Paren(Box::new(Paren {
                    this: left_and,
                    trailing_comments: Vec::new(),
                })),
                right: Expression::Paren(Box::new(Paren {
                    this: right_and,
                    trailing_comments: Vec::new(),
                })),
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }))
        };

        if operands.len() >= 2 {
            // Left-fold: a XOR b XOR c expands pairwise as ((a xor b) xor c).
            let mut result = make_xor(operands.remove(0), operands.remove(0));
            for operand in operands {
                result = make_xor(result, operand);
            }
            Ok(result)
        } else if operands.len() == 1 {
            // A degenerate single-operand XOR is just that operand.
            Ok(operands.remove(0))
        } else {
            // No operands - return FALSE (shouldn't happen)
            Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                value: false,
            }))
        }
    } else {
        // Not a Xor node: pass through unchanged.
        Ok(e)
    }
}
21081 Action::DatePartUnquote => {
21082 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
21083 // Convert the quoted string first arg to a bare Column/Identifier
21084 if let Expression::Function(mut f) = e {
21085 if let Some(Expression::Literal(lit)) =
21086 f.args.first()
21087 {
21088 if let crate::expressions::Literal::String(s) = lit.as_ref() {
21089 let bare_name = s.to_ascii_lowercase();
21090 f.args[0] = Expression::Column(Box::new(crate::expressions::Column {
21091 name: Identifier::new(bare_name),
21092 table: None,
21093 join_mark: false,
21094 trailing_comments: Vec::new(),
21095 span: None,
21096 inferred_type: None,
21097 }));
21098 }
21099 }
21100 Ok(Expression::Function(f))
21101 } else {
21102 Ok(e)
21103 }
21104 }
21105 Action::ArrayLengthConvert => {
21106 // Extract the argument from the expression
21107 let arg = match e {
21108 Expression::Cardinality(ref f) => f.this.clone(),
21109 Expression::ArrayLength(ref f) => f.this.clone(),
21110 Expression::ArraySize(ref f) => f.this.clone(),
21111 _ => return Ok(e),
21112 };
21113 match target {
21114 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
21115 Ok(Expression::Function(Box::new(Function::new(
21116 "SIZE".to_string(),
21117 vec![arg],
21118 ))))
21119 }
21120 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
21121 Ok(Expression::Cardinality(Box::new(
21122 crate::expressions::UnaryFunc::new(arg),
21123 )))
21124 }
21125 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
21126 crate::expressions::UnaryFunc::new(arg),
21127 ))),
21128 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
21129 crate::expressions::UnaryFunc::new(arg),
21130 ))),
21131 DialectType::PostgreSQL | DialectType::Redshift => {
21132 // PostgreSQL ARRAY_LENGTH requires dimension arg
21133 Ok(Expression::Function(Box::new(Function::new(
21134 "ARRAY_LENGTH".to_string(),
21135 vec![arg, Expression::number(1)],
21136 ))))
21137 }
21138 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
21139 crate::expressions::UnaryFunc::new(arg),
21140 ))),
21141 _ => Ok(e), // Keep original
21142 }
21143 }
21144
21145 Action::JsonExtractToArrow => {
21146 // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
21147 if let Expression::JsonExtract(mut f) = e {
21148 f.arrow_syntax = true;
21149 // Transform path: convert bracket notation to dot notation
21150 // SQLite strips wildcards, DuckDB preserves them
21151 if let Expression::Literal(ref lit) = f.path {
21152 if let Literal::String(ref s) = lit.as_ref() {
21153 let mut transformed = s.clone();
21154 if matches!(target, DialectType::SQLite) {
21155 transformed = Self::strip_json_wildcards(&transformed);
21156 }
21157 transformed = Self::bracket_to_dot_notation(&transformed);
21158 if transformed != *s {
21159 f.path = Expression::string(&transformed);
21160 }
21161 }
21162 }
21163 Ok(Expression::JsonExtract(f))
21164 } else {
21165 Ok(e)
21166 }
21167 }
21168
21169 Action::JsonExtractToGetJsonObject => {
21170 if let Expression::JsonExtract(f) = e {
21171 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
21172 // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
21173 // Use proper decomposition that handles brackets
21174 let keys: Vec<Expression> =
21175 if let Expression::Literal(lit) = f.path {
21176 if let Literal::String(ref s) = lit.as_ref() {
21177 let parts = Self::decompose_json_path(s);
21178 parts.into_iter().map(|k| Expression::string(&k)).collect()
21179 } else { vec![] }
21180 } else {
21181 vec![f.path]
21182 };
21183 let func_name = if matches!(target, DialectType::Redshift) {
21184 "JSON_EXTRACT_PATH_TEXT"
21185 } else {
21186 "JSON_EXTRACT_PATH"
21187 };
21188 let mut args = vec![f.this];
21189 args.extend(keys);
21190 Ok(Expression::Function(Box::new(Function::new(
21191 func_name.to_string(),
21192 args,
21193 ))))
21194 } else {
21195 // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
21196 // Convert bracket double quotes to single quotes
21197 let path = if let Expression::Literal(ref lit) = f.path {
21198 if let Literal::String(ref s) = lit.as_ref() {
21199 let normalized = Self::bracket_to_single_quotes(s);
21200 if normalized != *s {
21201 Expression::string(&normalized)
21202 } else {
21203 f.path.clone()
21204 }
21205 } else { f.path.clone() }
21206 } else {
21207 f.path.clone()
21208 };
21209 Ok(Expression::Function(Box::new(Function::new(
21210 "GET_JSON_OBJECT".to_string(),
21211 vec![f.this, path],
21212 ))))
21213 }
21214 } else {
21215 Ok(e)
21216 }
21217 }
21218
21219 Action::JsonExtractScalarToGetJsonObject => {
21220 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
21221 if let Expression::JsonExtractScalar(f) = e {
21222 Ok(Expression::Function(Box::new(Function::new(
21223 "GET_JSON_OBJECT".to_string(),
21224 vec![f.this, f.path],
21225 ))))
21226 } else {
21227 Ok(e)
21228 }
21229 }
21230
Action::JsonExtractToTsql => {
    // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY(x, path), JSON_VALUE(x, path)) for TSQL
    // (JSON_QUERY handles object/array results, JSON_VALUE handles scalars;
    // ISNULL picks whichever one produced a non-NULL value.)
    let (this, path) = match e {
        Expression::JsonExtract(f) => (f.this, f.path),
        Expression::JsonExtractScalar(f) => (f.this, f.path),
        _ => return Ok(e),
    };
    // Transform path: strip wildcards, convert bracket notation to dot notation
    let transformed_path = if let Expression::Literal(ref lit) = path
    {
        if let Literal::String(ref s) = lit.as_ref() {
            let stripped = Self::strip_json_wildcards(s);
            let dotted = Self::bracket_to_dot_notation(&stripped);
            Expression::string(&dotted)
        } else { path.clone() }
    } else {
        // Non-literal path: pass it through as-is.
        path
    };
    let json_query = Expression::Function(Box::new(Function::new(
        "JSON_QUERY".to_string(),
        vec![this.clone(), transformed_path.clone()],
    )));
    let json_value = Expression::Function(Box::new(Function::new(
        "JSON_VALUE".to_string(),
        vec![this, transformed_path],
    )));
    Ok(Expression::Function(Box::new(Function::new(
        "ISNULL".to_string(),
        vec![json_query, json_value],
    ))))
}
21262
21263 Action::JsonExtractToClickHouse => {
21264 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString(x, 'key1', idx, 'key2') for ClickHouse
21265 let (this, path) = match e {
21266 Expression::JsonExtract(f) => (f.this, f.path),
21267 Expression::JsonExtractScalar(f) => (f.this, f.path),
21268 _ => return Ok(e),
21269 };
21270 let args: Vec<Expression> =
21271 if let Expression::Literal(lit) = path {
21272 if let Literal::String(ref s) = lit.as_ref() {
21273 let parts = Self::decompose_json_path(s);
21274 let mut result = vec![this];
21275 for part in parts {
21276 // ClickHouse uses 1-based integer indices for array access
21277 if let Ok(idx) = part.parse::<i64>() {
21278 result.push(Expression::number(idx + 1));
21279 } else {
21280 result.push(Expression::string(&part));
21281 }
21282 }
21283 result
21284 } else { vec![] }
21285 } else {
21286 vec![this, path]
21287 };
21288 Ok(Expression::Function(Box::new(Function::new(
21289 "JSONExtractString".to_string(),
21290 args,
21291 ))))
21292 }
21293
Action::JsonExtractScalarConvert => {
    // JSON_EXTRACT_SCALAR -> target-specific
    if let Expression::JsonExtractScalar(f) = e {
        match target {
            DialectType::PostgreSQL | DialectType::Redshift => {
                // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'key1', 'key2')
                // NOTE(review): a non-string literal path yields an empty key
                // list here, silently discarding the path — confirm intended.
                let keys: Vec<Expression> =
                    if let Expression::Literal(lit) = f.path {
                        if let Literal::String(ref s) = lit.as_ref() {
                            let parts = Self::decompose_json_path(s);
                            parts.into_iter().map(|k| Expression::string(&k)).collect()
                        } else { vec![] }
                    } else {
                        vec![f.path]
                    };
                let mut args = vec![f.this];
                args.extend(keys);
                Ok(Expression::Function(Box::new(Function::new(
                    "JSON_EXTRACT_PATH_TEXT".to_string(),
                    args,
                ))))
            }
            DialectType::Snowflake => {
                // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'stripped_path')
                // The leading '$.' is removed; the rest of the path stays intact.
                let stripped_path =
                    if let Expression::Literal(ref lit) = f.path {
                        if let Literal::String(ref s) = lit.as_ref() {
                            let stripped = Self::strip_json_dollar_prefix(s);
                            Expression::string(&stripped)
                        } else { f.path.clone() }
                    } else {
                        f.path
                    };
                Ok(Expression::Function(Box::new(Function::new(
                    "JSON_EXTRACT_PATH_TEXT".to_string(),
                    vec![f.this, stripped_path],
                ))))
            }
            DialectType::SQLite | DialectType::DuckDB => {
                // JSON_EXTRACT_SCALAR(x, '$.path') -> x ->> '$.path'
                // (arrow_syntax = true switches the generator to ->> output)
                Ok(Expression::JsonExtractScalar(Box::new(
                    crate::expressions::JsonExtractFunc {
                        this: f.this,
                        path: f.path,
                        returning: f.returning,
                        arrow_syntax: true,
                        hash_arrow_syntax: false,
                        wrapper_option: None,
                        quotes_option: None,
                        on_scalar_string: false,
                        on_error: None,
                    },
                )))
            }
            // Other targets keep JSON_EXTRACT_SCALAR unchanged.
            _ => Ok(Expression::JsonExtractScalar(f)),
        }
    } else {
        // Not a JsonExtractScalar node: pass through unchanged.
        Ok(e)
    }
}
21354
21355 Action::JsonPathNormalize => {
21356 // Normalize JSON path format for BigQuery, MySQL, etc.
21357 if let Expression::JsonExtract(mut f) = e {
21358 if let Expression::Literal(ref lit) = f.path {
21359 if let Literal::String(ref s) = lit.as_ref() {
21360 let mut normalized = s.clone();
21361 // Convert bracket notation and handle wildcards per dialect
21362 match target {
21363 DialectType::BigQuery => {
21364 // BigQuery strips wildcards and uses single quotes in brackets
21365 normalized = Self::strip_json_wildcards(&normalized);
21366 normalized = Self::bracket_to_single_quotes(&normalized);
21367 }
21368 DialectType::MySQL => {
21369 // MySQL preserves wildcards, converts brackets to dot notation
21370 normalized = Self::bracket_to_dot_notation(&normalized);
21371 }
21372 _ => {}
21373 }
21374 if normalized != *s {
21375 f.path = Expression::string(&normalized);
21376 }
21377 }
21378 }
21379 Ok(Expression::JsonExtract(f))
21380 } else {
21381 Ok(e)
21382 }
21383 }
21384
Action::JsonQueryValueConvert => {
    // JsonQuery/JsonValue -> target-specific
    // `is_query` remembers which variant matched so the default arm can
    // reconstruct the right function name.
    let (f, is_query) = match e {
        Expression::JsonQuery(f) => (f, true),
        Expression::JsonValue(f) => (f, false),
        _ => return Ok(e),
    };
    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // ISNULL(JSON_QUERY(...), JSON_VALUE(...))
            // JSON_QUERY covers object/array results, JSON_VALUE scalars.
            let json_query = Expression::Function(Box::new(Function::new(
                "JSON_QUERY".to_string(),
                vec![f.this.clone(), f.path.clone()],
            )));
            let json_value = Expression::Function(Box::new(Function::new(
                "JSON_VALUE".to_string(),
                vec![f.this, f.path],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ISNULL".to_string(),
                vec![json_query, json_value],
            ))))
        }
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                vec![f.this, f.path],
            ))))
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXTRACT_PATH_TEXT".to_string(),
                vec![f.this, f.path],
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // json -> path arrow syntax
            Ok(Expression::JsonExtract(Box::new(
                crate::expressions::JsonExtractFunc {
                    this: f.this,
                    path: f.path,
                    returning: f.returning,
                    arrow_syntax: true,
                    hash_arrow_syntax: false,
                    wrapper_option: f.wrapper_option,
                    quotes_option: f.quotes_option,
                    on_scalar_string: f.on_scalar_string,
                    on_error: f.on_error,
                },
            )))
        }
        DialectType::Snowflake => {
            // GET_PATH(PARSE_JSON(json), 'path')
            // Strip $. prefix from path
            // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
            let json_expr = match &f.this {
                Expression::Function(ref inner_f)
                    if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                {
                    f.this
                }
                Expression::ParseJson(_) => {
                    // Already a ParseJson expression, which generates as PARSE_JSON(...)
                    f.this
                }
                _ => Expression::Function(Box::new(Function::new(
                    "PARSE_JSON".to_string(),
                    vec![f.this],
                ))),
            };
            let path_str = match &f.path {
                Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
                    let Literal::String(s) = lit.as_ref() else { unreachable!() };
                    let stripped = s.strip_prefix("$.").unwrap_or(s);
                    Expression::Literal(Box::new(Literal::String(stripped.to_string())))
                }
                // Non-string paths are forwarded untouched.
                other => other.clone(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "GET_PATH".to_string(),
                vec![json_expr, path_str],
            ))))
        }
        _ => {
            // Default: keep as JSON_QUERY/JSON_VALUE function
            let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![f.this, f.path],
            ))))
        }
    }
}
21478
21479 Action::JsonLiteralToJsonParse => {
21480 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
21481 if let Expression::Cast(c) = e {
21482 let func_name = if matches!(target, DialectType::Snowflake) {
21483 "PARSE_JSON"
21484 } else {
21485 "JSON_PARSE"
21486 };
21487 Ok(Expression::Function(Box::new(Function::new(
21488 func_name.to_string(),
21489 vec![c.this],
21490 ))))
21491 } else {
21492 Ok(e)
21493 }
21494 }
21495
21496 Action::AtTimeZoneConvert => {
21497 // AT TIME ZONE -> target-specific conversion
21498 if let Expression::AtTimeZone(atz) = e {
21499 match target {
21500 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
21501 Ok(Expression::Function(Box::new(Function::new(
21502 "AT_TIMEZONE".to_string(),
21503 vec![atz.this, atz.zone],
21504 ))))
21505 }
21506 DialectType::Spark | DialectType::Databricks => {
21507 Ok(Expression::Function(Box::new(Function::new(
21508 "FROM_UTC_TIMESTAMP".to_string(),
21509 vec![atz.this, atz.zone],
21510 ))))
21511 }
21512 DialectType::Snowflake => {
21513 // CONVERT_TIMEZONE('zone', expr)
21514 Ok(Expression::Function(Box::new(Function::new(
21515 "CONVERT_TIMEZONE".to_string(),
21516 vec![atz.zone, atz.this],
21517 ))))
21518 }
21519 DialectType::BigQuery => {
21520 // TIMESTAMP(DATETIME(expr, 'zone'))
21521 let datetime_call = Expression::Function(Box::new(Function::new(
21522 "DATETIME".to_string(),
21523 vec![atz.this, atz.zone],
21524 )));
21525 Ok(Expression::Function(Box::new(Function::new(
21526 "TIMESTAMP".to_string(),
21527 vec![datetime_call],
21528 ))))
21529 }
21530 _ => Ok(Expression::Function(Box::new(Function::new(
21531 "AT_TIMEZONE".to_string(),
21532 vec![atz.this, atz.zone],
21533 )))),
21534 }
21535 } else {
21536 Ok(e)
21537 }
21538 }
21539
Action::DayOfWeekConvert => {
    // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
    if let Expression::DayOfWeek(f) = e {
        match target {
            DialectType::DuckDB => Ok(Expression::Function(Box::new(
                Function::new("ISODOW".to_string(), vec![f.this]),
            ))),
            DialectType::Spark | DialectType::Databricks => {
                // ((DAYOFWEEK(x) % 7) + 1)
                // Built inside-out: DAYOFWEEK call, then % 7, then + 1,
                // with explicit Paren nodes to fix operator precedence.
                let dayofweek = Expression::Function(Box::new(Function::new(
                    "DAYOFWEEK".to_string(),
                    vec![f.this],
                )));
                let modulo = Expression::Mod(Box::new(BinaryOp {
                    left: dayofweek,
                    right: Expression::number(7),
                    left_comments: Vec::new(),
                    operator_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                }));
                let paren_mod = Expression::Paren(Box::new(Paren {
                    this: modulo,
                    trailing_comments: Vec::new(),
                }));
                let add_one = Expression::Add(Box::new(BinaryOp {
                    left: paren_mod,
                    right: Expression::number(1),
                    left_comments: Vec::new(),
                    operator_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                }));
                Ok(Expression::Paren(Box::new(Paren {
                    this: add_one,
                    trailing_comments: Vec::new(),
                })))
            }
            // Other targets keep the DayOfWeek node as-is.
            _ => Ok(Expression::DayOfWeek(f)),
        }
    } else {
        // Not a DayOfWeek node: pass through unchanged.
        Ok(e)
    }
}
21584
21585 Action::MaxByMinByConvert => {
21586 // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
21587 // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
21588 // Handle both Expression::Function and Expression::AggregateFunction
21589 let (is_max, args) = match &e {
21590 Expression::Function(f) => {
21591 (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
21592 }
21593 Expression::AggregateFunction(af) => {
21594 (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
21595 }
21596 _ => return Ok(e),
21597 };
21598 match target {
21599 DialectType::ClickHouse => {
21600 let name = if is_max { "argMax" } else { "argMin" };
21601 let mut args = args;
21602 args.truncate(2);
21603 Ok(Expression::Function(Box::new(Function::new(
21604 name.to_string(),
21605 args,
21606 ))))
21607 }
21608 DialectType::DuckDB => {
21609 let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
21610 Ok(Expression::Function(Box::new(Function::new(
21611 name.to_string(),
21612 args,
21613 ))))
21614 }
21615 DialectType::Spark | DialectType::Databricks => {
21616 let mut args = args;
21617 args.truncate(2);
21618 let name = if is_max { "MAX_BY" } else { "MIN_BY" };
21619 Ok(Expression::Function(Box::new(Function::new(
21620 name.to_string(),
21621 args,
21622 ))))
21623 }
21624 _ => Ok(e),
21625 }
21626 }
21627
21628 Action::ElementAtConvert => {
21629 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
21630 let (arr, idx) = if let Expression::ElementAt(bf) = e {
21631 (bf.this, bf.expression)
21632 } else if let Expression::Function(ref f) = e {
21633 if f.args.len() >= 2 {
21634 if let Expression::Function(f) = e {
21635 let mut args = f.args;
21636 let arr = args.remove(0);
21637 let idx = args.remove(0);
21638 (arr, idx)
21639 } else {
21640 unreachable!("outer condition already matched Expression::Function")
21641 }
21642 } else {
21643 return Ok(e);
21644 }
21645 } else {
21646 return Ok(e);
21647 };
21648 match target {
21649 DialectType::PostgreSQL => {
21650 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
21651 let arr_expr = Expression::Paren(Box::new(Paren {
21652 this: arr,
21653 trailing_comments: vec![],
21654 }));
21655 Ok(Expression::Subscript(Box::new(
21656 crate::expressions::Subscript {
21657 this: arr_expr,
21658 index: idx,
21659 },
21660 )))
21661 }
21662 DialectType::BigQuery => {
21663 // BigQuery: convert ARRAY[...] to bare [...] for subscript
21664 let arr_expr = match arr {
21665 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
21666 crate::expressions::ArrayConstructor {
21667 expressions: af.expressions,
21668 bracket_notation: true,
21669 use_list_keyword: false,
21670 },
21671 )),
21672 other => other,
21673 };
21674 let safe_ordinal = Expression::Function(Box::new(Function::new(
21675 "SAFE_ORDINAL".to_string(),
21676 vec![idx],
21677 )));
21678 Ok(Expression::Subscript(Box::new(
21679 crate::expressions::Subscript {
21680 this: arr_expr,
21681 index: safe_ordinal,
21682 },
21683 )))
21684 }
21685 _ => Ok(Expression::Function(Box::new(Function::new(
21686 "ELEMENT_AT".to_string(),
21687 vec![arr, idx],
21688 )))),
21689 }
21690 }
21691
21692 Action::CurrentUserParens => {
21693 // CURRENT_USER -> CURRENT_USER() for Snowflake
21694 Ok(Expression::Function(Box::new(Function::new(
21695 "CURRENT_USER".to_string(),
21696 vec![],
21697 ))))
21698 }
21699
Action::ArrayAggToCollectList => {
    // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
    // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
    // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
    match e {
        Expression::AggregateFunction(mut af) => {
            // "Simple" = no modifiers that would make dropping ORDER BY lossy.
            let is_simple =
                !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
            // COLLECT_LIST takes a single argument; keep only the first.
            let args = if af.args.is_empty() {
                vec![]
            } else {
                vec![af.args[0].clone()]
            };
            af.name = "COLLECT_LIST".to_string();
            af.args = args;
            if is_simple {
                af.order_by = Vec::new();
            }
            Ok(Expression::AggregateFunction(af))
        }
        Expression::ArrayAgg(agg) => {
            let is_simple =
                !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
            // Rebuild the dedicated ArrayAgg node as a generic aggregate call.
            Ok(Expression::AggregateFunction(Box::new(
                crate::expressions::AggregateFunction {
                    name: "COLLECT_LIST".to_string(),
                    args: vec![agg.this.clone()],
                    distinct: agg.distinct,
                    filter: agg.filter.clone(),
                    order_by: if is_simple {
                        Vec::new()
                    } else {
                        agg.order_by.clone()
                    },
                    limit: agg.limit.clone(),
                    ignore_nulls: agg.ignore_nulls,
                    inferred_type: None,
                },
            )))
        }
        // Anything else passes through unchanged.
        _ => Ok(e),
    }
}
21743
            Action::ArraySyntaxConvert => {
                // Normalize array-constructor syntax between bracket and call styles.
                match e {
                    // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
                    // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
                    Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
                        Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                            expressions: arr.expressions,
                            bracket_notation: true,
                            use_list_keyword: false,
                        })),
                    ),
                    // ARRAY(y) function style -> ArrayFunc for target dialect
                    // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
                    Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
                        let bracket = matches!(
                            target,
                            DialectType::BigQuery
                                | DialectType::DuckDB
                                | DialectType::Snowflake
                                | DialectType::ClickHouse
                                | DialectType::StarRocks
                        );
                        Ok(Expression::ArrayFunc(Box::new(
                            crate::expressions::ArrayConstructor {
                                expressions: f.args,
                                bracket_notation: bracket,
                                use_list_keyword: false,
                            },
                        )))
                    }
                    _ => Ok(e),
                }
            }
21777
            Action::CastToJsonForSpark => {
                // CAST(x AS JSON) -> TO_JSON(x) for Spark
                // The target type of the cast is discarded entirely; only the
                // operand is kept and wrapped in the TO_JSON call.
                if let Expression::Cast(c) = e {
                    Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON".to_string(),
                        vec![c.this],
                    ))))
                } else {
                    Ok(e)
                }
            }
21789
            Action::CastJsonToFromJson => {
                // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
                if let Expression::Cast(c) = e {
                    // Extract the string literal from ParseJson; any other operand
                    // is passed through to FROM_JSON unchanged.
                    let literal_expr = if let Expression::ParseJson(pj) = c.this {
                        pj.this
                    } else {
                        c.this
                    };
                    // Convert the target DataType to Spark's type string format
                    let type_str = Self::data_type_to_spark_string(&c.to);
                    Ok(Expression::Function(Box::new(Function::new(
                        "FROM_JSON".to_string(),
                        vec![literal_expr, Expression::Literal(Box::new(Literal::String(type_str)))],
                    ))))
                } else {
                    Ok(e)
                }
            }
21809
            Action::ToJsonConvert => {
                // TO_JSON(x) -> target-specific conversion
                if let Expression::ToJson(f) = e {
                    let arg = f.this;
                    match target {
                        DialectType::Presto | DialectType::Trino => {
                            // JSON_FORMAT(CAST(x AS JSON))
                            // JSON is not a first-class DataType variant here, so a
                            // Custom type carries the spelling through the generator.
                            let cast_json = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Custom {
                                    name: "JSON".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_FORMAT".to_string(),
                                vec![cast_json],
                            ))))
                        }
                        DialectType::BigQuery => Ok(Expression::Function(Box::new(
                            Function::new("TO_JSON_STRING".to_string(), vec![arg]),
                        ))),
                        DialectType::DuckDB => {
                            // CAST(TO_JSON(x) AS TEXT)
                            let to_json =
                                Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
                                    this: arg,
                                    original_name: None,
                                    inferred_type: None,
                                }));
                            Ok(Expression::Cast(Box::new(Cast {
                                this: to_json,
                                to: DataType::Text,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })))
                        }
                        // Any other target: rebuild the ToJson node unchanged.
                        _ => Ok(Expression::ToJson(Box::new(
                            crate::expressions::UnaryFunc {
                                this: arg,
                                original_name: None,
                                inferred_type: None,
                            },
                        ))),
                    }
                } else {
                    Ok(e)
                }
            }
21866
            Action::VarianceToClickHouse => {
                // VARIANCE(x) -> varSamp(x) for ClickHouse (camelCase spelling is
                // intentional; ClickHouse function names are case-sensitive).
                if let Expression::Variance(f) = e {
                    Ok(Expression::Function(Box::new(Function::new(
                        "varSamp".to_string(),
                        vec![f.this],
                    ))))
                } else {
                    Ok(e)
                }
            }
21877
            Action::StddevToClickHouse => {
                // STDDEV(x) -> stddevSamp(x) for ClickHouse (camelCase spelling is
                // intentional; ClickHouse function names are case-sensitive).
                if let Expression::Stddev(f) = e {
                    Ok(Expression::Function(Box::new(Function::new(
                        "stddevSamp".to_string(),
                        vec![f.this],
                    ))))
                } else {
                    Ok(e)
                }
            }
21888
            Action::ApproxQuantileConvert => {
                // APPROX_QUANTILE(x, q) -> APPROX_PERCENTILE(x[, q]); the quantile
                // argument is optional and only forwarded when present.
                if let Expression::ApproxQuantile(aq) = e {
                    let mut args = vec![*aq.this];
                    if let Some(q) = aq.quantile {
                        args.push(*q);
                    }
                    Ok(Expression::Function(Box::new(Function::new(
                        "APPROX_PERCENTILE".to_string(),
                        args,
                    ))))
                } else {
                    Ok(e)
                }
            }
21903
            Action::DollarParamConvert => {
                // Dollar-style bind parameter -> @-style parameter. Only the
                // placeholder style changes; name/index/quoting are preserved.
                if let Expression::Parameter(p) = e {
                    Ok(Expression::Parameter(Box::new(
                        crate::expressions::Parameter {
                            name: p.name,
                            index: p.index,
                            style: crate::expressions::ParameterStyle::At,
                            quoted: p.quoted,
                            string_quoted: p.string_quoted,
                            expression: p.expression,
                        },
                    )))
                } else {
                    Ok(e)
                }
            }
21920
21921 Action::EscapeStringNormalize => {
21922 if let Expression::Literal(ref lit) = e {
21923 if let Literal::EscapeString(s) = lit.as_ref() {
21924 // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
21925 let stripped = if s.starts_with("e:") || s.starts_with("E:") {
21926 s[2..].to_string()
21927 } else {
21928 s.clone()
21929 };
21930 let normalized = stripped
21931 .replace('\n', "\\n")
21932 .replace('\r', "\\r")
21933 .replace('\t', "\\t");
21934 match target {
21935 DialectType::BigQuery => {
21936 // BigQuery: e'...' -> CAST(b'...' AS STRING)
21937 // Use Raw for the b'...' part to avoid double-escaping
21938 let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
21939 Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
21940 }
21941 _ => Ok(Expression::Literal(Box::new(Literal::EscapeString(normalized)))),
21942 }
21943 } else {
21944 Ok(e)
21945 }
21946 } else {
21947 Ok(e)
21948 }
21949 }
21950
            Action::StraightJoinCase => {
                // straight_join: keep lowercase for DuckDB, quote for MySQL
                // (the hint arrives here parsed as a bare column named STRAIGHT_JOIN).
                if let Expression::Column(col) = e {
                    if col.name.name == "STRAIGHT_JOIN" {
                        let mut new_col = col;
                        new_col.name.name = "straight_join".to_string();
                        if matches!(target, DialectType::MySQL) {
                            // MySQL: needs quoting since it's a reserved keyword
                            new_col.name.quoted = true;
                        }
                        Ok(Expression::Column(new_col))
                    } else {
                        Ok(Expression::Column(col))
                    }
                } else {
                    Ok(e)
                }
            }
21969
            Action::TablesampleReservoir => {
                // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
                // explicit_method makes the generator print the method keyword.
                if let Expression::TableSample(mut ts) = e {
                    if let Some(ref mut sample) = ts.sample {
                        sample.method = crate::expressions::SampleMethod::Reservoir;
                        sample.explicit_method = true;
                    }
                    Ok(Expression::TableSample(ts))
                } else {
                    Ok(e)
                }
            }
21982
            Action::TablesampleSnowflakeStrip => {
                // Strip method and PERCENT for Snowflake target from non-Snowflake source
                // The sample spec can hang off either a TableSample node or a Table
                // node; both carry the same flags, so handle both shapes.
                match e {
                    Expression::TableSample(mut ts) => {
                        if let Some(ref mut sample) = ts.sample {
                            sample.suppress_method_output = true;
                            sample.unit_after_size = false;
                            sample.is_percent = false;
                        }
                        Ok(Expression::TableSample(ts))
                    }
                    Expression::Table(mut t) => {
                        if let Some(ref mut sample) = t.table_sample {
                            sample.suppress_method_output = true;
                            sample.unit_after_size = false;
                            sample.is_percent = false;
                        }
                        Ok(Expression::Table(t))
                    }
                    _ => Ok(e),
                }
            }
22005
            Action::FirstToAnyValue => {
                // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
                // The IGNORE NULLS modifier is dropped since ANY_VALUE implies it.
                if let Expression::First(mut agg) = e {
                    agg.ignore_nulls = None;
                    agg.name = Some("ANY_VALUE".to_string());
                    Ok(Expression::AnyValue(agg))
                } else {
                    Ok(e)
                }
            }
22016
            Action::ArrayIndexConvert => {
                // Subscript index: 1-based to 0-based for BigQuery
                // Only literal integer indexes are shifted; dynamic or non-integer
                // indexes pass through untouched (parse failure leaves them as-is).
                if let Expression::Subscript(mut sub) = e {
                    if let Expression::Literal(ref lit) = sub.index {
                        if let Literal::Number(ref n) = lit.as_ref() {
                            if let Ok(val) = n.parse::<i64>() {
                                sub.index =
                                    Expression::Literal(Box::new(Literal::Number((val - 1).to_string())));
                            }
                        }
                    }
                    Ok(Expression::Subscript(sub))
                } else {
                    Ok(e)
                }
            }
22033
            Action::AnyValueIgnoreNulls => {
                // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
                // An explicit RESPECT/IGNORE NULLS already present is left alone.
                if let Expression::AnyValue(mut av) = e {
                    if av.ignore_nulls.is_none() {
                        av.ignore_nulls = Some(true);
                    }
                    Ok(Expression::AnyValue(av))
                } else {
                    Ok(e)
                }
            }
22045
            Action::BigQueryNullsOrdering => {
                // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
                // Strip the clause from each window ORDER BY key, or from a bare
                // Ordered node when the action fires outside a window.
                if let Expression::WindowFunction(mut wf) = e {
                    for o in &mut wf.over.order_by {
                        o.nulls_first = None;
                    }
                    Ok(Expression::WindowFunction(wf))
                } else if let Expression::Ordered(mut o) = e {
                    o.nulls_first = None;
                    Ok(Expression::Ordered(o))
                } else {
                    Ok(e)
                }
            }
22060
            Action::SnowflakeFloatProtect => {
                // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
                // Snowflake's target transform from converting it to DOUBLE.
                // Non-Snowflake sources should keep their FLOAT spelling.
                if let Expression::DataType(DataType::Float { .. }) = e {
                    Ok(Expression::DataType(DataType::Custom {
                        name: "FLOAT".to_string(),
                    }))
                } else {
                    Ok(e)
                }
            }
22073
22074 Action::MysqlNullsOrdering => {
22075 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
22076 if let Expression::Ordered(mut o) = e {
22077 let nulls_last = o.nulls_first == Some(false);
22078 let desc = o.desc;
22079 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
22080 // If requested ordering matches default, just strip NULLS clause
22081 let matches_default = if desc {
22082 // DESC default is NULLS FIRST, so nulls_first=true matches
22083 o.nulls_first == Some(true)
22084 } else {
22085 // ASC default is NULLS LAST, so nulls_first=false matches
22086 nulls_last
22087 };
22088 if matches_default {
22089 o.nulls_first = None;
22090 Ok(Expression::Ordered(o))
22091 } else {
22092 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
22093 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
22094 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
22095 let null_val = if desc { 1 } else { 0 };
22096 let non_null_val = if desc { 0 } else { 1 };
22097 let _case_expr = Expression::Case(Box::new(Case {
22098 operand: None,
22099 whens: vec![(
22100 Expression::IsNull(Box::new(crate::expressions::IsNull {
22101 this: o.this.clone(),
22102 not: false,
22103 postfix_form: false,
22104 })),
22105 Expression::number(null_val),
22106 )],
22107 else_: Some(Expression::number(non_null_val)),
22108 comments: Vec::new(),
22109 inferred_type: None,
22110 }));
22111 o.nulls_first = None;
22112 // Return a tuple of [case_expr, ordered_expr]
22113 // We need to return both as part of the ORDER BY
22114 // But since transform_recursive processes individual expressions,
22115 // we can't easily add extra ORDER BY items here.
22116 // Instead, strip the nulls_first
22117 o.nulls_first = None;
22118 Ok(Expression::Ordered(o))
22119 }
22120 } else {
22121 Ok(e)
22122 }
22123 }
22124
            Action::MysqlNullsLastRewrite => {
                // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
                // to simulate NULLS LAST for ASC ordering
                // Works here (unlike Action::MysqlNullsOrdering) because the whole
                // window ORDER BY list is rebuilt, so extra keys can be injected.
                if let Expression::WindowFunction(mut wf) = e {
                    let mut new_order_by = Vec::new();
                    for o in wf.over.order_by {
                        if !o.desc {
                            // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
                            // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
                            let case_expr = Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(
                                    Expression::IsNull(Box::new(crate::expressions::IsNull {
                                        this: o.this.clone(),
                                        not: false,
                                        postfix_form: false,
                                    })),
                                    Expression::Literal(Box::new(Literal::Number("1".to_string()))),
                                )],
                                else_: Some(Expression::Literal(Box::new(Literal::Number(
                                    "0".to_string(),
                                )))),
                                comments: Vec::new(),
                                inferred_type: None,
                            }));
                            // The synthetic key sorts NULL rows (1) after non-NULL (0).
                            new_order_by.push(crate::expressions::Ordered {
                                this: case_expr,
                                desc: false,
                                nulls_first: None,
                                explicit_asc: false,
                                with_fill: None,
                            });
                            let mut ordered = o;
                            ordered.nulls_first = None;
                            new_order_by.push(ordered);
                        } else {
                            // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
                            // No change needed
                            let mut ordered = o;
                            ordered.nulls_first = None;
                            new_order_by.push(ordered);
                        }
                    }
                    wf.over.order_by = new_order_by;
                    Ok(Expression::WindowFunction(wf))
                } else {
                    Ok(e)
                }
            }
22174
            Action::RespectNullsConvert => {
                // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
                // Only an explicit RESPECT NULLS (Some(false)) is cleared; an
                // explicit IGNORE NULLS (Some(true)) is left untouched.
                if let Expression::WindowFunction(mut wf) = e {
                    match &mut wf.this {
                        Expression::FirstValue(ref mut vf) => {
                            if vf.ignore_nulls == Some(false) {
                                vf.ignore_nulls = None;
                                // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
                                // but that's handled by the generator's NULLS ordering
                            }
                        }
                        Expression::LastValue(ref mut vf) => {
                            if vf.ignore_nulls == Some(false) {
                                vf.ignore_nulls = None;
                            }
                        }
                        _ => {}
                    }
                    Ok(Expression::WindowFunction(wf))
                } else {
                    Ok(e)
                }
            }
22198
            Action::SnowflakeWindowFrameStrip => {
                // Strip the default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
                // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when targeting Snowflake
                // NOTE(review): this clears ANY frame, not just the default one —
                // the action is presumably only scheduled for default frames.
                if let Expression::WindowFunction(mut wf) = e {
                    wf.over.frame = None;
                    Ok(Expression::WindowFunction(wf))
                } else {
                    Ok(e)
                }
            }
22209
            Action::SnowflakeWindowFrameAdd => {
                // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
                // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when transpiling from Snowflake to non-Snowflake
                // (inverse of Action::SnowflakeWindowFrameStrip; overwrites any
                // existing frame with the full-partition default).
                if let Expression::WindowFunction(mut wf) = e {
                    wf.over.frame = Some(crate::expressions::WindowFrame {
                        kind: crate::expressions::WindowFrameKind::Rows,
                        start: crate::expressions::WindowFrameBound::UnboundedPreceding,
                        end: Some(crate::expressions::WindowFrameBound::UnboundedFollowing),
                        exclude: None,
                        kind_text: None,
                        start_side_text: None,
                        end_side_text: None,
                    });
                    Ok(Expression::WindowFunction(wf))
                } else {
                    Ok(e)
                }
            }
22228
            Action::CreateTableStripComment => {
                // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
                if let Expression::CreateTable(mut ct) = e {
                    for col in &mut ct.columns {
                        col.comment = None;
                        // Comment lives both in the constraints list and in
                        // constraint_order; both must be cleaned to keep them in sync.
                        col.constraints.retain(|c| {
                            !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
                        });
                        // Also remove Comment from constraint_order
                        col.constraint_order.retain(|c| {
                            !matches!(c, crate::expressions::ConstraintType::Comment)
                        });
                    }
                    // Strip properties (USING, PARTITIONED BY, etc.)
                    ct.properties.clear();
                    Ok(Expression::CreateTable(ct))
                } else {
                    Ok(e)
                }
            }
22249
22250 Action::AlterTableToSpRename => {
22251 // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
22252 if let Expression::AlterTable(ref at) = e {
22253 if let Some(crate::expressions::AlterTableAction::RenameTable(
22254 ref new_tbl,
22255 )) = at.actions.first()
22256 {
22257 // Build the old table name using TSQL bracket quoting
22258 let old_name = if let Some(ref schema) = at.name.schema {
22259 if at.name.name.quoted || schema.quoted {
22260 format!("[{}].[{}]", schema.name, at.name.name.name)
22261 } else {
22262 format!("{}.{}", schema.name, at.name.name.name)
22263 }
22264 } else {
22265 if at.name.name.quoted {
22266 format!("[{}]", at.name.name.name)
22267 } else {
22268 at.name.name.name.clone()
22269 }
22270 };
22271 let new_name = new_tbl.name.name.clone();
22272 // EXEC sp_rename 'old_name', 'new_name'
22273 let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
22274 Ok(Expression::Raw(crate::expressions::Raw { sql }))
22275 } else {
22276 Ok(e)
22277 }
22278 } else {
22279 Ok(e)
22280 }
22281 }
22282
            Action::SnowflakeIntervalFormat => {
                // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
                // Snowflake expects the unit inside the quoted value, so value and
                // unit are folded into one string literal and the unit spec cleared.
                if let Expression::Interval(mut iv) = e {
                    if let (
                        Some(Expression::Literal(lit)),
                        Some(ref unit_spec),
                    ) = (&iv.this, &iv.unit)
                    {
                        if let Literal::String(ref val) = lit.as_ref() {
                            // Only simple single-unit specs are handled; compound
                            // specs fall through with an empty unit_str (no rewrite).
                            let unit_str = match unit_spec {
                                crate::expressions::IntervalUnitSpec::Simple { unit, .. } => {
                                    match unit {
                                        crate::expressions::IntervalUnit::Year => "YEAR",
                                        crate::expressions::IntervalUnit::Quarter => "QUARTER",
                                        crate::expressions::IntervalUnit::Month => "MONTH",
                                        crate::expressions::IntervalUnit::Week => "WEEK",
                                        crate::expressions::IntervalUnit::Day => "DAY",
                                        crate::expressions::IntervalUnit::Hour => "HOUR",
                                        crate::expressions::IntervalUnit::Minute => "MINUTE",
                                        crate::expressions::IntervalUnit::Second => "SECOND",
                                        crate::expressions::IntervalUnit::Millisecond => {
                                            "MILLISECOND"
                                        }
                                        crate::expressions::IntervalUnit::Microsecond => {
                                            "MICROSECOND"
                                        }
                                        crate::expressions::IntervalUnit::Nanosecond => {
                                            "NANOSECOND"
                                        }
                                    }
                                }
                                _ => "",
                            };
                            if !unit_str.is_empty() {
                                let combined = format!("{} {}", val, unit_str);
                                iv.this = Some(Expression::Literal(Box::new(Literal::String(combined))));
                                iv.unit = None;
                            }
                        }
                    }
                    Ok(Expression::Interval(iv))
                } else {
                    Ok(e)
                }
            }
22328
            Action::ArrayConcatBracketConvert => {
                // Expression::Array/ArrayFunc -> target-specific
                // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
                // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
                match e {
                    Expression::Array(arr) => {
                        if matches!(target, DialectType::Redshift) {
                            Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY".to_string(),
                                arr.expressions,
                            ))))
                        } else {
                            // Non-Redshift: ARRAY[...] bracket form.
                            Ok(Expression::ArrayFunc(Box::new(
                                crate::expressions::ArrayConstructor {
                                    expressions: arr.expressions,
                                    bracket_notation: false,
                                    use_list_keyword: false,
                                },
                            )))
                        }
                    }
                    Expression::ArrayFunc(arr) => {
                        // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
                        if matches!(target, DialectType::Redshift) {
                            Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY".to_string(),
                                arr.expressions,
                            ))))
                        } else {
                            Ok(Expression::ArrayFunc(arr))
                        }
                    }
                    _ => Ok(e),
                }
            }
22364
22365 Action::BitAggFloatCast => {
22366 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
22367 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
22368 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
22369 let int_type = DataType::Int {
22370 length: None,
22371 integer_spelling: false,
22372 };
22373 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
22374 if let Expression::Cast(c) = agg_this {
22375 match &c.to {
22376 DataType::Float { .. }
22377 | DataType::Double { .. }
22378 | DataType::Custom { .. } => {
22379 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
22380 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
22381 let inner_type = match &c.to {
22382 DataType::Float {
22383 precision, scale, ..
22384 } => DataType::Float {
22385 precision: *precision,
22386 scale: *scale,
22387 real_spelling: true,
22388 },
22389 other => other.clone(),
22390 };
22391 let inner_cast =
22392 Expression::Cast(Box::new(crate::expressions::Cast {
22393 this: c.this.clone(),
22394 to: inner_type,
22395 trailing_comments: Vec::new(),
22396 double_colon_syntax: false,
22397 format: None,
22398 default: None,
22399 inferred_type: None,
22400 }));
22401 let rounded = Expression::Function(Box::new(Function::new(
22402 "ROUND".to_string(),
22403 vec![inner_cast],
22404 )));
22405 Expression::Cast(Box::new(crate::expressions::Cast {
22406 this: rounded,
22407 to: int_dt,
22408 trailing_comments: Vec::new(),
22409 double_colon_syntax: false,
22410 format: None,
22411 default: None,
22412 inferred_type: None,
22413 }))
22414 }
22415 DataType::Decimal { .. } => {
22416 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
22417 Expression::Cast(Box::new(crate::expressions::Cast {
22418 this: Expression::Cast(c),
22419 to: int_dt,
22420 trailing_comments: Vec::new(),
22421 double_colon_syntax: false,
22422 format: None,
22423 default: None,
22424 inferred_type: None,
22425 }))
22426 }
22427 _ => Expression::Cast(c),
22428 }
22429 } else {
22430 agg_this
22431 }
22432 };
22433 match e {
22434 Expression::BitwiseOrAgg(mut f) => {
22435 f.this = wrap_agg(f.this, int_type);
22436 Ok(Expression::BitwiseOrAgg(f))
22437 }
22438 Expression::BitwiseAndAgg(mut f) => {
22439 let int_type = DataType::Int {
22440 length: None,
22441 integer_spelling: false,
22442 };
22443 f.this = wrap_agg(f.this, int_type);
22444 Ok(Expression::BitwiseAndAgg(f))
22445 }
22446 Expression::BitwiseXorAgg(mut f) => {
22447 let int_type = DataType::Int {
22448 length: None,
22449 integer_spelling: false,
22450 };
22451 f.this = wrap_agg(f.this, int_type);
22452 Ok(Expression::BitwiseXorAgg(f))
22453 }
22454 _ => Ok(e),
22455 }
22456 }
22457
            Action::BitAggSnowflakeRename => {
                // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
                // Dedicated agg nodes are flattened into plain Function calls since
                // only the name differs in the generated SQL.
                match e {
                    Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
                        Function::new("BITORAGG".to_string(), vec![f.this]),
                    ))),
                    Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
                        Function::new("BITANDAGG".to_string(), vec![f.this]),
                    ))),
                    Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
                        Function::new("BITXORAGG".to_string(), vec![f.this]),
                    ))),
                    _ => Ok(e),
                }
            }
22473
            Action::StrftimeCastTimestamp => {
                // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
                // Only the timezone-less TIMESTAMP is rewritten; TIMESTAMP WITH
                // TIME ZONE is left alone.
                if let Expression::Cast(mut c) = e {
                    if matches!(
                        c.to,
                        DataType::Timestamp {
                            timezone: false,
                            ..
                        }
                    ) {
                        c.to = DataType::Custom {
                            name: "TIMESTAMP_NTZ".to_string(),
                        };
                    }
                    Ok(Expression::Cast(c))
                } else {
                    Ok(e)
                }
            }
22493
            Action::DecimalDefaultPrecision => {
                // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
                // Only bare DECIMAL (precision: None) is rewritten; explicit
                // precision/scale pairs are preserved.
                if let Expression::Cast(mut c) = e {
                    if matches!(
                        c.to,
                        DataType::Decimal {
                            precision: None,
                            ..
                        }
                    ) {
                        c.to = DataType::Decimal {
                            precision: Some(18),
                            scale: Some(3),
                        };
                    }
                    Ok(Expression::Cast(c))
                } else {
                    Ok(e)
                }
            }
22514
            Action::FilterToIff => {
                // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL))
                // For aggregates that ignore NULLs this is semantically equivalent
                // to the FILTER clause on dialects lacking it.
                if let Expression::Filter(f) = e {
                    let condition = *f.expression;
                    let agg = *f.this;
                    // Strip WHERE from condition
                    let cond = match condition {
                        Expression::Where(w) => w.this,
                        other => other,
                    };
                    // Extract the aggregate function and its argument
                    // We want AVG(IFF(condition, x, NULL))
                    match agg {
                        Expression::Function(mut func) => {
                            if !func.args.is_empty() {
                                let orig_arg = func.args[0].clone();
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![cond, orig_arg, Expression::Null(Null)],
                                )));
                                func.args[0] = iff_call;
                                Ok(Expression::Function(func))
                            } else {
                                // Zero-arg function (e.g. COUNT(*)): keep the FILTER
                                // node since there is no argument to wrap.
                                Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                                    this: Box::new(Expression::Function(func)),
                                    expression: Box::new(cond),
                                })))
                            }
                        }
                        Expression::Avg(mut avg) => {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![cond, avg.this.clone(), Expression::Null(Null)],
                            )));
                            avg.this = iff_call;
                            Ok(Expression::Avg(avg))
                        }
                        Expression::Sum(mut s) => {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![cond, s.this.clone(), Expression::Null(Null)],
                            )));
                            s.this = iff_call;
                            Ok(Expression::Sum(s))
                        }
                        Expression::Count(mut c) => {
                            // COUNT(x): wrap x; COUNT(*) (this == None) is unchanged.
                            if let Some(ref this_expr) = c.this {
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![cond, this_expr.clone(), Expression::Null(Null)],
                                )));
                                c.this = Some(iff_call);
                            }
                            Ok(Expression::Count(c))
                        }
                        other => {
                            // Fallback: keep as Filter
                            Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                                this: Box::new(other),
                                expression: Box::new(cond),
                            })))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
22582
            Action::AggFilterToIff => {
                // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
                // Same rewrite as Action::FilterToIff, but for aggregates whose
                // FILTER clause is stored inline on the node rather than as a
                // wrapping Expression::Filter.
                // Helper macro to handle the common AggFunc case
                macro_rules! handle_agg_filter_to_iff {
                    ($variant:ident, $agg:expr) => {{
                        let mut agg = $agg;
                        // take() both reads the filter and clears it from the node.
                        if let Some(filter_cond) = agg.filter.take() {
                            let iff_call = Expression::Function(Box::new(Function::new(
                                "IFF".to_string(),
                                vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
                            )));
                            agg.this = iff_call;
                        }
                        Ok(Expression::$variant(agg))
                    }};
                }

                match e {
                    Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
                    Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
                    Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
                    Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
                    Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
                    Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
                    Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
                    Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
                    Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
                    Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
                    Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
                    Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
                    Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
                    Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
                    Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
                    Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
                    Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
                    Expression::ApproxDistinct(agg) => {
                        handle_agg_filter_to_iff!(ApproxDistinct, agg)
                    }
                    Expression::Count(mut c) => {
                        // Count stores `this` as an Option (COUNT(*) has no operand),
                        // so it cannot use the macro.
                        if let Some(filter_cond) = c.filter.take() {
                            if let Some(ref this_expr) = c.this {
                                let iff_call = Expression::Function(Box::new(Function::new(
                                    "IFF".to_string(),
                                    vec![
                                        filter_cond,
                                        this_expr.clone(),
                                        Expression::Null(Null),
                                    ],
                                )));
                                c.this = Some(iff_call);
                            }
                        }
                        Ok(Expression::Count(c))
                    }
                    other => Ok(other),
                }
            }
22640
            Action::JsonToGetPath => {
                // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key')
                if let Expression::JsonExtract(je) = e {
                    // Convert to PARSE_JSON() wrapper:
                    // - JSON(x) -> PARSE_JSON(x)
                    // - PARSE_JSON(x) -> keep as-is
                    // - anything else -> wrap in PARSE_JSON()
                    let this = match &je.this {
                        Expression::Function(f)
                            if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
                        {
                            Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                f.args.clone(),
                            )))
                        }
                        Expression::Function(f)
                            if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                        {
                            je.this.clone()
                        }
                        // GET_PATH result is already JSON, don't wrap
                        Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
                            je.this.clone()
                        }
                        other => {
                            // Wrap non-JSON expressions in PARSE_JSON()
                            Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                vec![other.clone()],
                            )))
                        }
                    };
                    // Convert path: extract key from JSONPath or strip $. prefix from string
                    let path = match &je.path {
                        Expression::JSONPath(jp) => {
                            // Extract the key from JSONPath: $root.key -> 'key'
                            // Multi-segment paths are joined with '.'; non-string
                            // segments (indexes, wildcards) are skipped.
                            let mut key_parts = Vec::new();
                            for expr in &jp.expressions {
                                match expr {
                                    Expression::JSONPathRoot(_) => {} // skip root
                                    Expression::JSONPathKey(k) => {
                                        if let Expression::Literal(lit) =
                                            &*k.this
                                        {
                                            if let Literal::String(s) = lit.as_ref() {
                                                key_parts.push(s.clone());
                                            }
                                        }
                                    }
                                    _ => {}
                                }
                            }
                            if !key_parts.is_empty() {
                                Expression::Literal(Box::new(Literal::String(key_parts.join("."))))
                            } else {
                                je.path.clone()
                            }
                        }
                        // String path "$.a.b" -> "a.b" (drop root prefix).
                        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with("$.")) => {
                            let Literal::String(s) = lit.as_ref() else { unreachable!() };
                            let stripped = Self::strip_json_wildcards(&s[2..].to_string());
                            Expression::Literal(Box::new(Literal::String(stripped)))
                        }
                        // Bare "$..." path without the dot separator.
                        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$')) => {
                            let Literal::String(s) = lit.as_ref() else { unreachable!() };
                            let stripped = Self::strip_json_wildcards(&s[1..].to_string());
                            Expression::Literal(Box::new(Literal::String(stripped)))
                        }
                        _ => je.path.clone(),
                    };
                    Ok(Expression::Function(Box::new(Function::new(
                        "GET_PATH".to_string(),
                        vec![this, path],
                    ))))
                } else {
                    Ok(e)
                }
            }
22720
22721 Action::StructToRow => {
22722 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
22723 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
22724
22725 // Extract key-value pairs from either Struct or MapFunc
22726 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
22727 Expression::Struct(s) => Some(
22728 s.fields
22729 .iter()
22730 .map(|(opt_name, field_expr)| {
22731 if let Some(name) = opt_name {
22732 (name.clone(), field_expr.clone())
22733 } else if let Expression::NamedArgument(na) = field_expr {
22734 (na.name.name.clone(), na.value.clone())
22735 } else {
22736 (String::new(), field_expr.clone())
22737 }
22738 })
22739 .collect(),
22740 ),
22741 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
22742 m.keys
22743 .iter()
22744 .zip(m.values.iter())
22745 .map(|(key, value)| {
22746 let key_name = match key {
22747 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => { let Literal::String(s) = lit.as_ref() else { unreachable!() }; s.clone() },
22748 Expression::Identifier(id) => id.name.clone(),
22749 _ => String::new(),
22750 };
22751 (key_name, value.clone())
22752 })
22753 .collect(),
22754 ),
22755 _ => None,
22756 };
22757
22758 if let Some(pairs) = kv_pairs {
22759 let mut named_args = Vec::new();
22760 for (key_name, value) in pairs {
22761 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
22762 named_args.push(Expression::Alias(Box::new(
22763 crate::expressions::Alias::new(
22764 value,
22765 Identifier::new(key_name),
22766 ),
22767 )));
22768 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
22769 named_args.push(value);
22770 } else {
22771 named_args.push(value);
22772 }
22773 }
22774
22775 if matches!(target, DialectType::BigQuery) {
22776 Ok(Expression::Function(Box::new(Function::new(
22777 "STRUCT".to_string(),
22778 named_args,
22779 ))))
22780 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
22781 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
22782 let row_func = Expression::Function(Box::new(Function::new(
22783 "ROW".to_string(),
22784 named_args,
22785 )));
22786
22787 // Try to infer types for each pair
22788 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
22789 Expression::Struct(s) => Some(
22790 s.fields
22791 .iter()
22792 .map(|(opt_name, field_expr)| {
22793 if let Some(name) = opt_name {
22794 (name.clone(), field_expr.clone())
22795 } else if let Expression::NamedArgument(na) = field_expr
22796 {
22797 (na.name.name.clone(), na.value.clone())
22798 } else {
22799 (String::new(), field_expr.clone())
22800 }
22801 })
22802 .collect(),
22803 ),
22804 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
22805 m.keys
22806 .iter()
22807 .zip(m.values.iter())
22808 .map(|(key, value)| {
22809 let key_name = match key {
22810 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
22811 let Literal::String(s) = lit.as_ref() else { unreachable!() };
22812 s.clone()
22813 }
22814 Expression::Identifier(id) => id.name.clone(),
22815 _ => String::new(),
22816 };
22817 (key_name, value.clone())
22818 })
22819 .collect(),
22820 ),
22821 _ => None,
22822 };
22823
22824 if let Some(pairs) = kv_pairs_again {
22825 // Infer types for all values
22826 let mut all_inferred = true;
22827 let mut fields = Vec::new();
22828 for (name, value) in &pairs {
22829 let inferred_type = match value {
22830 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
22831 let Literal::Number(n) = lit.as_ref() else { unreachable!() };
22832 if n.contains('.') {
22833 Some(DataType::Double {
22834 precision: None,
22835 scale: None,
22836 })
22837 } else {
22838 Some(DataType::Int {
22839 length: None,
22840 integer_spelling: true,
22841 })
22842 }
22843 }
22844 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
22845 Some(DataType::VarChar {
22846 length: None,
22847 parenthesized_length: false,
22848 })
22849 }
22850 Expression::Boolean(_) => Some(DataType::Boolean),
22851 _ => None,
22852 };
22853 if let Some(dt) = inferred_type {
22854 fields.push(crate::expressions::StructField::new(
22855 name.clone(),
22856 dt,
22857 ));
22858 } else {
22859 all_inferred = false;
22860 break;
22861 }
22862 }
22863
22864 if all_inferred && !fields.is_empty() {
22865 let row_type = DataType::Struct {
22866 fields,
22867 nested: true,
22868 };
22869 Ok(Expression::Cast(Box::new(Cast {
22870 this: row_func,
22871 to: row_type,
22872 trailing_comments: Vec::new(),
22873 double_colon_syntax: false,
22874 format: None,
22875 default: None,
22876 inferred_type: None,
22877 })))
22878 } else {
22879 Ok(row_func)
22880 }
22881 } else {
22882 Ok(row_func)
22883 }
22884 } else {
22885 Ok(Expression::Function(Box::new(Function::new(
22886 "ROW".to_string(),
22887 named_args,
22888 ))))
22889 }
22890 } else {
22891 Ok(e)
22892 }
22893 }
22894
                Action::SparkStructConvert => {
                    // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
                    // or DuckDB {'name': val, ...}
                    if let Expression::Function(f) = e {
                        // Extract name-value pairs from aliased args; an un-aliased arg
                        // contributes an empty name so its position is still preserved.
                        let mut pairs: Vec<(String, Expression)> = Vec::new();
                        for arg in &f.args {
                            match arg {
                                Expression::Alias(a) => {
                                    pairs.push((a.alias.name.clone(), a.this.clone()));
                                }
                                _ => {
                                    pairs.push((String::new(), arg.clone()));
                                }
                            }
                        }

                        match target {
                            DialectType::DuckDB => {
                                // Convert to DuckDB struct literal {'name': value, ...}
                                let mut keys = Vec::new();
                                let mut values = Vec::new();
                                for (name, value) in &pairs {
                                    keys.push(Expression::Literal(Box::new(Literal::String(name.clone()))));
                                    values.push(value.clone());
                                }
                                Ok(Expression::MapFunc(Box::new(
                                    crate::expressions::MapConstructor {
                                        keys,
                                        values,
                                        curly_brace_syntax: true,
                                        with_map_keyword: false,
                                    },
                                )))
                            }
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
                                let row_args: Vec<Expression> =
                                    pairs.iter().map(|(_, v)| v.clone()).collect();
                                let row_func = Expression::Function(Box::new(Function::new(
                                    "ROW".to_string(),
                                    row_args,
                                )));

                                // Infer types: only numeric/string literals and booleans are
                                // inferable; any other value aborts the CAST wrapping below.
                                let mut all_inferred = true;
                                let mut fields = Vec::new();
                                for (name, value) in &pairs {
                                    let inferred_type = match value {
                                        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
                                            let Literal::Number(n) = lit.as_ref() else { unreachable!() };
                                            // A decimal point selects DOUBLE, otherwise INT.
                                            if n.contains('.') {
                                                Some(DataType::Double {
                                                    precision: None,
                                                    scale: None,
                                                })
                                            } else {
                                                Some(DataType::Int {
                                                    length: None,
                                                    integer_spelling: true,
                                                })
                                            }
                                        }
                                        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
                                            Some(DataType::VarChar {
                                                length: None,
                                                parenthesized_length: false,
                                            })
                                        }
                                        Expression::Boolean(_) => Some(DataType::Boolean),
                                        _ => None,
                                    };
                                    if let Some(dt) = inferred_type {
                                        fields.push(crate::expressions::StructField::new(
                                            name.clone(),
                                            dt,
                                        ));
                                    } else {
                                        all_inferred = false;
                                        break;
                                    }
                                }

                                if all_inferred && !fields.is_empty() {
                                    let row_type = DataType::Struct {
                                        fields,
                                        nested: true,
                                    };
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: row_func,
                                        to: row_type,
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    })))
                                } else {
                                    // Fall back to a bare ROW(...) when any type is unknown.
                                    Ok(row_func)
                                }
                            }
                            // Other targets keep the original function call unchanged.
                            _ => Ok(Expression::Function(f)),
                        }
                    } else {
                        Ok(e)
                    }
                }
23002
23003 Action::ApproxCountDistinctToApproxDistinct => {
23004 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
23005 if let Expression::ApproxCountDistinct(f) = e {
23006 Ok(Expression::ApproxDistinct(f))
23007 } else {
23008 Ok(e)
23009 }
23010 }
23011
                Action::CollectListToArrayAgg => {
                    // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
                    if let Expression::AggregateFunction(f) = e {
                        // Build the FILTER (WHERE x IS NOT NULL) clause; only possible
                        // when the source call actually has an argument.
                        let filter_expr = if !f.args.is_empty() {
                            let arg = f.args[0].clone();
                            Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
                                this: arg,
                                not: true,
                                postfix_form: false,
                            })))
                        } else {
                            None
                        };
                        // Zero-arg degenerate call aggregates NULL so generation still works.
                        let agg = crate::expressions::AggFunc {
                            this: if f.args.is_empty() {
                                Expression::Null(crate::expressions::Null)
                            } else {
                                f.args[0].clone()
                            },
                            distinct: f.distinct,
                            order_by: f.order_by.clone(),
                            filter: filter_expr,
                            ignore_nulls: None,
                            name: None,
                            having_max: None,
                            limit: None,
                            inferred_type: None,
                        };
                        Ok(Expression::ArrayAgg(Box::new(agg)))
                    } else {
                        Ok(e)
                    }
                }
23045
                Action::CollectSetConvert => {
                    // COLLECT_SET(x) -> target-specific
                    if let Expression::AggregateFunction(f) = e {
                        match target {
                            // Presto has a dedicated SET_AGG aggregate.
                            DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
                                crate::expressions::AggregateFunction {
                                    name: "SET_AGG".to_string(),
                                    args: f.args,
                                    distinct: false,
                                    order_by: f.order_by,
                                    filter: f.filter,
                                    limit: f.limit,
                                    ignore_nulls: f.ignore_nulls,
                                    inferred_type: None,
                                },
                            ))),
                            // Snowflake's equivalent is ARRAY_UNIQUE_AGG.
                            DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
                                crate::expressions::AggregateFunction {
                                    name: "ARRAY_UNIQUE_AGG".to_string(),
                                    args: f.args,
                                    distinct: false,
                                    order_by: f.order_by,
                                    filter: f.filter,
                                    limit: f.limit,
                                    ignore_nulls: f.ignore_nulls,
                                    inferred_type: None,
                                },
                            ))),
                            // Trino/DuckDB: emulate via ARRAY_AGG(DISTINCT x). Note any
                            // ORDER BY / FILTER on the source call is dropped here.
                            DialectType::Trino | DialectType::DuckDB => {
                                let agg = crate::expressions::AggFunc {
                                    this: if f.args.is_empty() {
                                        Expression::Null(crate::expressions::Null)
                                    } else {
                                        f.args[0].clone()
                                    },
                                    distinct: true,
                                    order_by: Vec::new(),
                                    filter: None,
                                    ignore_nulls: None,
                                    name: None,
                                    having_max: None,
                                    limit: None,
                                    inferred_type: None,
                                };
                                Ok(Expression::ArrayAgg(Box::new(agg)))
                            }
                            // Other targets keep COLLECT_SET unchanged.
                            _ => Ok(Expression::AggregateFunction(f)),
                        }
                    } else {
                        Ok(e)
                    }
                }
23098
23099 Action::PercentileConvert => {
23100 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
23101 if let Expression::AggregateFunction(f) = e {
23102 let name = match target {
23103 DialectType::DuckDB => "QUANTILE",
23104 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
23105 _ => "PERCENTILE",
23106 };
23107 Ok(Expression::AggregateFunction(Box::new(
23108 crate::expressions::AggregateFunction {
23109 name: name.to_string(),
23110 args: f.args,
23111 distinct: f.distinct,
23112 order_by: f.order_by,
23113 filter: f.filter,
23114 limit: f.limit,
23115 ignore_nulls: f.ignore_nulls,
23116 inferred_type: None,
23117 },
23118 )))
23119 } else {
23120 Ok(e)
23121 }
23122 }
23123
23124 Action::CorrIsnanWrap => {
23125 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
23126 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
23127 let corr_clone = e.clone();
23128 let isnan = Expression::Function(Box::new(Function::new(
23129 "ISNAN".to_string(),
23130 vec![corr_clone.clone()],
23131 )));
23132 let case_expr = Expression::Case(Box::new(Case {
23133 operand: None,
23134 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
23135 else_: Some(corr_clone),
23136 comments: Vec::new(),
23137 inferred_type: None,
23138 }));
23139 Ok(case_expr)
23140 }
23141
                Action::TruncToDateTrunc => {
                    // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
                    if let Expression::Function(f) = e {
                        // Only the two-argument form is a date TRUNC; a numeric
                        // TRUNC(x) keeps its original shape.
                        if f.args.len() == 2 {
                            let timestamp = f.args[0].clone();
                            let unit_expr = f.args[1].clone();

                            if matches!(target, DialectType::ClickHouse) {
                                // For ClickHouse, produce Expression::DateTrunc which the generator
                                // outputs as DATE_TRUNC(...) without going through the ClickHouse
                                // target transform that would convert it to dateTrunc
                                let unit_str = Self::get_unit_str_static(&unit_expr);
                                let dt_field = match unit_str.as_str() {
                                    "YEAR" => DateTimeField::Year,
                                    "MONTH" => DateTimeField::Month,
                                    "DAY" => DateTimeField::Day,
                                    "HOUR" => DateTimeField::Hour,
                                    "MINUTE" => DateTimeField::Minute,
                                    "SECOND" => DateTimeField::Second,
                                    "WEEK" => DateTimeField::Week,
                                    "QUARTER" => DateTimeField::Quarter,
                                    // Unrecognized units are passed through verbatim.
                                    _ => DateTimeField::Custom(unit_str),
                                };
                                Ok(Expression::DateTrunc(Box::new(
                                    crate::expressions::DateTruncFunc {
                                        this: timestamp,
                                        unit: dt_field,
                                    },
                                )))
                            } else {
                                // Everyone else: swap argument order and rename the call.
                                let new_args = vec![unit_expr, timestamp];
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    new_args,
                                ))))
                            }
                        } else {
                            Ok(Expression::Function(f))
                        }
                    } else {
                        Ok(e)
                    }
                }
23185
                Action::ArrayContainsConvert => {
                    // ARRAY_CONTAINS(arr, val) rewrites per target dialect.
                    if let Expression::ArrayContains(f) = e {
                        match target {
                            DialectType::Presto | DialectType::Trino => {
                                // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "CONTAINS".to_string(),
                                    vec![f.this, f.expression],
                                ))))
                            }
                            DialectType::Snowflake => {
                                // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
                                // Snowflake takes the value first and expects it as VARIANT.
                                let cast_val =
                                    Expression::Cast(Box::new(crate::expressions::Cast {
                                        this: f.expression,
                                        to: crate::expressions::DataType::Custom {
                                            name: "VARIANT".to_string(),
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "ARRAY_CONTAINS".to_string(),
                                    vec![cast_val, f.this],
                                ))))
                            }
                            // Other targets keep the node unchanged.
                            _ => Ok(Expression::ArrayContains(f)),
                        }
                    } else {
                        Ok(e)
                    }
                }
23221
                Action::ArrayExceptConvert => {
                    // ARRAY_EXCEPT(source, exclude): dispatch on the target dialect.
                    // Snowflake-sourced queries going to DuckDB get multiset ("bag")
                    // semantics; a plain DuckDB target gets set semantics instead.
                    if let Expression::ArrayExcept(f) = e {
                        let source_arr = f.this;
                        let exclude_arr = f.expression;
                        match target {
                            DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
                                // Snowflake ARRAY_EXCEPT -> DuckDB bag semantics:
                                // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
                                // ELSE LIST_TRANSFORM(LIST_FILTER(
                                //     LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source))),
                                //     pair -> (LENGTH(LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1]))
                                //             > LENGTH(LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])))),
                                //     pair -> pair[1])
                                // END

                                // Build null check
                                let source_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: source_arr.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                                let exclude_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: exclude_arr.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                                let null_check = Expression::Or(Box::new(crate::expressions::BinaryOp {
                                    left: source_is_null,
                                    right: exclude_is_null,
                                    left_comments: vec![],
                                    operator_comments: vec![],
                                    trailing_comments: vec![],
                                    inferred_type: None,
                                }));

                                // GENERATE_SERIES(1, LENGTH(source))
                                let gen_series = Expression::Function(Box::new(Function::new(
                                    "GENERATE_SERIES".to_string(),
                                    vec![
                                        Expression::number(1),
                                        Expression::Function(Box::new(Function::new(
                                            "LENGTH".to_string(),
                                            vec![source_arr.clone()],
                                        ))),
                                    ],
                                )));

                                // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source)))
                                // pairs each element with its 1-based position
                                let list_zip = Expression::Function(Box::new(Function::new(
                                    "LIST_ZIP".to_string(),
                                    vec![source_arr.clone(), gen_series],
                                )));

                                // pair[1] and pair[2]
                                let pair_col = Expression::column("pair");
                                let pair_1 = Expression::Subscript(Box::new(crate::expressions::Subscript {
                                    this: pair_col.clone(),
                                    index: Expression::number(1),
                                }));
                                let pair_2 = Expression::Subscript(Box::new(crate::expressions::Subscript {
                                    this: pair_col.clone(),
                                    index: Expression::number(2),
                                }));

                                // source[1:pair[2]]
                                let source_slice = Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
                                    this: source_arr.clone(),
                                    start: Some(Expression::number(1)),
                                    end: Some(pair_2),
                                }));

                                // SQL lambda parameter "e" (a generated column reference);
                                // unrelated to the Rust variable `e` matched above.
                                let e_col = Expression::column("e");

                                // e -> e IS NOT DISTINCT FROM pair[1]
                                let inner_lambda1 = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                    parameters: vec![crate::expressions::Identifier::new("e")],
                                    body: Expression::NullSafeEq(Box::new(crate::expressions::BinaryOp {
                                        left: e_col.clone(),
                                        right: pair_1.clone(),
                                        left_comments: vec![],
                                        operator_comments: vec![],
                                        trailing_comments: vec![],
                                        inferred_type: None,
                                    })),
                                    colon: false,
                                    parameter_types: vec![],
                                }));

                                // LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
                                let inner_filter1 = Expression::Function(Box::new(Function::new(
                                    "LIST_FILTER".to_string(),
                                    vec![source_slice, inner_lambda1],
                                )));

                                // LENGTH(LIST_FILTER(source[1:pair[2]], ...))
                                let len1 = Expression::Function(Box::new(Function::new(
                                    "LENGTH".to_string(),
                                    vec![inner_filter1],
                                )));

                                // e -> e IS NOT DISTINCT FROM pair[1]
                                let inner_lambda2 = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                    parameters: vec![crate::expressions::Identifier::new("e")],
                                    body: Expression::NullSafeEq(Box::new(crate::expressions::BinaryOp {
                                        left: e_col,
                                        right: pair_1.clone(),
                                        left_comments: vec![],
                                        operator_comments: vec![],
                                        trailing_comments: vec![],
                                        inferred_type: None,
                                    })),
                                    colon: false,
                                    parameter_types: vec![],
                                }));

                                // LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])
                                let inner_filter2 = Expression::Function(Box::new(Function::new(
                                    "LIST_FILTER".to_string(),
                                    vec![exclude_arr.clone(), inner_lambda2],
                                )));

                                // LENGTH(LIST_FILTER(exclude, ...))
                                let len2 = Expression::Function(Box::new(Function::new(
                                    "LENGTH".to_string(),
                                    vec![inner_filter2],
                                )));

                                // (LENGTH(...) > LENGTH(...))
                                // Keep the i-th occurrence only while its count in the prefix
                                // exceeds its count in exclude (per-occurrence bag difference).
                                let cond = Expression::Paren(Box::new(Paren {
                                    this: Expression::Gt(Box::new(crate::expressions::BinaryOp {
                                        left: len1,
                                        right: len2,
                                        left_comments: vec![],
                                        operator_comments: vec![],
                                        trailing_comments: vec![],
                                        inferred_type: None,
                                    })),
                                    trailing_comments: vec![],
                                }));

                                // pair -> (condition)
                                let filter_lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                    parameters: vec![crate::expressions::Identifier::new("pair")],
                                    body: cond,
                                    colon: false,
                                    parameter_types: vec![],
                                }));

                                // LIST_FILTER(LIST_ZIP(...), pair -> ...)
                                let outer_filter = Expression::Function(Box::new(Function::new(
                                    "LIST_FILTER".to_string(),
                                    vec![list_zip, filter_lambda],
                                )));

                                // pair -> pair[1]
                                let transform_lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                    parameters: vec![crate::expressions::Identifier::new("pair")],
                                    body: pair_1,
                                    colon: false,
                                    parameter_types: vec![],
                                }));

                                // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
                                let list_transform = Expression::Function(Box::new(Function::new(
                                    "LIST_TRANSFORM".to_string(),
                                    vec![outer_filter, transform_lambda],
                                )));

                                Ok(Expression::Case(Box::new(Case {
                                    operand: None,
                                    whens: vec![(null_check, Expression::Null(Null))],
                                    else_: Some(list_transform),
                                    comments: Vec::new(),
                                    inferred_type: None,
                                })))
                            }
                            DialectType::DuckDB => {
                                // ARRAY_EXCEPT(source, exclude) -> set semantics for DuckDB:
                                // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
                                // ELSE LIST_FILTER(LIST_DISTINCT(source),
                                //     e -> LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)) = 0)
                                // END

                                // Build: source IS NULL
                                let source_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: source_arr.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                                // Build: exclude IS NULL
                                let exclude_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: exclude_arr.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                                // source IS NULL OR exclude IS NULL
                                let null_check = Expression::Or(Box::new(crate::expressions::BinaryOp {
                                    left: source_is_null,
                                    right: exclude_is_null,
                                    left_comments: vec![],
                                    operator_comments: vec![],
                                    trailing_comments: vec![],
                                    inferred_type: None,
                                }));

                                // LIST_DISTINCT(source)
                                let list_distinct = Expression::Function(Box::new(Function::new(
                                    "LIST_DISTINCT".to_string(),
                                    vec![source_arr.clone()],
                                )));

                                // x IS NOT DISTINCT FROM e
                                // NULL-safe equality so NULL elements compare equal to NULL.
                                let x_col = Expression::column("x");
                                let e_col = Expression::column("e");
                                let is_not_distinct = Expression::NullSafeEq(Box::new(crate::expressions::BinaryOp {
                                    left: x_col,
                                    right: e_col.clone(),
                                    left_comments: vec![],
                                    operator_comments: vec![],
                                    trailing_comments: vec![],
                                    inferred_type: None,
                                }));

                                // x -> x IS NOT DISTINCT FROM e
                                let inner_lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                    parameters: vec![crate::expressions::Identifier::new("x")],
                                    body: is_not_distinct,
                                    colon: false,
                                    parameter_types: vec![],
                                }));

                                // LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)
                                let inner_list_filter = Expression::Function(Box::new(Function::new(
                                    "LIST_FILTER".to_string(),
                                    vec![exclude_arr.clone(), inner_lambda],
                                )));

                                // LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e))
                                let len_inner = Expression::Function(Box::new(Function::new(
                                    "LENGTH".to_string(),
                                    vec![inner_list_filter],
                                )));

                                // LENGTH(...) = 0
                                let eq_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp {
                                    left: len_inner,
                                    right: Expression::number(0),
                                    left_comments: vec![],
                                    operator_comments: vec![],
                                    trailing_comments: vec![],
                                    inferred_type: None,
                                }));

                                // e -> LENGTH(LIST_FILTER(...)) = 0
                                let outer_lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                    parameters: vec![crate::expressions::Identifier::new("e")],
                                    body: eq_zero,
                                    colon: false,
                                    parameter_types: vec![],
                                }));

                                // LIST_FILTER(LIST_DISTINCT(source), e -> ...)
                                let outer_list_filter = Expression::Function(Box::new(Function::new(
                                    "LIST_FILTER".to_string(),
                                    vec![list_distinct, outer_lambda],
                                )));

                                // CASE WHEN ... IS NULL ... THEN NULL ELSE LIST_FILTER(...) END
                                Ok(Expression::Case(Box::new(Case {
                                    operand: None,
                                    whens: vec![(
                                        null_check,
                                        Expression::Null(Null),
                                    )],
                                    else_: Some(outer_list_filter),
                                    comments: Vec::new(),
                                    inferred_type: None,
                                })))
                            }
                            DialectType::Snowflake => {
                                // Snowflake: ARRAY_EXCEPT(source, exclude) - keep as-is
                                Ok(Expression::ArrayExcept(Box::new(crate::expressions::BinaryFunc {
                                    this: source_arr,
                                    expression: exclude_arr,
                                    original_name: None,
                                    inferred_type: None,
                                })))
                            }
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // Presto/Trino: ARRAY_EXCEPT(source, exclude) - keep function name, array syntax already converted
                                Ok(Expression::Function(Box::new(Function::new(
                                    "ARRAY_EXCEPT".to_string(),
                                    vec![source_arr, exclude_arr],
                                ))))
                            }
                            _ => Ok(Expression::ArrayExcept(Box::new(crate::expressions::BinaryFunc {
                                this: source_arr,
                                expression: exclude_arr,
                                original_name: None,
                                inferred_type: None,
                            }))),
                        }
                    } else {
                        Ok(e)
                    }
                }
23528
                Action::RegexpLikeExasolAnchor => {
                    // RegexpLike -> Exasol: wrap pattern with .*...*
                    // Exasol REGEXP_LIKE does full-string match, but RLIKE/REGEXP from other
                    // dialects does partial match, so we need to anchor with .* on both sides
                    if let Expression::RegexpLike(mut f) = e {
                        match &f.pattern {
                            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
                                let Literal::String(s) = lit.as_ref() else { unreachable!() };
                                // String literal: wrap with .*...*
                                f.pattern = Expression::Literal(Box::new(Literal::String(
                                    format!(".*{}.*", s),
                                )));
                            }
                            _ => {
                                // Non-literal: wrap with CONCAT('.*', pattern, '.*')
                                // (built as two nested binary concats, parenthesized; the
                                // old pattern moves into the inner concat's right side)
                                f.pattern = Expression::Paren(Box::new(crate::expressions::Paren {
                                    this: Expression::Concat(Box::new(crate::expressions::BinaryOp {
                                        left: Expression::Concat(Box::new(crate::expressions::BinaryOp {
                                            left: Expression::Literal(Box::new(Literal::String(".*".to_string()))),
                                            right: f.pattern,
                                            left_comments: vec![],
                                            operator_comments: vec![],
                                            trailing_comments: vec![],
                                            inferred_type: None,
                                        })),
                                        right: Expression::Literal(Box::new(Literal::String(".*".to_string()))),
                                        left_comments: vec![],
                                        operator_comments: vec![],
                                        trailing_comments: vec![],
                                        inferred_type: None,
                                    })),
                                    trailing_comments: vec![],
                                }));
                            }
                        }
                        Ok(Expression::RegexpLike(f))
                    } else {
                        Ok(e)
                    }
                }
23569
23570 Action::ArrayPositionSnowflakeSwap => {
23571 // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
23572 if let Expression::ArrayPosition(f) = e {
23573 Ok(Expression::ArrayPosition(Box::new(crate::expressions::BinaryFunc {
23574 this: f.expression,
23575 expression: f.this,
23576 original_name: f.original_name,
23577 inferred_type: f.inferred_type,
23578 })))
23579 } else {
23580 Ok(e)
23581 }
23582 }
23583
                Action::SnowflakeArrayPositionToDuckDB => {
                    // Snowflake ARRAY_POSITION(value, array) -> DuckDB ARRAY_POSITION(array, value) - 1
                    // Snowflake uses 0-based indexing, DuckDB uses 1-based
                    // The parser has this=value, expression=array (Snowflake order)
                    if let Expression::ArrayPosition(f) = e {
                        // Create ARRAY_POSITION(array, value) in standard order
                        let standard_pos = Expression::ArrayPosition(Box::new(crate::expressions::BinaryFunc {
                            this: f.expression, // array
                            expression: f.this, // value
                            original_name: f.original_name,
                            inferred_type: f.inferred_type,
                        }));
                        // Subtract 1 for zero-based indexing
                        // NOTE(review): for an element that is absent the subtraction shifts
                        // whatever DuckDB's "not found" result is by -1 — presumably this
                        // matches Snowflake's NULL behavior; confirm against the generator.
                        Ok(Expression::Sub(Box::new(BinaryOp {
                            left: standard_pos,
                            right: Expression::number(1),
                            left_comments: vec![],
                            operator_comments: vec![],
                            trailing_comments: vec![],
                            inferred_type: None,
                        })))
                    } else {
                        Ok(e)
                    }
                }
23609
                Action::ArrayDistinctConvert => {
                    // ARRAY_DISTINCT(arr) -> DuckDB NULL-aware CASE:
                    // CASE WHEN ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
                    //     THEN LIST_APPEND(LIST_DISTINCT(LIST_FILTER(arr, _u -> NOT _u IS NULL)), NULL)
                    //     ELSE LIST_DISTINCT(arr)
                    // END
                    // NOTE(review): relies on ARRAY_LENGTH counting NULL elements while
                    // LIST_COUNT skips them, so a mismatch signals "arr contains NULL" —
                    // confirm against DuckDB's list-function documentation.
                    if let Expression::ArrayDistinct(f) = e {
                        let arr = f.this;

                        // ARRAY_LENGTH(arr)
                        let array_length = Expression::Function(Box::new(Function::new(
                            "ARRAY_LENGTH".to_string(),
                            vec![arr.clone()],
                        )));
                        // LIST_COUNT(arr)
                        let list_count = Expression::Function(Box::new(Function::new(
                            "LIST_COUNT".to_string(),
                            vec![arr.clone()],
                        )));
                        // ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
                        let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
                            left: array_length,
                            right: list_count,
                            left_comments: vec![],
                            operator_comments: vec![],
                            trailing_comments: vec![],
                            inferred_type: None,
                        }));

                        // _u column
                        let u_col = Expression::column("_u");
                        // NOT _u IS NULL
                        let u_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
                            this: u_col.clone(),
                            not: false,
                            postfix_form: false,
                        }));
                        let not_u_is_null = Expression::Not(Box::new(crate::expressions::UnaryOp {
                            this: u_is_null,
                            inferred_type: None,
                        }));
                        // _u -> NOT _u IS NULL
                        let filter_lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                            parameters: vec![crate::expressions::Identifier::new("_u")],
                            body: not_u_is_null,
                            colon: false,
                            parameter_types: vec![],
                        }));
                        // LIST_FILTER(arr, _u -> NOT _u IS NULL)
                        let list_filter = Expression::Function(Box::new(Function::new(
                            "LIST_FILTER".to_string(),
                            vec![arr.clone(), filter_lambda],
                        )));
                        // LIST_DISTINCT(LIST_FILTER(arr, ...))
                        let list_distinct_filtered = Expression::Function(Box::new(Function::new(
                            "LIST_DISTINCT".to_string(),
                            vec![list_filter],
                        )));
                        // LIST_APPEND(LIST_DISTINCT(LIST_FILTER(...)), NULL)
                        // re-adds a single NULL to represent all the filtered-out NULLs
                        let list_append = Expression::Function(Box::new(Function::new(
                            "LIST_APPEND".to_string(),
                            vec![list_distinct_filtered, Expression::Null(Null)],
                        )));

                        // LIST_DISTINCT(arr)
                        let list_distinct = Expression::Function(Box::new(Function::new(
                            "LIST_DISTINCT".to_string(),
                            vec![arr],
                        )));

                        // CASE WHEN neq THEN list_append ELSE list_distinct END
                        Ok(Expression::Case(Box::new(Case {
                            operand: None,
                            whens: vec![(neq, list_append)],
                            else_: Some(list_distinct),
                            comments: Vec::new(),
                            inferred_type: None,
                        })))
                    } else {
                        Ok(e)
                    }
                }
23692
23693 Action::ArrayDistinctClickHouse => {
23694 // ARRAY_DISTINCT(arr) -> arrayDistinct(arr) for ClickHouse
23695 if let Expression::ArrayDistinct(f) = e {
23696 Ok(Expression::Function(Box::new(Function::new(
23697 "arrayDistinct".to_string(),
23698 vec![f.this],
23699 ))))
23700 } else {
23701 Ok(e)
23702 }
23703 }
23704
                Action::ArrayContainsDuckDBConvert => {
                    // Snowflake ARRAY_CONTAINS(value, array) -> DuckDB NULL-aware:
                    // CASE WHEN value IS NULL
                    //     THEN NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
                    //     ELSE ARRAY_CONTAINS(array, value)
                    // END
                    // Note: In Rust AST from Snowflake parse, this=value (first arg), expression=array (second arg)
                    if let Expression::ArrayContains(f) = e {
                        let value = f.this;
                        let array = f.expression;

                        // value IS NULL
                        let value_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
                            this: value.clone(),
                            not: false,
                            postfix_form: false,
                        }));

                        // ARRAY_LENGTH(array)
                        let array_length = Expression::Function(Box::new(Function::new(
                            "ARRAY_LENGTH".to_string(),
                            vec![array.clone()],
                        )));
                        // LIST_COUNT(array)
                        let list_count = Expression::Function(Box::new(Function::new(
                            "LIST_COUNT".to_string(),
                            vec![array.clone()],
                        )));
                        // ARRAY_LENGTH(array) <> LIST_COUNT(array)
                        // a mismatch is used as the "array contains NULL" probe
                        let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
                            left: array_length,
                            right: list_count,
                            left_comments: vec![],
                            operator_comments: vec![],
                            trailing_comments: vec![],
                            inferred_type: None,
                        }));
                        // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
                        // maps FALSE -> NULL so "NULL not found" yields NULL, not FALSE
                        let nullif = Expression::Nullif(Box::new(crate::expressions::Nullif {
                            this: Box::new(neq),
                            expression: Box::new(Expression::Boolean(crate::expressions::BooleanLiteral { value: false })),
                        }));

                        // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
                        let array_contains = Expression::Function(Box::new(Function::new(
                            "ARRAY_CONTAINS".to_string(),
                            vec![array, value],
                        )));

                        // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
                        Ok(Expression::Case(Box::new(Case {
                            operand: None,
                            whens: vec![(value_is_null, nullif)],
                            else_: Some(array_contains),
                            comments: Vec::new(),
                            inferred_type: None,
                        })))
                    } else {
                        Ok(e)
                    }
                }
23766
Action::StrPositionExpand => {
    // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
    // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
    // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
    if let Expression::StrPosition(sp) = e {
        // Destructure the node; `occurrence` is only re-attached in the
        // fallback branch below — the expanded Presto/DuckDB forms drop it.
        let crate::expressions::StrPosition {
            this,
            substr,
            position,
            occurrence,
        } = *sp;
        let string = *this;
        // Missing search string degenerates to a NULL literal argument.
        let substr_expr = match substr {
            Some(s) => *s,
            None => Expression::Null(Null),
        };
        // SQL string positions are 1-based, so the default start offset is 1.
        let pos = match position {
            Some(p) => *p,
            None => Expression::number(1),
        };

        // SUBSTRING(string, pos)
        let substring_call = Expression::Function(Box::new(Function::new(
            "SUBSTRING".to_string(),
            vec![string.clone(), pos.clone()],
        )));
        // STRPOS(SUBSTRING(string, pos), substr)
        let strpos_call = Expression::Function(Box::new(Function::new(
            "STRPOS".to_string(),
            vec![substring_call, substr_expr.clone()],
        )));
        // STRPOS(...) + pos - 1: re-bases the match index from the suffix
        // substring back onto the original string.
        let pos_adjusted =
            Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                    strpos_call.clone(),
                    pos.clone(),
                ))),
                Expression::number(1),
            )));
        // STRPOS(...) = 0 means "not found"; both expansions then yield 0
        // instead of applying the (incorrect) offset adjustment.
        let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
            strpos_call.clone(),
            Expression::number(0),
        )));

        match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                Ok(Expression::Function(Box::new(Function::new(
                    "IF".to_string(),
                    vec![is_zero, Expression::number(0), pos_adjusted],
                ))))
            }
            DialectType::DuckDB => {
                // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                Ok(Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(is_zero, Expression::number(0))],
                    else_: Some(pos_adjusted),
                    comments: Vec::new(),
                    inferred_type: None,
                })))
            }
            _ => {
                // Other targets: reconstruct the StrPosition node. Note the
                // defaults applied above (NULL substr, position 1) are baked
                // back in even if they were originally absent.
                Ok(Expression::StrPosition(Box::new(
                    crate::expressions::StrPosition {
                        this: Box::new(string),
                        substr: Some(Box::new(substr_expr)),
                        position: Some(Box::new(pos)),
                        occurrence,
                    },
                )))
            }
        }
    } else {
        Ok(e)
    }
}
23847
Action::MonthsBetweenConvert => {
    // MONTHS_BETWEEN(end, start) -> target-specific month difference.
    // In the parsed AST `this` is the END date (first arg) and `expression`
    // is the START date (second arg).
    if let Expression::MonthsBetween(mb) = e {
        let crate::expressions::BinaryFunc {
            this: end_date,
            expression: start_date,
            ..
        } = *mb;
        match target {
            DialectType::DuckDB => {
                // DuckDB has no MONTHS_BETWEEN; emulate the fractional result:
                //   DATE_DIFF('MONTH', start, end)
                //   + CASE WHEN both dates are the last day of their month THEN 0
                //          ELSE (DAY(end) - DAY(start)) / 31.0 END
                // NOTE(review): the 31.0 divisor mirrors Oracle/Snowflake-style
                // MONTHS_BETWEEN semantics — confirm against the reference docs.
                let cast_end = Self::ensure_cast_date(end_date);
                let cast_start = Self::ensure_cast_date(start_date);
                let dd = Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![
                        Expression::string("MONTH"),
                        cast_start.clone(),
                        cast_end.clone(),
                    ],
                )));
                let day_end = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_end.clone()],
                )));
                let day_start = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![cast_start.clone()],
                )));
                let last_day_end = Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_end.clone()],
                )));
                let last_day_start = Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_start.clone()],
                )));
                // DAY(LAST_DAY(d)) — the day-of-month of the month's last day.
                let day_last_end = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![last_day_end],
                )));
                let day_last_start = Expression::Function(Box::new(Function::new(
                    "DAY".to_string(),
                    vec![last_day_start],
                )));
                // cond1/cond2: each date falls on the last day of its month.
                let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                    day_end.clone(),
                    day_last_end,
                )));
                let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                    day_start.clone(),
                    day_last_start,
                )));
                let both_cond =
                    Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                // (DAY(end) - DAY(start)) / 31.0 — the fractional part,
                // parenthesized so the generated SQL keeps precedence.
                let day_diff =
                    Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
                let day_diff_paren =
                    Expression::Paren(Box::new(crate::expressions::Paren {
                        this: day_diff,
                        trailing_comments: Vec::new(),
                    }));
                let frac = Expression::Div(Box::new(BinaryOp::new(
                    day_diff_paren,
                    Expression::Literal(Box::new(Literal::Number("31.0".to_string()))),
                )));
                let case_expr = Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(both_cond, Expression::number(0))],
                    else_: Some(frac),
                    comments: Vec::new(),
                    inferred_type: None,
                }));
                Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
            }
            DialectType::Snowflake | DialectType::Redshift => {
                // DATEDIFF(MONTH, start, end) — unit is a bare identifier here.
                let unit = Expression::Identifier(Identifier::new("MONTH"));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEDIFF".to_string(),
                    vec![unit, start_date, end_date],
                ))))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // DATE_DIFF('MONTH', start, end) — unit is a string literal.
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_DIFF".to_string(),
                    vec![Expression::string("MONTH"), start_date, end_date],
                ))))
            }
            _ => Ok(Expression::MonthsBetween(Box::new(
                crate::expressions::BinaryFunc {
                    this: end_date,
                    expression: start_date,
                    original_name: None,
                    inferred_type: None,
                },
            ))),
        }
    } else {
        Ok(e)
    }
}
23947
Action::AddMonthsConvert => {
    // ADD_MONTHS(date, n) -> target-specific equivalent.
    if let Expression::AddMonths(am) = e {
        let date = am.this;
        let val = am.expression;
        match target {
            DialectType::TSQL | DialectType::Fabric => {
                // DATEADD(MONTH, n, CAST(date AS DATETIME2))
                let cast_date = Self::ensure_cast_datetime2(date);
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new("MONTH")),
                        val,
                        cast_date,
                    ],
                ))))
            }
            DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
                // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
                // Optionally wrapped in CAST(... AS type) if the input had a specific type

                // Determine the cast type from the date expression.
                // (cast_date, return_type): return_type = Some(..) means the
                // whole CASE result gets wrapped in an outer CAST at the end.
                let (cast_date, return_type) = match &date {
                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
                        // String literal: CAST(str AS TIMESTAMP), no outer CAST
                        (
                            Expression::Cast(Box::new(Cast {
                                this: date.clone(),
                                to: DataType::Timestamp {
                                    precision: None,
                                    timezone: false,
                                },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })),
                            None,
                        )
                    }
                    Expression::Cast(c) => {
                        // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
                        (date.clone(), Some(c.to.clone()))
                    }
                    _ => {
                        // Expression or NULL::TYPE - keep as-is, check for cast type
                        // NOTE(review): this inner Expression::Cast check can
                        // never match — the Expression::Cast arm above already
                        // captured that case, so only the else branch runs.
                        if let Expression::Cast(c) = &date {
                            (date.clone(), Some(c.to.clone()))
                        } else {
                            (date.clone(), None)
                        }
                    }
                };

                // Build the interval expression
                // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
                // For integer values, use INTERVAL val MONTH
                let is_non_integer_val = match &val {
                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => { let Literal::Number(n) = lit.as_ref() else { unreachable!() }; n.contains('.') },
                    Expression::Cast(_) => true, // e.g. 3.2::DECIMAL(10,2)
                    Expression::Neg(n) => {
                        // Negated literal: fractional iff the inner number
                        // literal contains a decimal point.
                        if let Expression::Literal(lit) = &n.this {
                            if let Literal::Number(s) = lit.as_ref() {
                                s.contains('.')
                            } else { false }
                        } else {
                            false
                        }
                    }
                    _ => false,
                };

                let add_interval = if is_non_integer_val {
                    // TO_MONTHS(CAST(ROUND(val) AS INT))
                    let round_val = Expression::Function(Box::new(Function::new(
                        "ROUND".to_string(),
                        vec![val.clone()],
                    )));
                    let cast_int = Expression::Cast(Box::new(Cast {
                        this: round_val,
                        to: DataType::Int {
                            length: None,
                            integer_spelling: false,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    Expression::Function(Box::new(Function::new(
                        "TO_MONTHS".to_string(),
                        vec![cast_int],
                    )))
                } else {
                    // INTERVAL val MONTH
                    // For negative numbers (and NULL), wrap in parens so the
                    // generated INTERVAL syntax stays parseable.
                    let interval_val = match &val {
                        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n.starts_with('-'))
                            =>
                        {
                            // NOTE(review): this destructure is a no-op kept
                            // only to assert the guard's invariant.
                            let Literal::Number(_) = lit.as_ref() else { unreachable!() };
                            Expression::Paren(Box::new(Paren {
                                this: val.clone(),
                                trailing_comments: Vec::new(),
                            }))
                        }
                        Expression::Neg(_) => Expression::Paren(Box::new(Paren {
                            this: val.clone(),
                            trailing_comments: Vec::new(),
                        })),
                        Expression::Null(_) => Expression::Paren(Box::new(Paren {
                            this: val.clone(),
                            trailing_comments: Vec::new(),
                        })),
                        _ => val.clone(),
                    };
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(interval_val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        }),
                    }))
                };

                // Build: date + interval
                let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
                    cast_date.clone(),
                    add_interval.clone(),
                )));

                // Build LAST_DAY(date)
                let last_day_date = Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![cast_date.clone()],
                )));

                // Build LAST_DAY(date + interval)
                let last_day_date_plus =
                    Expression::Function(Box::new(Function::new(
                        "LAST_DAY".to_string(),
                        vec![date_plus_interval.clone()],
                    )));

                // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
                let case_expr = Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(
                        Expression::Eq(Box::new(BinaryOp::new(
                            last_day_date,
                            cast_date.clone(),
                        ))),
                        last_day_date_plus,
                    )],
                    else_: Some(date_plus_interval),
                    comments: Vec::new(),
                    inferred_type: None,
                }));

                // Wrap in CAST(... AS type) if needed
                if let Some(dt) = return_type {
                    Ok(Expression::Cast(Box::new(Cast {
                        this: case_expr,
                        to: dt,
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    Ok(case_expr)
                }
            }
            DialectType::DuckDB => {
                // Non-Snowflake source: simple date + INTERVAL
                // String-literal dates get CAST to TIMESTAMP first so the
                // addition type-checks in DuckDB.
                let cast_date =
                    if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))) {
                        Expression::Cast(Box::new(Cast {
                            this: date,
                            to: DataType::Timestamp {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }))
                    } else {
                        date
                    };
                let interval =
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        }),
                    }));
                Ok(Expression::Add(Box::new(BinaryOp::new(
                    cast_date, interval,
                ))))
            }
            DialectType::Snowflake => {
                // Keep ADD_MONTHS when source is also Snowflake
                if matches!(source, DialectType::Snowflake) {
                    Ok(Expression::Function(Box::new(Function::new(
                        "ADD_MONTHS".to_string(),
                        vec![date, val],
                    ))))
                } else {
                    // Other sources: DATEADD(MONTH, n, date)
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![
                            Expression::Identifier(Identifier::new("MONTH")),
                            val,
                            date,
                        ],
                    ))))
                }
            }
            DialectType::Redshift => {
                // DATEADD(MONTH, n, date)
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new("MONTH")),
                        val,
                        date,
                    ],
                ))))
            }
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // DATE_ADD('MONTH', n, date) — string-literal dates CAST to
                // TIMESTAMP first.
                let cast_date =
                    if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))) {
                        Expression::Cast(Box::new(Cast {
                            this: date,
                            to: DataType::Timestamp {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }))
                    } else {
                        date
                    };
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![Expression::string("MONTH"), val, cast_date],
                ))))
            }
            DialectType::BigQuery => {
                // DATE_ADD(date, INTERVAL n MONTH); string-literal dates are
                // CAST to the BigQuery DATETIME type (a Custom type here).
                let interval =
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        }),
                    }));
                let cast_date =
                    if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))) {
                        Expression::Cast(Box::new(Cast {
                            this: date,
                            to: DataType::Custom {
                                name: "DATETIME".to_string(),
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }))
                    } else {
                        date
                    };
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![cast_date, interval],
                ))))
            }
            DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                // These dialects support ADD_MONTHS natively.
                Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date, val],
                ))))
            }
            _ => {
                // Default: keep as AddMonths expression
                Ok(Expression::AddMonths(Box::new(
                    crate::expressions::BinaryFunc {
                        this: date,
                        expression: val,
                        original_name: None,
                        inferred_type: None,
                    },
                )))
            }
        }
    } else {
        Ok(e)
    }
}
24257
Action::PercentileContConvert => {
    // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
    // Presto/Trino: APPROX_PERCENTILE(col, p)
    // Spark/Databricks: PERCENTILE_APPROX(col, p)
    if let Expression::WithinGroup(wg) = e {
        // Extract percentile value and order by column.
        // NOTE(review): `_is_disc` is computed but never used — both
        // PERCENTILE_CONT and PERCENTILE_DISC currently map to the same
        // approximate function; confirm that is intended.
        let (percentile, _is_disc) = match &wg.this {
            Expression::Function(f) => {
                let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                // Missing percentile argument defaults to the median (0.5).
                let pct = f.args.first().cloned().unwrap_or(Expression::Literal(Box::new(Literal::Number("0.5".to_string()),)));
                (pct, is_disc)
            }
            Expression::AggregateFunction(af) => {
                let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                let pct = af.args.first().cloned().unwrap_or(Expression::Literal(Box::new(Literal::Number("0.5".to_string()),)));
                (pct, is_disc)
            }
            Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
            // Anything else under WITHIN GROUP is left untouched.
            _ => return Ok(Expression::WithinGroup(wg)),
        };
        // First ORDER BY key becomes the measured column; literal 1 is the
        // fallback when the ORDER BY list is empty.
        let col = wg
            .order_by
            .first()
            .map(|o| o.this.clone())
            .unwrap_or(Expression::Literal(Box::new(Literal::Number("1".to_string()))));

        let func_name = match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                "APPROX_PERCENTILE"
            }
            _ => "PERCENTILE_APPROX", // Spark, Databricks
        };
        Ok(Expression::Function(Box::new(Function::new(
            func_name.to_string(),
            vec![col, percentile],
        ))))
    } else {
        Ok(e)
    }
}
24298
24299 Action::CurrentUserSparkParens => {
24300 // CURRENT_USER -> CURRENT_USER() for Spark
24301 if let Expression::CurrentUser(_) = e {
24302 Ok(Expression::Function(Box::new(Function::new(
24303 "CURRENT_USER".to_string(),
24304 vec![],
24305 ))))
24306 } else {
24307 Ok(e)
24308 }
24309 }
24310
Action::SparkDateFuncCast => {
    // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
    // Shared argument-wrapping rule applied to all three functions below.
    let cast_arg = |arg: Expression| -> Expression {
        match target {
            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                // Presto family needs the double CAST(CAST(.. AS TIMESTAMP) AS DATE) form.
                Self::double_cast_timestamp_date(arg)
            }
            _ => {
                // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
                Self::ensure_cast_date(arg)
            }
        }
    };
    match e {
        Expression::Month(f) => Ok(Expression::Month(Box::new(
            crate::expressions::UnaryFunc::new(cast_arg(f.this)),
        ))),
        Expression::Year(f) => Ok(Expression::Year(Box::new(
            crate::expressions::UnaryFunc::new(cast_arg(f.this)),
        ))),
        Expression::Day(f) => Ok(Expression::Day(Box::new(
            crate::expressions::UnaryFunc::new(cast_arg(f.this)),
        ))),
        // Any other node passes through unchanged.
        other => Ok(other),
    }
}
24337
24338 Action::MapFromArraysConvert => {
24339 // Expression::MapFromArrays -> target-specific
24340 if let Expression::MapFromArrays(mfa) = e {
24341 let keys = mfa.this;
24342 let values = mfa.expression;
24343 match target {
24344 DialectType::Snowflake => Ok(Expression::Function(Box::new(
24345 Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
24346 ))),
24347 _ => {
24348 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
24349 Ok(Expression::Function(Box::new(Function::new(
24350 "MAP".to_string(),
24351 vec![keys, values],
24352 ))))
24353 }
24354 }
24355 } else {
24356 Ok(e)
24357 }
24358 }
24359
Action::AnyToExists => {
    // `expr <op> ANY (subquery)` -> EXISTS(subquery, x -> expr <op> x),
    // i.e. the list/lambda form of EXISTS. Without a recorded operator the
    // node is passed through untouched.
    if let Expression::Any(q) = e {
        if let Some(op) = q.op.clone() {
            // `x` is the lambda parameter bound to each element.
            let lambda_param = crate::expressions::Identifier::new("x");
            let rhs = Expression::Identifier(lambda_param.clone());
            // Rebuild the comparison with the original left operand and the
            // lambda parameter on the right.
            let body = match op {
                crate::expressions::QuantifiedOp::Eq => {
                    Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
                }
                crate::expressions::QuantifiedOp::Neq => {
                    Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
                }
                crate::expressions::QuantifiedOp::Lt => {
                    Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
                }
                crate::expressions::QuantifiedOp::Lte => {
                    Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
                }
                crate::expressions::QuantifiedOp::Gt => {
                    Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
                }
                crate::expressions::QuantifiedOp::Gte => {
                    Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
                }
            };
            let lambda =
                Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                    parameters: vec![lambda_param],
                    body,
                    colon: false,
                    parameter_types: Vec::new(),
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "EXISTS".to_string(),
                vec![q.subquery, lambda],
            ))))
        } else {
            // No operator recorded: leave the ANY node as-is.
            Ok(Expression::Any(q))
        }
    } else {
        Ok(e)
    }
}
24403
Action::GenerateSeriesConvert => {
    // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
    // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
    // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
    if let Expression::Function(f) = e {
        if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
            let start = f.args[0].clone();
            let end = f.args[1].clone();
            // Optional third argument: the step.
            let step = f.args.get(2).cloned();

            // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
            let step = step.map(|s| Self::normalize_interval_string(s, target));

            // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
            let maybe_cast_timestamp = |arg: Expression| -> Expression {
                if matches!(
                    target,
                    DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Athena
                        | DialectType::Spark
                        | DialectType::Databricks
                        | DialectType::Hive
                ) {
                    match &arg {
                        Expression::CurrentTimestamp(_) => {
                            Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Timestamp {
                                    precision: None,
                                    timezone: false,
                                },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        }
                        _ => arg,
                    }
                } else {
                    arg
                }
            };

            let start = maybe_cast_timestamp(start);
            let end = maybe_cast_timestamp(end);

            // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
            // (only the step normalization above applies).
            if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
                let mut gs_args = vec![start, end];
                if let Some(step) = step {
                    gs_args.push(step);
                }
                return Ok(Expression::Function(Box::new(Function::new(
                    "GENERATE_SERIES".to_string(),
                    gs_args,
                ))));
            }

            // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
            if matches!(target, DialectType::DuckDB) {
                let mut gs_args = vec![start, end];
                if let Some(step) = step {
                    gs_args.push(step);
                }
                let gs = Expression::Function(Box::new(Function::new(
                    "GENERATE_SERIES".to_string(),
                    gs_args,
                )));
                return Ok(Expression::Function(Box::new(Function::new(
                    "UNNEST".to_string(),
                    vec![gs],
                ))));
            }

            // Remaining targets all build a SEQUENCE(...) call first.
            let mut seq_args = vec![start, end];
            if let Some(step) = step {
                seq_args.push(step);
            }

            let seq = Expression::Function(Box::new(Function::new(
                "SEQUENCE".to_string(),
                seq_args,
            )));

            match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Wrap in UNNEST
                    Ok(Expression::Function(Box::new(Function::new(
                        "UNNEST".to_string(),
                        vec![seq],
                    ))))
                }
                DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive => {
                    // Wrap in EXPLODE
                    Ok(Expression::Function(Box::new(Function::new(
                        "EXPLODE".to_string(),
                        vec![seq],
                    ))))
                }
                _ => {
                    // Just SEQUENCE for others
                    Ok(seq)
                }
            }
        } else {
            // Not GENERATE_SERIES, or too few args: pass through.
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
24520
Action::ConcatCoalesceWrap => {
    // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
    // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
    // i.e. every argument is NULL-guarded; Presto-family additionally needs
    // an explicit VARCHAR cast because its CONCAT is not implicitly coercing.
    if let Expression::Function(f) = e {
        if f.name.eq_ignore_ascii_case("CONCAT") {
            let new_args: Vec<Expression> = f
                .args
                .into_iter()
                .map(|arg| {
                    let cast_arg = if matches!(
                        target,
                        DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                    ) {
                        Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::VarChar {
                                length: None,
                                parenthesized_length: false,
                            },
                            trailing_comments: Vec::new(),
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }))
                    } else {
                        arg
                    };
                    // COALESCE(arg, '') so NULL arguments don't null out the
                    // whole concatenation.
                    Expression::Function(Box::new(Function::new(
                        "COALESCE".to_string(),
                        vec![cast_arg, Expression::string("")],
                    )))
                })
                .collect();
            Ok(Expression::Function(Box::new(Function::new(
                "CONCAT".to_string(),
                new_args,
            ))))
        } else {
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
24568
24569 Action::PipeConcatToConcat => {
24570 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
24571 if let Expression::Concat(op) = e {
24572 let cast_left = Expression::Cast(Box::new(Cast {
24573 this: op.left,
24574 to: DataType::VarChar {
24575 length: None,
24576 parenthesized_length: false,
24577 },
24578 trailing_comments: Vec::new(),
24579 double_colon_syntax: false,
24580 format: None,
24581 default: None,
24582 inferred_type: None,
24583 }));
24584 let cast_right = Expression::Cast(Box::new(Cast {
24585 this: op.right,
24586 to: DataType::VarChar {
24587 length: None,
24588 parenthesized_length: false,
24589 },
24590 trailing_comments: Vec::new(),
24591 double_colon_syntax: false,
24592 format: None,
24593 default: None,
24594 inferred_type: None,
24595 }));
24596 Ok(Expression::Function(Box::new(Function::new(
24597 "CONCAT".to_string(),
24598 vec![cast_left, cast_right],
24599 ))))
24600 } else {
24601 Ok(e)
24602 }
24603 }
24604
Action::DivFuncConvert => {
    // DIV(a, b) -> target-specific integer division
    if let Expression::Function(f) = e {
        if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
            let a = f.args[0].clone();
            let b = f.args[1].clone();
            match target {
                DialectType::DuckDB => {
                    // DIV(a, b) -> CAST(a // b AS DECIMAL)
                    let int_div = Expression::IntDiv(Box::new(
                        crate::expressions::BinaryFunc {
                            this: a,
                            expression: b,
                            original_name: None,
                            inferred_type: None,
                        },
                    ));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: int_div,
                        to: DataType::Decimal {
                            precision: None,
                            scale: None,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
                DialectType::BigQuery => {
                    // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
                    // BigQuery has DIV natively; only the result type is cast.
                    let div_func = Expression::Function(Box::new(Function::new(
                        "DIV".to_string(),
                        vec![a, b],
                    )));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: div_func,
                        to: DataType::Custom {
                            name: "NUMERIC".to_string(),
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
                DialectType::SQLite => {
                    // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
                    // Inner REAL cast forces float division, the INTEGER cast
                    // truncates, and the outer REAL cast restores a numeric type.
                    let cast_a = Expression::Cast(Box::new(Cast {
                        this: a,
                        to: DataType::Custom {
                            name: "REAL".to_string(),
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
                    let cast_int = Expression::Cast(Box::new(Cast {
                        this: div,
                        to: DataType::Int {
                            length: None,
                            integer_spelling: true,
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: cast_int,
                        to: DataType::Custom {
                            name: "REAL".to_string(),
                        },
                        trailing_comments: Vec::new(),
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                }
                // Other targets keep DIV(a, b) verbatim.
                _ => Ok(Expression::Function(f)),
            }
        } else {
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
24700
24701 Action::JsonObjectAggConvert => {
24702 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
24703 match e {
24704 Expression::Function(f) => Ok(Expression::Function(Box::new(
24705 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
24706 ))),
24707 Expression::AggregateFunction(af) => {
24708 // AggregateFunction stores all args in the `args` vec
24709 Ok(Expression::Function(Box::new(Function::new(
24710 "JSON_GROUP_OBJECT".to_string(),
24711 af.args,
24712 ))))
24713 }
24714 other => Ok(other),
24715 }
24716 }
24717
Action::JsonbExistsConvert => {
    // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
    if let Expression::Function(f) = e {
        if f.args.len() == 2 {
            let json_expr = f.args[0].clone();
            // Only a string-literal key can be rewritten into a '$.key'
            // JSON-path; any other key expression leaves the call unchanged.
            // NOTE(review): keys containing '.' or quotes would produce an
            // ambiguous path — confirm whether escaping is needed.
            let key = match &f.args[1] {
                Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
                    let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
                    format!("$.{}", s)
                }
                _ => return Ok(Expression::Function(f)),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXISTS".to_string(),
                vec![json_expr, Expression::string(&key)],
            ))))
        } else {
            // Wrong arity: pass through untouched.
            Ok(Expression::Function(f))
        }
    } else {
        Ok(e)
    }
}
24741
24742 Action::DateBinConvert => {
24743 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
24744 if let Expression::Function(f) = e {
24745 Ok(Expression::Function(Box::new(Function::new(
24746 "TIME_BUCKET".to_string(),
24747 f.args,
24748 ))))
24749 } else {
24750 Ok(e)
24751 }
24752 }
24753
24754 Action::MysqlCastCharToText => {
24755 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
24756 if let Expression::Cast(mut c) = e {
24757 c.to = DataType::Text;
24758 Ok(Expression::Cast(c))
24759 } else {
24760 Ok(e)
24761 }
24762 }
24763
Action::SparkCastVarcharToString => {
    // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
    // Both CAST and TRY_CAST receive the same type normalization; all other
    // expressions pass through unchanged.
    match e {
        Expression::Cast(mut c) => {
            c.to = Self::normalize_varchar_to_string(c.to);
            Ok(Expression::Cast(c))
        }
        Expression::TryCast(mut c) => {
            c.to = Self::normalize_varchar_to_string(c.to);
            Ok(Expression::TryCast(c))
        }
        _ => Ok(e),
    }
}
24778
24779 Action::MinMaxToLeastGreatest => {
24780 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
24781 if let Expression::Function(f) = e {
24782 let new_name = if f.name.eq_ignore_ascii_case("MIN") {
24783 "LEAST"
24784 } else if f.name.eq_ignore_ascii_case("MAX") {
24785 "GREATEST"
24786 } else {
24787 return Ok(Expression::Function(f));
24788 };
24789 Ok(Expression::Function(Box::new(Function::new(
24790 new_name.to_string(),
24791 f.args,
24792 ))))
24793 } else {
24794 Ok(e)
24795 }
24796 }
24797
24798 Action::ClickHouseUniqToApproxCountDistinct => {
24799 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
24800 if let Expression::Function(f) = e {
24801 Ok(Expression::Function(Box::new(Function::new(
24802 "APPROX_COUNT_DISTINCT".to_string(),
24803 f.args,
24804 ))))
24805 } else {
24806 Ok(e)
24807 }
24808 }
24809
24810 Action::ClickHouseAnyToAnyValue => {
24811 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
24812 if let Expression::Function(f) = e {
24813 Ok(Expression::Function(Box::new(Function::new(
24814 "ANY_VALUE".to_string(),
24815 f.args,
24816 ))))
24817 } else {
24818 Ok(e)
24819 }
24820 }
24821
24822 Action::OracleVarchar2ToVarchar => {
24823 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
24824 if let Expression::DataType(DataType::Custom { ref name }) = e {
24825 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
24826 let starts_varchar2 = name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2(");
24827 let starts_nvarchar2 = name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2(");
24828 let inner =
24829 if starts_varchar2 || starts_nvarchar2 {
24830 let start = if starts_nvarchar2 { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
24831 let end = name.len() - 1; // skip trailing ")"
24832 Some(&name[start..end])
24833 } else {
24834 Option::None
24835 };
24836 if let Some(inner_str) = inner {
24837 // Parse the number part, ignoring BYTE/CHAR qualifier
24838 let num_str = inner_str.split_whitespace().next().unwrap_or("");
24839 if let Ok(n) = num_str.parse::<u32>() {
24840 Ok(Expression::DataType(DataType::VarChar {
24841 length: Some(n),
24842 parenthesized_length: false,
24843 }))
24844 } else {
24845 Ok(e)
24846 }
24847 } else {
24848 // Plain VARCHAR2 / NVARCHAR2 without parens
24849 Ok(Expression::DataType(DataType::VarChar {
24850 length: Option::None,
24851 parenthesized_length: false,
24852 }))
24853 }
24854 } else {
24855 Ok(e)
24856 }
24857 }
24858
            Action::Nvl2Expand => {
                // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
                // But keep as NVL2 for dialects that support it natively
                let nvl2_native = matches!(
                    target,
                    DialectType::Oracle
                        | DialectType::Snowflake
                        | DialectType::Redshift
                        | DialectType::Teradata
                        | DialectType::Spark
                        | DialectType::Databricks
                );
                // Accept either the dedicated Nvl2 AST node or a generic Function
                // node. Native targets early-return unchanged (the generic form is
                // re-emitted under the canonical NVL2 name).
                let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
                    if nvl2_native {
                        return Ok(Expression::Nvl2(nvl2));
                    }
                    (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
                } else if let Expression::Function(f) = e {
                    if nvl2_native {
                        return Ok(Expression::Function(Box::new(Function::new(
                            "NVL2".to_string(),
                            f.args,
                        ))));
                    }
                    // Fewer than two args is a malformed NVL2; pass through untouched.
                    if f.args.len() < 2 {
                        return Ok(Expression::Function(f));
                    }
                    let mut args = f.args;
                    let a = args.remove(0);
                    let b = args.remove(0);
                    // Third argument (the NULL branch) is optional.
                    let c = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };
                    (a, b, c)
                } else {
                    return Ok(e);
                };
                // Build: NOT (a IS NULL)
                let is_null = Expression::IsNull(Box::new(IsNull {
                    this: a,
                    not: false,
                    postfix_form: false,
                }));
                let not_null = Expression::Not(Box::new(crate::expressions::UnaryOp {
                    this: is_null,
                    inferred_type: None,
                }));
                // CASE WHEN NOT a IS NULL THEN b [ELSE c] END — the ELSE clause is
                // omitted entirely when c is None.
                Ok(Expression::Case(Box::new(Case {
                    operand: Option::None,
                    whens: vec![(not_null, b)],
                    else_: c,
                    comments: Vec::new(),
                    inferred_type: None,
                })))
            }
24916
24917 Action::IfnullToCoalesce => {
24918 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
24919 if let Expression::Coalesce(mut cf) = e {
24920 cf.original_name = Option::None;
24921 Ok(Expression::Coalesce(cf))
24922 } else if let Expression::Function(f) = e {
24923 Ok(Expression::Function(Box::new(Function::new(
24924 "COALESCE".to_string(),
24925 f.args,
24926 ))))
24927 } else {
24928 Ok(e)
24929 }
24930 }
24931
24932 Action::IsAsciiConvert => {
24933 // IS_ASCII(x) -> dialect-specific ASCII check
24934 if let Expression::Function(f) = e {
24935 let arg = f.args.into_iter().next().unwrap();
24936 match target {
24937 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
24938 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
24939 Ok(Expression::Function(Box::new(Function::new(
24940 "REGEXP_LIKE".to_string(),
24941 vec![
24942 arg,
24943 Expression::Literal(Box::new(Literal::String(
24944 "^[[:ascii:]]*$".to_string(),
24945 ))),
24946 ],
24947 ))))
24948 }
24949 DialectType::PostgreSQL
24950 | DialectType::Redshift
24951 | DialectType::Materialize
24952 | DialectType::RisingWave => {
24953 // (x ~ '^[[:ascii:]]*$')
24954 Ok(Expression::Paren(Box::new(Paren {
24955 this: Expression::RegexpLike(Box::new(
24956 crate::expressions::RegexpFunc {
24957 this: arg,
24958 pattern: Expression::Literal(Box::new(Literal::String(
24959 "^[[:ascii:]]*$".to_string(),
24960 ))),
24961 flags: Option::None,
24962 },
24963 )),
24964 trailing_comments: Vec::new(),
24965 })))
24966 }
24967 DialectType::SQLite => {
24968 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
24969 let hex_lit = Expression::Literal(Box::new(Literal::HexString(
24970 "2a5b5e012d7f5d2a".to_string(),
24971 )));
24972 let cast_expr = Expression::Cast(Box::new(Cast {
24973 this: hex_lit,
24974 to: DataType::Text,
24975 trailing_comments: Vec::new(),
24976 double_colon_syntax: false,
24977 format: Option::None,
24978 default: Option::None,
24979 inferred_type: None,
24980 }));
24981 let glob = Expression::Glob(Box::new(BinaryOp {
24982 left: arg,
24983 right: cast_expr,
24984 left_comments: Vec::new(),
24985 operator_comments: Vec::new(),
24986 trailing_comments: Vec::new(),
24987 inferred_type: None,
24988 }));
24989 Ok(Expression::Paren(Box::new(Paren {
24990 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
24991 this: glob,
24992 inferred_type: None,
24993 })),
24994 trailing_comments: Vec::new(),
24995 })))
24996 }
24997 DialectType::TSQL | DialectType::Fabric => {
24998 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
24999 let hex_lit = Expression::Literal(Box::new(Literal::HexNumber(
25000 "255b5e002d7f5d25".to_string(),
25001 )));
25002 let convert_expr = Expression::Convert(Box::new(
25003 crate::expressions::ConvertFunc {
25004 this: hex_lit,
25005 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
25006 style: None,
25007 },
25008 ));
25009 let collated = Expression::Collation(Box::new(
25010 crate::expressions::CollationExpr {
25011 this: convert_expr,
25012 collation: "Latin1_General_BIN".to_string(),
25013 quoted: false,
25014 double_quoted: false,
25015 },
25016 ));
25017 let patindex = Expression::Function(Box::new(Function::new(
25018 "PATINDEX".to_string(),
25019 vec![collated, arg],
25020 )));
25021 let zero = Expression::Literal(Box::new(Literal::Number("0".to_string())));
25022 let eq_zero = Expression::Eq(Box::new(BinaryOp {
25023 left: patindex,
25024 right: zero,
25025 left_comments: Vec::new(),
25026 operator_comments: Vec::new(),
25027 trailing_comments: Vec::new(),
25028 inferred_type: None,
25029 }));
25030 Ok(Expression::Paren(Box::new(Paren {
25031 this: eq_zero,
25032 trailing_comments: Vec::new(),
25033 })))
25034 }
25035 DialectType::Oracle => {
25036 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
25037 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
25038 let s1 = Expression::Literal(Box::new(Literal::String("^[".to_string())));
25039 let chr1 = Expression::Function(Box::new(Function::new(
25040 "CHR".to_string(),
25041 vec![Expression::Literal(Box::new(Literal::Number("1".to_string())))],
25042 )));
25043 let dash = Expression::Literal(Box::new(Literal::String("-".to_string())));
25044 let chr127 = Expression::Function(Box::new(Function::new(
25045 "CHR".to_string(),
25046 vec![Expression::Literal(Box::new(Literal::Number("127".to_string())))],
25047 )));
25048 let s2 = Expression::Literal(Box::new(Literal::String("]*$".to_string())));
25049 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
25050 let concat1 =
25051 Expression::DPipe(Box::new(crate::expressions::DPipe {
25052 this: Box::new(s1),
25053 expression: Box::new(chr1),
25054 safe: None,
25055 }));
25056 let concat2 =
25057 Expression::DPipe(Box::new(crate::expressions::DPipe {
25058 this: Box::new(concat1),
25059 expression: Box::new(dash),
25060 safe: None,
25061 }));
25062 let concat3 =
25063 Expression::DPipe(Box::new(crate::expressions::DPipe {
25064 this: Box::new(concat2),
25065 expression: Box::new(chr127),
25066 safe: None,
25067 }));
25068 let concat4 =
25069 Expression::DPipe(Box::new(crate::expressions::DPipe {
25070 this: Box::new(concat3),
25071 expression: Box::new(s2),
25072 safe: None,
25073 }));
25074 let regexp_like = Expression::Function(Box::new(Function::new(
25075 "REGEXP_LIKE".to_string(),
25076 vec![arg, concat4],
25077 )));
25078 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
25079 let true_expr = Expression::Column(Box::new(crate::expressions::Column {
25080 name: Identifier {
25081 name: "TRUE".to_string(),
25082 quoted: false,
25083 trailing_comments: Vec::new(),
25084 span: None,
25085 },
25086 table: None,
25087 join_mark: false,
25088 trailing_comments: Vec::new(),
25089 span: None,
25090 inferred_type: None,
25091 }));
25092 let nvl = Expression::Function(Box::new(Function::new(
25093 "NVL".to_string(),
25094 vec![regexp_like, true_expr],
25095 )));
25096 Ok(nvl)
25097 }
25098 _ => Ok(Expression::Function(Box::new(Function::new(
25099 "IS_ASCII".to_string(),
25100 vec![arg],
25101 )))),
25102 }
25103 } else {
25104 Ok(e)
25105 }
25106 }
25107
            Action::StrPositionConvert => {
                // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
                if let Expression::Function(f) = e {
                    // Needs at least (haystack, needle); otherwise leave untouched.
                    if f.args.len() < 2 {
                        return Ok(Expression::Function(f));
                    }
                    let mut args = f.args;

                    let haystack = args.remove(0);
                    let needle = args.remove(0);
                    // Optional third arg: 1-based start position.
                    let position = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };
                    // Optional fourth arg: which occurrence to find.
                    let occurrence = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };

                    // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
                    // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
                    // The +pos-1 re-bases the match offset from the substring back to
                    // the full haystack; the =0 guard preserves "not found" as 0.
                    fn build_position_expansion(
                        haystack: Expression,
                        needle: Expression,
                        pos: Expression,
                        occurrence: Option<Expression>,
                        inner_func: &str,
                        wrapper: &str, // "CASE", "IF", "IIF"
                    ) -> Expression {
                        let substr = Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![haystack, pos.clone()],
                        )));
                        let mut inner_args = vec![substr, needle];
                        if let Some(occ) = occurrence {
                            inner_args.push(occ);
                        }
                        let inner_call = Expression::Function(Box::new(Function::new(
                            inner_func.to_string(),
                            inner_args,
                        )));
                        let zero = Expression::Literal(Box::new(Literal::Number("0".to_string())));
                        let one = Expression::Literal(Box::new(Literal::Number("1".to_string())));
                        let eq_zero = Expression::Eq(Box::new(BinaryOp {
                            left: inner_call.clone(),
                            right: zero.clone(),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        let add_pos = Expression::Add(Box::new(BinaryOp {
                            left: inner_call,
                            right: pos,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        let sub_one = Expression::Sub(Box::new(BinaryOp {
                            left: add_pos,
                            right: one,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));

                        // Wrap in the conditional form the target dialect prefers.
                        match wrapper {
                            "CASE" => Expression::Case(Box::new(Case {
                                operand: Option::None,
                                whens: vec![(eq_zero, zero)],
                                else_: Some(sub_one),
                                comments: Vec::new(),
                                inferred_type: None,
                            })),
                            "IIF" => Expression::Function(Box::new(Function::new(
                                "IIF".to_string(),
                                vec![eq_zero, zero, sub_one],
                            ))),
                            _ => Expression::Function(Box::new(Function::new(
                                "IF".to_string(),
                                vec![eq_zero, zero, sub_one],
                            ))),
                        }
                    }

                    match target {
                        // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
                        DialectType::Athena
                        | DialectType::DuckDB
                        | DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Drill => {
                            if let Some(pos) = position {
                                let wrapper = if matches!(target, DialectType::DuckDB) {
                                    "CASE"
                                } else {
                                    "IF"
                                };
                                let result = build_position_expansion(
                                    haystack, needle, pos, occurrence, "STRPOS", wrapper,
                                );
                                if matches!(target, DialectType::Drill) {
                                    // Drill uses backtick-quoted `IF`
                                    if let Expression::Function(mut f) = result {
                                        f.name = "`IF`".to_string();
                                        Ok(Expression::Function(f))
                                    } else {
                                        Ok(result)
                                    }
                                } else {
                                    Ok(result)
                                }
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "STRPOS".to_string(),
                                    vec![haystack, needle],
                                ))))
                            }
                        }
                        // SQLite: IIF wrapper
                        DialectType::SQLite => {
                            if let Some(pos) = position {
                                Ok(build_position_expansion(
                                    haystack, needle, pos, occurrence, "INSTR", "IIF",
                                ))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "INSTR".to_string(),
                                    vec![haystack, needle],
                                ))))
                            }
                        }
                        // INSTR group: Teradata, BigQuery, Oracle — INSTR natively
                        // supports both position and occurrence arguments.
                        DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            if let Some(occ) = occurrence {
                                a.push(occ);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "INSTR".to_string(),
                                a,
                            ))))
                        }
                        // CHARINDEX group: Snowflake, TSQL, Fabric — note the
                        // (needle, haystack) argument order; occurrence is unsupported
                        // and dropped here.
                        DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
                            let mut a = vec![needle, haystack];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "CHARINDEX".to_string(),
                                a,
                            ))))
                        }
                        // POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
                        DialectType::PostgreSQL
                        | DialectType::Materialize
                        | DialectType::RisingWave
                        | DialectType::Redshift => {
                            if let Some(pos) = position {
                                // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
                                //        ELSE POSITION(...) + pos - 1 END
                                let substr = Expression::Substring(Box::new(
                                    crate::expressions::SubstringFunc {
                                        this: haystack,
                                        start: pos.clone(),
                                        length: Option::None,
                                        from_for_syntax: true,
                                    },
                                ));
                                let pos_in = Expression::StrPosition(Box::new(
                                    crate::expressions::StrPosition {
                                        this: Box::new(substr),
                                        substr: Some(Box::new(needle)),
                                        position: Option::None,
                                        occurrence: Option::None,
                                    },
                                ));
                                let zero =
                                    Expression::Literal(Box::new(Literal::Number("0".to_string())));
                                let one = Expression::Literal(Box::new(Literal::Number("1".to_string())));
                                let eq_zero = Expression::Eq(Box::new(BinaryOp {
                                    left: pos_in.clone(),
                                    right: zero.clone(),
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                                let add_pos = Expression::Add(Box::new(BinaryOp {
                                    left: pos_in,
                                    right: pos,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                                let sub_one = Expression::Sub(Box::new(BinaryOp {
                                    left: add_pos,
                                    right: one,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                                Ok(Expression::Case(Box::new(Case {
                                    operand: Option::None,
                                    whens: vec![(eq_zero, zero)],
                                    else_: Some(sub_one),
                                    comments: Vec::new(),
                                    inferred_type: None,
                                })))
                            } else {
                                Ok(Expression::StrPosition(Box::new(
                                    crate::expressions::StrPosition {
                                        this: Box::new(haystack),
                                        substr: Some(Box::new(needle)),
                                        position: Option::None,
                                        occurrence: Option::None,
                                    },
                                )))
                            }
                        }
                        // LOCATE group: MySQL, Hive, Spark, Databricks, Doris —
                        // LOCATE(needle, haystack[, pos]); occurrence is dropped.
                        DialectType::MySQL
                        | DialectType::SingleStore
                        | DialectType::TiDB
                        | DialectType::Hive
                        | DialectType::Spark
                        | DialectType::Databricks
                        | DialectType::Doris
                        | DialectType::StarRocks => {
                            let mut a = vec![needle, haystack];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "LOCATE".to_string(),
                                a,
                            ))))
                        }
                        // ClickHouse: POSITION(haystack, needle[, position])
                        DialectType::ClickHouse => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "POSITION".to_string(),
                                a,
                            ))))
                        }
                        // Fallback: keep the canonical STR_POSITION spelling.
                        _ => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            if let Some(occ) = occurrence {
                                a.push(occ);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "STR_POSITION".to_string(),
                                a,
                            ))))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
25385
25386 Action::ArraySumConvert => {
25387 // ARRAY_SUM(arr) -> dialect-specific
25388 if let Expression::Function(f) = e {
25389 let args = f.args;
25390 match target {
25391 DialectType::DuckDB => Ok(Expression::Function(Box::new(
25392 Function::new("LIST_SUM".to_string(), args),
25393 ))),
25394 DialectType::Spark | DialectType::Databricks => {
25395 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
25396 let arr = args.into_iter().next().unwrap();
25397 let zero = Expression::Literal(Box::new(Literal::Number("0".to_string())));
25398 let acc_id = Identifier::new("acc");
25399 let x_id = Identifier::new("x");
25400 let acc = Expression::Identifier(acc_id.clone());
25401 let x = Expression::Identifier(x_id.clone());
25402 let add = Expression::Add(Box::new(BinaryOp {
25403 left: acc.clone(),
25404 right: x,
25405 left_comments: Vec::new(),
25406 operator_comments: Vec::new(),
25407 trailing_comments: Vec::new(),
25408 inferred_type: None,
25409 }));
25410 let lambda1 =
25411 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25412 parameters: vec![acc_id.clone(), x_id],
25413 body: add,
25414 colon: false,
25415 parameter_types: Vec::new(),
25416 }));
25417 let lambda2 =
25418 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25419 parameters: vec![acc_id],
25420 body: acc,
25421 colon: false,
25422 parameter_types: Vec::new(),
25423 }));
25424 Ok(Expression::Function(Box::new(Function::new(
25425 "AGGREGATE".to_string(),
25426 vec![arr, zero, lambda1, lambda2],
25427 ))))
25428 }
25429 DialectType::Presto | DialectType::Athena => {
25430 // Presto/Athena keep ARRAY_SUM natively
25431 Ok(Expression::Function(Box::new(Function::new(
25432 "ARRAY_SUM".to_string(),
25433 args,
25434 ))))
25435 }
25436 DialectType::Trino => {
25437 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
25438 if args.len() == 1 {
25439 let arr = args.into_iter().next().unwrap();
25440 let zero =
25441 Expression::Literal(Box::new(Literal::Number("0".to_string())));
25442 let acc_id = Identifier::new("acc");
25443 let x_id = Identifier::new("x");
25444 let acc = Expression::Identifier(acc_id.clone());
25445 let x = Expression::Identifier(x_id.clone());
25446 let add = Expression::Add(Box::new(BinaryOp {
25447 left: acc.clone(),
25448 right: x,
25449 left_comments: Vec::new(),
25450 operator_comments: Vec::new(),
25451 trailing_comments: Vec::new(),
25452 inferred_type: None,
25453 }));
25454 let lambda1 = Expression::Lambda(Box::new(
25455 crate::expressions::LambdaExpr {
25456 parameters: vec![acc_id.clone(), x_id],
25457 body: add,
25458 colon: false,
25459 parameter_types: Vec::new(),
25460 },
25461 ));
25462 let lambda2 = Expression::Lambda(Box::new(
25463 crate::expressions::LambdaExpr {
25464 parameters: vec![acc_id],
25465 body: acc,
25466 colon: false,
25467 parameter_types: Vec::new(),
25468 },
25469 ));
25470 Ok(Expression::Function(Box::new(Function::new(
25471 "REDUCE".to_string(),
25472 vec![arr, zero, lambda1, lambda2],
25473 ))))
25474 } else {
25475 Ok(Expression::Function(Box::new(Function::new(
25476 "ARRAY_SUM".to_string(),
25477 args,
25478 ))))
25479 }
25480 }
25481 DialectType::ClickHouse => {
25482 // arraySum(lambda, arr) or arraySum(arr)
25483 Ok(Expression::Function(Box::new(Function::new(
25484 "arraySum".to_string(),
25485 args,
25486 ))))
25487 }
25488 _ => Ok(Expression::Function(Box::new(Function::new(
25489 "ARRAY_SUM".to_string(),
25490 args,
25491 )))),
25492 }
25493 } else {
25494 Ok(e)
25495 }
25496 }
25497
25498 Action::ArraySizeConvert => {
25499 if let Expression::Function(f) = e {
25500 Ok(Expression::Function(Box::new(Function::new(
25501 "REPEATED_COUNT".to_string(),
25502 f.args,
25503 ))))
25504 } else {
25505 Ok(e)
25506 }
25507 }
25508
            Action::ArrayAnyConvert => {
                // ARRAY_ANY(arr, lambda) -> dialect-specific "does any element match"
                // expression. The common fallback shape is:
                //   (len(arr) = 0 OR len(filter(arr, lambda)) <> 0)
                // i.e. vacuously true for empty arrays, otherwise true when at least
                // one element survives the predicate filter.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    if args.len() == 2 {
                        let arr = args.remove(0);
                        let lambda = args.remove(0);

                        // Extract lambda parameter name and body
                        // (defaults to "x" / the raw expression when the second
                        // argument is not a Lambda node).
                        let (param_name, pred_body) =
                            if let Expression::Lambda(ref lam) = lambda {
                                let name = if let Some(p) = lam.parameters.first() {
                                    p.name.clone()
                                } else {
                                    "x".to_string()
                                };
                                (name, lam.body.clone())
                            } else {
                                ("x".to_string(), lambda.clone())
                            };

                        // Helper: build a function call Expression
                        let make_func = |name: &str, args: Vec<Expression>| -> Expression {
                            Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                args,
                            )))
                        };

                        // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren
                        // `len_args_extra` carries trailing length args (e.g. the
                        // dimension argument PostgreSQL's ARRAY_LENGTH requires).
                        let build_filter_pattern = |len_func: &str,
                                                    len_args_extra: Vec<Expression>,
                                                    filter_expr: Expression|
                         -> Expression {
                            // len_func(arr, ...extra) = 0
                            let mut len_arr_args = vec![arr.clone()];
                            len_arr_args.extend(len_args_extra.clone());
                            let len_arr = make_func(len_func, len_arr_args);
                            let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
                                len_arr,
                                Expression::number(0),
                            )));

                            // len_func(filter_expr, ...extra) <> 0
                            let mut len_filter_args = vec![filter_expr];
                            len_filter_args.extend(len_args_extra);
                            let len_filter = make_func(len_func, len_filter_args);
                            let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
                                len_filter,
                                Expression::number(0),
                            )));

                            // (eq_zero OR neq_zero)
                            let or_expr =
                                Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
                            Expression::Paren(Box::new(Paren {
                                this: or_expr,
                                trailing_comments: Vec::new(),
                            }))
                        };

                        match target {
                            // Presto-family has a dedicated ANY_MATCH function.
                            DialectType::Trino | DialectType::Presto | DialectType::Athena => {
                                Ok(make_func("ANY_MATCH", vec![arr, lambda]))
                            }
                            DialectType::ClickHouse => {
                                // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
                                // ClickHouse arrayFilter takes lambda first, then array
                                let filter_expr =
                                    make_func("arrayFilter", vec![lambda, arr.clone()]);
                                Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
                            }
                            DialectType::Databricks | DialectType::Spark => {
                                // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("SIZE", vec![], filter_expr))
                            }
                            DialectType::DuckDB => {
                                // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("LIST_FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
                            }
                            DialectType::Teradata => {
                                // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
                            }
                            DialectType::BigQuery => {
                                // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
                                // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
                                let param_col = Expression::column(&param_name);
                                let unnest_expr = Expression::Unnest(Box::new(
                                    crate::expressions::UnnestFunc {
                                        this: arr.clone(),
                                        expressions: vec![],
                                        with_ordinality: false,
                                        alias: Some(Identifier::new(&param_name)),
                                        offset_alias: None,
                                    },
                                ));
                                let mut sel = crate::expressions::Select::default();
                                sel.expressions = vec![param_col];
                                sel.from = Some(crate::expressions::From {
                                    expressions: vec![unnest_expr],
                                });
                                sel.where_clause =
                                    Some(crate::expressions::Where { this: pred_body });
                                let array_subquery =
                                    make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                                Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
                            }
                            DialectType::PostgreSQL => {
                                // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
                                // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
                                let param_col = Expression::column(&param_name);
                                // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
                                let unnest_with_alias =
                                    Expression::Alias(Box::new(crate::expressions::Alias {
                                        this: Expression::Unnest(Box::new(
                                            crate::expressions::UnnestFunc {
                                                this: arr.clone(),
                                                expressions: vec![],
                                                with_ordinality: false,
                                                alias: None,
                                                offset_alias: None,
                                            },
                                        )),
                                        alias: Identifier::new("_t0"),
                                        column_aliases: vec![Identifier::new(&param_name)],
                                        pre_alias_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                let mut sel = crate::expressions::Select::default();
                                sel.expressions = vec![param_col];
                                sel.from = Some(crate::expressions::From {
                                    expressions: vec![unnest_with_alias],
                                });
                                sel.where_clause =
                                    Some(crate::expressions::Where { this: pred_body });
                                let array_subquery =
                                    make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                                Ok(build_filter_pattern(
                                    "ARRAY_LENGTH",
                                    vec![Expression::number(1)],
                                    array_subquery,
                                ))
                            }
                            // Fallback: keep the canonical ARRAY_ANY spelling.
                            _ => Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_ANY".to_string(),
                                vec![arr, lambda],
                            )))),
                        }
                    } else {
                        // Wrong arity: re-emit as-is under the canonical name.
                        Ok(Expression::Function(Box::new(Function::new(
                            "ARRAY_ANY".to_string(),
                            args,
                        ))))
                    }
                } else {
                    Ok(e)
                }
            }
25674
            Action::DecodeSimplify => {
                // DECODE(x, search1, result1, ..., default) -> CASE WHEN ... THEN result1 ... [ELSE default] END
                // For literal search values: CASE WHEN x = search THEN result
                // For NULL search: CASE WHEN x IS NULL THEN result
                // For non-literal (column, expr): CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
                // (Oracle DECODE treats NULL = NULL as a match, hence the null-safe form.)
                fn is_decode_literal(e: &Expression) -> bool {
                    matches!(
                        e,
                        Expression::Literal(_) | Expression::Boolean(_) | Expression::Neg(_)
                    )
                }

                // Shared CASE builder used by both the Decode and DecodeCase forms.
                let build_decode_case =
                    |this_expr: Expression,
                     pairs: Vec<(Expression, Expression)>,
                     default: Option<Expression>| {
                        let whens: Vec<(Expression, Expression)> = pairs
                            .into_iter()
                            .map(|(search, result)| {
                                if matches!(&search, Expression::Null(_)) {
                                    // NULL search -> IS NULL
                                    let condition = Expression::Is(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: Expression::Null(crate::expressions::Null),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    (condition, result)
                                } else if is_decode_literal(&search)
                                    || is_decode_literal(&this_expr)
                                {
                                    // At least one side is a literal -> simple equality (no NULL check needed)
                                    let eq = Expression::Eq(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: search,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    (eq, result)
                                } else {
                                    // Non-literal -> null-safe comparison
                                    // Parenthesize comparison-operator searches so the
                                    // generated `=` / `IS NULL` bind correctly.
                                    let needs_paren = matches!(
                                        &search,
                                        Expression::Eq(_)
                                            | Expression::Neq(_)
                                            | Expression::Gt(_)
                                            | Expression::Gte(_)
                                            | Expression::Lt(_)
                                            | Expression::Lte(_)
                                    );
                                    let search_ref = if needs_paren {
                                        Expression::Paren(Box::new(crate::expressions::Paren {
                                            this: search.clone(),
                                            trailing_comments: Vec::new(),
                                        }))
                                    } else {
                                        search.clone()
                                    };
                                    // Build: x = search OR (x IS NULL AND search IS NULL)
                                    let eq = Expression::Eq(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: search_ref,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    let search_in_null = if needs_paren {
                                        Expression::Paren(Box::new(crate::expressions::Paren {
                                            this: search.clone(),
                                            trailing_comments: Vec::new(),
                                        }))
                                    } else {
                                        search.clone()
                                    };
                                    let x_is_null = Expression::Is(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: Expression::Null(crate::expressions::Null),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    let search_is_null = Expression::Is(Box::new(BinaryOp {
                                        left: search_in_null,
                                        right: Expression::Null(crate::expressions::Null),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    let both_null = Expression::And(Box::new(BinaryOp {
                                        left: x_is_null,
                                        right: search_is_null,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    let condition = Expression::Or(Box::new(BinaryOp {
                                        left: eq,
                                        right: Expression::Paren(Box::new(
                                            crate::expressions::Paren {
                                                this: both_null,
                                                trailing_comments: Vec::new(),
                                            },
                                        )),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    (condition, result)
                                }
                            })
                            .collect();
                        Expression::Case(Box::new(Case {
                            operand: None,
                            whens,
                            else_: default,
                            comments: Vec::new(),
                            inferred_type: None,
                        }))
                    };

                if let Expression::Decode(decode) = e {
                    // Structured form: pairs and default already separated.
                    Ok(build_decode_case(
                        decode.this,
                        decode.search_results,
                        decode.default,
                    ))
                } else if let Expression::DecodeCase(dc) = e {
                    // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
                    let mut exprs = dc.expressions;
                    // Need at least x + one (search, result) pair to expand.
                    if exprs.len() < 3 {
                        return Ok(Expression::DecodeCase(Box::new(
                            crate::expressions::DecodeCase { expressions: exprs },
                        )));
                    }
                    let this_expr = exprs.remove(0);
                    let mut pairs = Vec::new();
                    let mut default = None;
                    let mut i = 0;
                    while i + 1 < exprs.len() {
                        pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
                        i += 2;
                    }
                    if i < exprs.len() {
                        // Odd remaining element is the default
                        default = Some(exprs[i].clone());
                    }
                    Ok(build_decode_case(this_expr, pairs, default))
                } else {
                    Ok(e)
                }
            }
25835
            Action::CreateTableLikeToCtas => {
                // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
                // (LIMIT 0 copies the schema without copying any rows.)
                if let Expression::CreateTable(ct) = e {
                    // Locate the LIKE constraint carrying the source table, if any.
                    let like_source = ct.constraints.iter().find_map(|c| {
                        if let crate::expressions::TableConstraint::Like { source, .. } = c {
                            Some(source.clone())
                        } else {
                            None
                        }
                    });
                    if let Some(source_table) = like_source {
                        let mut new_ct = *ct;
                        // NOTE(review): this clears ALL constraints, not just the LIKE
                        // one — presumably a LIKE form carries no other constraints;
                        // verify against the parser.
                        new_ct.constraints.clear();
                        // Build: SELECT * FROM b LIMIT 0
                        let select = Expression::Select(Box::new(crate::expressions::Select {
                            expressions: vec![Expression::Star(crate::expressions::Star {
                                table: None,
                                except: None,
                                replace: None,
                                rename: None,
                                trailing_comments: Vec::new(),
                                span: None,
                            })],
                            from: Some(crate::expressions::From {
                                expressions: vec![Expression::Table(Box::new(source_table))],
                            }),
                            limit: Some(crate::expressions::Limit {
                                this: Expression::Literal(Box::new(Literal::Number("0".to_string()))),
                                percent: false,
                                comments: Vec::new(),
                            }),
                            ..Default::default()
                        }));
                        new_ct.as_select = Some(select);
                        Ok(Expression::CreateTable(Box::new(new_ct)))
                    } else {
                        // No LIKE constraint: leave the CREATE TABLE untouched.
                        Ok(Expression::CreateTable(ct))
                    }
                } else {
                    Ok(e)
                }
            }
25878
Action::CreateTableLikeToSelectInto => {
    // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
    //
    // T-SQL-style rewrite: materializes the schema copy as a SELECT ... INTO
    // with TOP 0 so no rows are transferred. The source table is aliased
    // "temp" and the original CREATE TABLE node is discarded entirely —
    // the whole statement becomes the SELECT.
    if let Expression::CreateTable(ct) = e {
        // Find the first LIKE constraint; its source is the schema donor.
        let like_source = ct.constraints.iter().find_map(|c| {
            if let crate::expressions::TableConstraint::Like { source, .. } = c {
                Some(source.clone())
            } else {
                None
            }
        });
        if let Some(source_table) = like_source {
            let mut aliased_source = source_table;
            aliased_source.alias = Some(Identifier::new("temp"));
            // Build: SELECT TOP 0 * INTO a FROM b AS temp
            let select = Expression::Select(Box::new(crate::expressions::Select {
                expressions: vec![Expression::Star(crate::expressions::Star {
                    table: None,
                    except: None,
                    replace: None,
                    rename: None,
                    trailing_comments: Vec::new(),
                    span: None,
                })],
                from: Some(crate::expressions::From {
                    expressions: vec![Expression::Table(Box::new(aliased_source))],
                }),
                // INTO targets the table being created.
                into: Some(crate::expressions::SelectInto {
                    this: Expression::Table(Box::new(ct.name.clone())),
                    temporary: false,
                    unlogged: false,
                    bulk_collect: false,
                    expressions: Vec::new(),
                }),
                // TOP 0: structure only, no rows.
                top: Some(crate::expressions::Top {
                    this: Expression::Literal(Box::new(Literal::Number("0".to_string()))),
                    percent: false,
                    with_ties: false,
                    parenthesized: false,
                }),
                ..Default::default()
            }));
            Ok(select)
        } else {
            // No LIKE constraint: leave the CREATE TABLE untouched.
            Ok(Expression::CreateTable(ct))
        }
    } else {
        Ok(e)
    }
}
25928
25929 Action::CreateTableLikeToAs => {
25930 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
25931 if let Expression::CreateTable(ct) = e {
25932 let like_source = ct.constraints.iter().find_map(|c| {
25933 if let crate::expressions::TableConstraint::Like { source, .. } = c {
25934 Some(source.clone())
25935 } else {
25936 None
25937 }
25938 });
25939 if let Some(source_table) = like_source {
25940 let mut new_ct = *ct;
25941 new_ct.constraints.clear();
25942 // AS b (just a table reference, not a SELECT)
25943 new_ct.as_select = Some(Expression::Table(Box::new(source_table)));
25944 Ok(Expression::CreateTable(Box::new(new_ct)))
25945 } else {
25946 Ok(Expression::CreateTable(ct))
25947 }
25948 } else {
25949 Ok(e)
25950 }
25951 }
25952
25953 Action::TsOrDsToDateConvert => {
25954 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
25955 if let Expression::Function(f) = e {
25956 let mut args = f.args;
25957 let this = args.remove(0);
25958 let fmt = if !args.is_empty() {
25959 match &args[0] {
25960 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => { let Literal::String(s) = lit.as_ref() else { unreachable!() }; Some(s.clone()) },
25961 _ => None,
25962 }
25963 } else {
25964 None
25965 };
25966 Ok(Expression::TsOrDsToDate(Box::new(
25967 crate::expressions::TsOrDsToDate {
25968 this: Box::new(this),
25969 format: fmt,
25970 safe: None,
25971 },
25972 )))
25973 } else {
25974 Ok(e)
25975 }
25976 }
25977
25978 Action::TsOrDsToDateStrConvert => {
25979 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
25980 if let Expression::Function(f) = e {
25981 let arg = f.args.into_iter().next().unwrap();
25982 let str_type = match target {
25983 DialectType::DuckDB
25984 | DialectType::PostgreSQL
25985 | DialectType::Materialize => DataType::Text,
25986 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
25987 DataType::Custom {
25988 name: "STRING".to_string(),
25989 }
25990 }
25991 DialectType::Presto
25992 | DialectType::Trino
25993 | DialectType::Athena
25994 | DialectType::Drill => DataType::VarChar {
25995 length: None,
25996 parenthesized_length: false,
25997 },
25998 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
25999 DataType::Custom {
26000 name: "STRING".to_string(),
26001 }
26002 }
26003 _ => DataType::VarChar {
26004 length: None,
26005 parenthesized_length: false,
26006 },
26007 };
26008 let cast_expr = Expression::Cast(Box::new(Cast {
26009 this: arg,
26010 to: str_type,
26011 double_colon_syntax: false,
26012 trailing_comments: Vec::new(),
26013 format: None,
26014 default: None,
26015 inferred_type: None,
26016 }));
26017 Ok(Expression::Substring(Box::new(
26018 crate::expressions::SubstringFunc {
26019 this: cast_expr,
26020 start: Expression::number(1),
26021 length: Some(Expression::number(10)),
26022 from_for_syntax: false,
26023 },
26024 )))
26025 } else {
26026 Ok(e)
26027 }
26028 }
26029
26030 Action::DateStrToDateConvert => {
26031 // DATE_STR_TO_DATE(x) -> dialect-specific
26032 if let Expression::Function(f) = e {
26033 let arg = f.args.into_iter().next().unwrap();
26034 match target {
26035 DialectType::SQLite => {
26036 // SQLite: just the bare expression (dates are strings)
26037 Ok(arg)
26038 }
26039 _ => Ok(Expression::Cast(Box::new(Cast {
26040 this: arg,
26041 to: DataType::Date,
26042 double_colon_syntax: false,
26043 trailing_comments: Vec::new(),
26044 format: None,
26045 default: None,
26046 inferred_type: None,
26047 }))),
26048 }
26049 } else {
26050 Ok(e)
26051 }
26052 }
26053
26054 Action::TimeStrToDateConvert => {
26055 // TIME_STR_TO_DATE(x) -> dialect-specific
26056 if let Expression::Function(f) = e {
26057 let arg = f.args.into_iter().next().unwrap();
26058 match target {
26059 DialectType::Hive
26060 | DialectType::Doris
26061 | DialectType::StarRocks
26062 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
26063 Function::new("TO_DATE".to_string(), vec![arg]),
26064 ))),
26065 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26066 // Presto: CAST(x AS TIMESTAMP)
26067 Ok(Expression::Cast(Box::new(Cast {
26068 this: arg,
26069 to: DataType::Timestamp {
26070 timezone: false,
26071 precision: None,
26072 },
26073 double_colon_syntax: false,
26074 trailing_comments: Vec::new(),
26075 format: None,
26076 default: None,
26077 inferred_type: None,
26078 })))
26079 }
26080 _ => {
26081 // Default: CAST(x AS DATE)
26082 Ok(Expression::Cast(Box::new(Cast {
26083 this: arg,
26084 to: DataType::Date,
26085 double_colon_syntax: false,
26086 trailing_comments: Vec::new(),
26087 format: None,
26088 default: None,
26089 inferred_type: None,
26090 })))
26091 }
26092 }
26093 } else {
26094 Ok(e)
26095 }
26096 }
26097
26098 Action::TimeStrToTimeConvert => {
26099 // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type
26100 if let Expression::Function(f) = e {
26101 let mut args = f.args;
26102 let this = args.remove(0);
26103 let zone = if !args.is_empty() {
26104 match &args[0] {
26105 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => { let Literal::String(s) = lit.as_ref() else { unreachable!() }; Some(s.clone()) },
26106 _ => None,
26107 }
26108 } else {
26109 None
26110 };
26111 let has_zone = zone.is_some();
26112
26113 match target {
26114 DialectType::SQLite => {
26115 // SQLite: just the bare expression
26116 Ok(this)
26117 }
26118 DialectType::MySQL => {
26119 if has_zone {
26120 // MySQL with zone: TIMESTAMP(x)
26121 Ok(Expression::Function(Box::new(Function::new(
26122 "TIMESTAMP".to_string(),
26123 vec![this],
26124 ))))
26125 } else {
26126 // MySQL: CAST(x AS DATETIME) or with precision
26127 // Use DataType::Custom to avoid MySQL's transform_cast converting
26128 // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
26129 let precision =
26130 if let Expression::Literal(ref lit) = this {
26131 if let Literal::String(ref s) = lit.as_ref() {
26132 if let Some(dot_pos) = s.rfind('.') {
26133 let frac = &s[dot_pos + 1..];
26134 let digit_count = frac
26135 .chars()
26136 .take_while(|c| c.is_ascii_digit())
26137 .count();
26138 if digit_count > 0 {
26139 Some(digit_count)
26140 } else {
26141 None
26142 }
26143 } else {
26144 None
26145 }
26146 } else { None }
26147 } else {
26148 None
26149 };
26150 let type_name = match precision {
26151 Some(p) => format!("DATETIME({})", p),
26152 None => "DATETIME".to_string(),
26153 };
26154 Ok(Expression::Cast(Box::new(Cast {
26155 this,
26156 to: DataType::Custom { name: type_name },
26157 double_colon_syntax: false,
26158 trailing_comments: Vec::new(),
26159 format: None,
26160 default: None,
26161 inferred_type: None,
26162 })))
26163 }
26164 }
26165 DialectType::ClickHouse => {
26166 if has_zone {
26167 // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
26168 // We need to strip the timezone offset from the literal if present
26169 let clean_this =
26170 if let Expression::Literal(ref lit) = this {
26171 if let Literal::String(ref s) = lit.as_ref() {
26172 // Strip timezone offset like "-08:00" or "+00:00"
26173 let re_offset = s.rfind(|c: char| c == '+' || c == '-');
26174 if let Some(offset_pos) = re_offset {
26175 if offset_pos > 10 {
26176 // After the date part
26177 let trimmed = s[..offset_pos].to_string();
26178 Expression::Literal(Box::new(Literal::String(trimmed)))
26179 } else {
26180 this.clone()
26181 }
26182 } else {
26183 this.clone()
26184 }
26185 } else { this.clone() }
26186 } else {
26187 this.clone()
26188 };
26189 let zone_str = zone.unwrap();
26190 // Build: CAST(x AS DateTime64(6, 'zone'))
26191 let type_name = format!("DateTime64(6, '{}')", zone_str);
26192 Ok(Expression::Cast(Box::new(Cast {
26193 this: clean_this,
26194 to: DataType::Custom { name: type_name },
26195 double_colon_syntax: false,
26196 trailing_comments: Vec::new(),
26197 format: None,
26198 default: None,
26199 inferred_type: None,
26200 })))
26201 } else {
26202 Ok(Expression::Cast(Box::new(Cast {
26203 this,
26204 to: DataType::Custom {
26205 name: "DateTime64(6)".to_string(),
26206 },
26207 double_colon_syntax: false,
26208 trailing_comments: Vec::new(),
26209 format: None,
26210 default: None,
26211 inferred_type: None,
26212 })))
26213 }
26214 }
26215 DialectType::BigQuery => {
26216 if has_zone {
26217 // BigQuery with zone: CAST(x AS TIMESTAMP)
26218 Ok(Expression::Cast(Box::new(Cast {
26219 this,
26220 to: DataType::Timestamp {
26221 timezone: false,
26222 precision: None,
26223 },
26224 double_colon_syntax: false,
26225 trailing_comments: Vec::new(),
26226 format: None,
26227 default: None,
26228 inferred_type: None,
26229 })))
26230 } else {
26231 // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
26232 Ok(Expression::Cast(Box::new(Cast {
26233 this,
26234 to: DataType::Custom {
26235 name: "DATETIME".to_string(),
26236 },
26237 double_colon_syntax: false,
26238 trailing_comments: Vec::new(),
26239 format: None,
26240 default: None,
26241 inferred_type: None,
26242 })))
26243 }
26244 }
26245 DialectType::Doris => {
26246 // Doris: CAST(x AS DATETIME)
26247 Ok(Expression::Cast(Box::new(Cast {
26248 this,
26249 to: DataType::Custom {
26250 name: "DATETIME".to_string(),
26251 },
26252 double_colon_syntax: false,
26253 trailing_comments: Vec::new(),
26254 format: None,
26255 default: None,
26256 inferred_type: None,
26257 })))
26258 }
26259 DialectType::TSQL | DialectType::Fabric => {
26260 if has_zone {
26261 // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
26262 let cast_expr = Expression::Cast(Box::new(Cast {
26263 this,
26264 to: DataType::Custom {
26265 name: "DATETIMEOFFSET".to_string(),
26266 },
26267 double_colon_syntax: false,
26268 trailing_comments: Vec::new(),
26269 format: None,
26270 default: None,
26271 inferred_type: None,
26272 }));
26273 Ok(Expression::AtTimeZone(Box::new(
26274 crate::expressions::AtTimeZone {
26275 this: cast_expr,
26276 zone: Expression::Literal(Box::new(Literal::String(
26277 "UTC".to_string(),
26278 ))),
26279 },
26280 )))
26281 } else {
26282 // TSQL: CAST(x AS DATETIME2)
26283 Ok(Expression::Cast(Box::new(Cast {
26284 this,
26285 to: DataType::Custom {
26286 name: "DATETIME2".to_string(),
26287 },
26288 double_colon_syntax: false,
26289 trailing_comments: Vec::new(),
26290 format: None,
26291 default: None,
26292 inferred_type: None,
26293 })))
26294 }
26295 }
26296 DialectType::DuckDB => {
26297 if has_zone {
26298 // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
26299 Ok(Expression::Cast(Box::new(Cast {
26300 this,
26301 to: DataType::Timestamp {
26302 timezone: true,
26303 precision: None,
26304 },
26305 double_colon_syntax: false,
26306 trailing_comments: Vec::new(),
26307 format: None,
26308 default: None,
26309 inferred_type: None,
26310 })))
26311 } else {
26312 // DuckDB: CAST(x AS TIMESTAMP)
26313 Ok(Expression::Cast(Box::new(Cast {
26314 this,
26315 to: DataType::Timestamp {
26316 timezone: false,
26317 precision: None,
26318 },
26319 double_colon_syntax: false,
26320 trailing_comments: Vec::new(),
26321 format: None,
26322 default: None,
26323 inferred_type: None,
26324 })))
26325 }
26326 }
26327 DialectType::PostgreSQL
26328 | DialectType::Materialize
26329 | DialectType::RisingWave => {
26330 if has_zone {
26331 // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
26332 Ok(Expression::Cast(Box::new(Cast {
26333 this,
26334 to: DataType::Timestamp {
26335 timezone: true,
26336 precision: None,
26337 },
26338 double_colon_syntax: false,
26339 trailing_comments: Vec::new(),
26340 format: None,
26341 default: None,
26342 inferred_type: None,
26343 })))
26344 } else {
26345 // PostgreSQL: CAST(x AS TIMESTAMP)
26346 Ok(Expression::Cast(Box::new(Cast {
26347 this,
26348 to: DataType::Timestamp {
26349 timezone: false,
26350 precision: None,
26351 },
26352 double_colon_syntax: false,
26353 trailing_comments: Vec::new(),
26354 format: None,
26355 default: None,
26356 inferred_type: None,
26357 })))
26358 }
26359 }
26360 DialectType::Snowflake => {
26361 if has_zone {
26362 // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
26363 Ok(Expression::Cast(Box::new(Cast {
26364 this,
26365 to: DataType::Timestamp {
26366 timezone: true,
26367 precision: None,
26368 },
26369 double_colon_syntax: false,
26370 trailing_comments: Vec::new(),
26371 format: None,
26372 default: None,
26373 inferred_type: None,
26374 })))
26375 } else {
26376 // Snowflake: CAST(x AS TIMESTAMP)
26377 Ok(Expression::Cast(Box::new(Cast {
26378 this,
26379 to: DataType::Timestamp {
26380 timezone: false,
26381 precision: None,
26382 },
26383 double_colon_syntax: false,
26384 trailing_comments: Vec::new(),
26385 format: None,
26386 default: None,
26387 inferred_type: None,
26388 })))
26389 }
26390 }
26391 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26392 if has_zone {
26393 // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
26394 // Check for precision from sub-second digits
26395 let precision =
26396 if let Expression::Literal(ref lit) = this {
26397 if let Literal::String(ref s) = lit.as_ref() {
26398 if let Some(dot_pos) = s.rfind('.') {
26399 let frac = &s[dot_pos + 1..];
26400 let digit_count = frac
26401 .chars()
26402 .take_while(|c| c.is_ascii_digit())
26403 .count();
26404 if digit_count > 0
26405 && matches!(target, DialectType::Trino)
26406 {
26407 Some(digit_count as u32)
26408 } else {
26409 None
26410 }
26411 } else {
26412 None
26413 }
26414 } else { None }
26415 } else {
26416 None
26417 };
26418 let dt = if let Some(prec) = precision {
26419 DataType::Timestamp {
26420 timezone: true,
26421 precision: Some(prec),
26422 }
26423 } else {
26424 DataType::Timestamp {
26425 timezone: true,
26426 precision: None,
26427 }
26428 };
26429 Ok(Expression::Cast(Box::new(Cast {
26430 this,
26431 to: dt,
26432 double_colon_syntax: false,
26433 trailing_comments: Vec::new(),
26434 format: None,
26435 default: None,
26436 inferred_type: None,
26437 })))
26438 } else {
26439 // Check for sub-second precision for Trino
26440 let precision =
26441 if let Expression::Literal(ref lit) = this {
26442 if let Literal::String(ref s) = lit.as_ref() {
26443 if let Some(dot_pos) = s.rfind('.') {
26444 let frac = &s[dot_pos + 1..];
26445 let digit_count = frac
26446 .chars()
26447 .take_while(|c| c.is_ascii_digit())
26448 .count();
26449 if digit_count > 0
26450 && matches!(target, DialectType::Trino)
26451 {
26452 Some(digit_count as u32)
26453 } else {
26454 None
26455 }
26456 } else {
26457 None
26458 }
26459 } else { None }
26460 } else {
26461 None
26462 };
26463 let dt = DataType::Timestamp {
26464 timezone: false,
26465 precision,
26466 };
26467 Ok(Expression::Cast(Box::new(Cast {
26468 this,
26469 to: dt,
26470 double_colon_syntax: false,
26471 trailing_comments: Vec::new(),
26472 format: None,
26473 default: None,
26474 inferred_type: None,
26475 })))
26476 }
26477 }
26478 DialectType::Redshift => {
26479 if has_zone {
26480 // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
26481 Ok(Expression::Cast(Box::new(Cast {
26482 this,
26483 to: DataType::Timestamp {
26484 timezone: true,
26485 precision: None,
26486 },
26487 double_colon_syntax: false,
26488 trailing_comments: Vec::new(),
26489 format: None,
26490 default: None,
26491 inferred_type: None,
26492 })))
26493 } else {
26494 // Redshift: CAST(x AS TIMESTAMP)
26495 Ok(Expression::Cast(Box::new(Cast {
26496 this,
26497 to: DataType::Timestamp {
26498 timezone: false,
26499 precision: None,
26500 },
26501 double_colon_syntax: false,
26502 trailing_comments: Vec::new(),
26503 format: None,
26504 default: None,
26505 inferred_type: None,
26506 })))
26507 }
26508 }
26509 _ => {
26510 // Default: CAST(x AS TIMESTAMP)
26511 Ok(Expression::Cast(Box::new(Cast {
26512 this,
26513 to: DataType::Timestamp {
26514 timezone: false,
26515 precision: None,
26516 },
26517 double_colon_syntax: false,
26518 trailing_comments: Vec::new(),
26519 format: None,
26520 default: None,
26521 inferred_type: None,
26522 })))
26523 }
26524 }
26525 } else {
26526 Ok(e)
26527 }
26528 }
26529
26530 Action::DateToDateStrConvert => {
26531 // DATE_TO_DATE_STR(x) -> CAST(x AS text_type) per dialect
26532 if let Expression::Function(f) = e {
26533 let arg = f.args.into_iter().next().unwrap();
26534 let str_type = match target {
26535 DialectType::DuckDB => DataType::Text,
26536 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
26537 DataType::Custom {
26538 name: "STRING".to_string(),
26539 }
26540 }
26541 DialectType::Presto
26542 | DialectType::Trino
26543 | DialectType::Athena
26544 | DialectType::Drill => DataType::VarChar {
26545 length: None,
26546 parenthesized_length: false,
26547 },
26548 _ => DataType::VarChar {
26549 length: None,
26550 parenthesized_length: false,
26551 },
26552 };
26553 Ok(Expression::Cast(Box::new(Cast {
26554 this: arg,
26555 to: str_type,
26556 double_colon_syntax: false,
26557 trailing_comments: Vec::new(),
26558 format: None,
26559 default: None,
26560 inferred_type: None,
26561 })))
26562 } else {
26563 Ok(e)
26564 }
26565 }
26566
26567 Action::DateToDiConvert => {
26568 // DATE_TO_DI(x) -> CAST(format_func(x, fmt) AS INT)
26569 if let Expression::Function(f) = e {
26570 let arg = f.args.into_iter().next().unwrap();
26571 let inner = match target {
26572 DialectType::DuckDB => {
26573 // STRFTIME(x, '%Y%m%d')
26574 Expression::Function(Box::new(Function::new(
26575 "STRFTIME".to_string(),
26576 vec![arg, Expression::string("%Y%m%d")],
26577 )))
26578 }
26579 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
26580 // DATE_FORMAT(x, 'yyyyMMdd')
26581 Expression::Function(Box::new(Function::new(
26582 "DATE_FORMAT".to_string(),
26583 vec![arg, Expression::string("yyyyMMdd")],
26584 )))
26585 }
26586 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26587 // DATE_FORMAT(x, '%Y%m%d')
26588 Expression::Function(Box::new(Function::new(
26589 "DATE_FORMAT".to_string(),
26590 vec![arg, Expression::string("%Y%m%d")],
26591 )))
26592 }
26593 DialectType::Drill => {
26594 // TO_DATE(x, 'yyyyMMdd')
26595 Expression::Function(Box::new(Function::new(
26596 "TO_DATE".to_string(),
26597 vec![arg, Expression::string("yyyyMMdd")],
26598 )))
26599 }
26600 _ => {
26601 // Default: STRFTIME(x, '%Y%m%d')
26602 Expression::Function(Box::new(Function::new(
26603 "STRFTIME".to_string(),
26604 vec![arg, Expression::string("%Y%m%d")],
26605 )))
26606 }
26607 };
26608 // Use INT (not INTEGER) for Presto/Trino
26609 let int_type = match target {
26610 DialectType::Presto
26611 | DialectType::Trino
26612 | DialectType::Athena
26613 | DialectType::TSQL
26614 | DialectType::Fabric
26615 | DialectType::SQLite
26616 | DialectType::Redshift => DataType::Custom {
26617 name: "INT".to_string(),
26618 },
26619 _ => DataType::Int {
26620 length: None,
26621 integer_spelling: false,
26622 },
26623 };
26624 Ok(Expression::Cast(Box::new(Cast {
26625 this: inner,
26626 to: int_type,
26627 double_colon_syntax: false,
26628 trailing_comments: Vec::new(),
26629 format: None,
26630 default: None,
26631 inferred_type: None,
26632 })))
26633 } else {
26634 Ok(e)
26635 }
26636 }
26637
26638 Action::DiToDateConvert => {
26639 // DI_TO_DATE(x) -> dialect-specific integer-to-date conversion
26640 if let Expression::Function(f) = e {
26641 let arg = f.args.into_iter().next().unwrap();
26642 match target {
26643 DialectType::DuckDB => {
26644 // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
26645 let cast_text = Expression::Cast(Box::new(Cast {
26646 this: arg,
26647 to: DataType::Text,
26648 double_colon_syntax: false,
26649 trailing_comments: Vec::new(),
26650 format: None,
26651 default: None,
26652 inferred_type: None,
26653 }));
26654 let strptime = Expression::Function(Box::new(Function::new(
26655 "STRPTIME".to_string(),
26656 vec![cast_text, Expression::string("%Y%m%d")],
26657 )));
26658 Ok(Expression::Cast(Box::new(Cast {
26659 this: strptime,
26660 to: DataType::Date,
26661 double_colon_syntax: false,
26662 trailing_comments: Vec::new(),
26663 format: None,
26664 default: None,
26665 inferred_type: None,
26666 })))
26667 }
26668 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
26669 // TO_DATE(CAST(x AS STRING), 'yyyyMMdd')
26670 let cast_str = Expression::Cast(Box::new(Cast {
26671 this: arg,
26672 to: DataType::Custom {
26673 name: "STRING".to_string(),
26674 },
26675 double_colon_syntax: false,
26676 trailing_comments: Vec::new(),
26677 format: None,
26678 default: None,
26679 inferred_type: None,
26680 }));
26681 Ok(Expression::Function(Box::new(Function::new(
26682 "TO_DATE".to_string(),
26683 vec![cast_str, Expression::string("yyyyMMdd")],
26684 ))))
26685 }
26686 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26687 // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)
26688 let cast_varchar = Expression::Cast(Box::new(Cast {
26689 this: arg,
26690 to: DataType::VarChar {
26691 length: None,
26692 parenthesized_length: false,
26693 },
26694 double_colon_syntax: false,
26695 trailing_comments: Vec::new(),
26696 format: None,
26697 default: None,
26698 inferred_type: None,
26699 }));
26700 let date_parse = Expression::Function(Box::new(Function::new(
26701 "DATE_PARSE".to_string(),
26702 vec![cast_varchar, Expression::string("%Y%m%d")],
26703 )));
26704 Ok(Expression::Cast(Box::new(Cast {
26705 this: date_parse,
26706 to: DataType::Date,
26707 double_colon_syntax: false,
26708 trailing_comments: Vec::new(),
26709 format: None,
26710 default: None,
26711 inferred_type: None,
26712 })))
26713 }
26714 DialectType::Drill => {
26715 // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
26716 let cast_varchar = Expression::Cast(Box::new(Cast {
26717 this: arg,
26718 to: DataType::VarChar {
26719 length: None,
26720 parenthesized_length: false,
26721 },
26722 double_colon_syntax: false,
26723 trailing_comments: Vec::new(),
26724 format: None,
26725 default: None,
26726 inferred_type: None,
26727 }));
26728 Ok(Expression::Function(Box::new(Function::new(
26729 "TO_DATE".to_string(),
26730 vec![cast_varchar, Expression::string("yyyyMMdd")],
26731 ))))
26732 }
26733 _ => Ok(Expression::Function(Box::new(Function::new(
26734 "DI_TO_DATE".to_string(),
26735 vec![arg],
26736 )))),
26737 }
26738 } else {
26739 Ok(e)
26740 }
26741 }
26742
26743 Action::TsOrDiToDiConvert => {
26744 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
26745 if let Expression::Function(f) = e {
26746 let arg = f.args.into_iter().next().unwrap();
26747 let str_type = match target {
26748 DialectType::DuckDB => DataType::Text,
26749 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
26750 DataType::Custom {
26751 name: "STRING".to_string(),
26752 }
26753 }
26754 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26755 DataType::VarChar {
26756 length: None,
26757 parenthesized_length: false,
26758 }
26759 }
26760 _ => DataType::VarChar {
26761 length: None,
26762 parenthesized_length: false,
26763 },
26764 };
26765 let cast_str = Expression::Cast(Box::new(Cast {
26766 this: arg,
26767 to: str_type,
26768 double_colon_syntax: false,
26769 trailing_comments: Vec::new(),
26770 format: None,
26771 default: None,
26772 inferred_type: None,
26773 }));
26774 let replace_expr = Expression::Function(Box::new(Function::new(
26775 "REPLACE".to_string(),
26776 vec![cast_str, Expression::string("-"), Expression::string("")],
26777 )));
26778 let substr_name = match target {
26779 DialectType::DuckDB
26780 | DialectType::Hive
26781 | DialectType::Spark
26782 | DialectType::Databricks => "SUBSTR",
26783 _ => "SUBSTR",
26784 };
26785 let substr = Expression::Function(Box::new(Function::new(
26786 substr_name.to_string(),
26787 vec![replace_expr, Expression::number(1), Expression::number(8)],
26788 )));
26789 // Use INT (not INTEGER) for Presto/Trino etc.
26790 let int_type = match target {
26791 DialectType::Presto
26792 | DialectType::Trino
26793 | DialectType::Athena
26794 | DialectType::TSQL
26795 | DialectType::Fabric
26796 | DialectType::SQLite
26797 | DialectType::Redshift => DataType::Custom {
26798 name: "INT".to_string(),
26799 },
26800 _ => DataType::Int {
26801 length: None,
26802 integer_spelling: false,
26803 },
26804 };
26805 Ok(Expression::Cast(Box::new(Cast {
26806 this: substr,
26807 to: int_type,
26808 double_colon_syntax: false,
26809 trailing_comments: Vec::new(),
26810 format: None,
26811 default: None,
26812 inferred_type: None,
26813 })))
26814 } else {
26815 Ok(e)
26816 }
26817 }
26818
26819 Action::UnixToStrConvert => {
26820 // UNIX_TO_STR(x, fmt) -> convert to Expression::UnixToStr for generator
26821 if let Expression::Function(f) = e {
26822 let mut args = f.args;
26823 let this = args.remove(0);
26824 let fmt_expr = if !args.is_empty() {
26825 Some(args.remove(0))
26826 } else {
26827 None
26828 };
26829
26830 // Check if format is a string literal
26831 let fmt_str = fmt_expr.as_ref().and_then(|f| {
26832 if let Expression::Literal(lit) = f {
26833 if let Literal::String(s) = lit.as_ref() {
26834 Some(s.clone())
26835 } else { None }
26836 } else {
26837 None
26838 }
26839 });
26840
26841 if let Some(fmt_string) = fmt_str {
26842 // String literal format -> use UnixToStr expression (generator handles it)
26843 Ok(Expression::UnixToStr(Box::new(
26844 crate::expressions::UnixToStr {
26845 this: Box::new(this),
26846 format: Some(fmt_string),
26847 },
26848 )))
26849 } else if let Some(fmt_e) = fmt_expr {
26850 // Non-literal format (e.g., identifier `y`) -> build target expression directly
26851 match target {
26852 DialectType::DuckDB => {
26853 // STRFTIME(TO_TIMESTAMP(x), y)
26854 let to_ts = Expression::Function(Box::new(Function::new(
26855 "TO_TIMESTAMP".to_string(),
26856 vec![this],
26857 )));
26858 Ok(Expression::Function(Box::new(Function::new(
26859 "STRFTIME".to_string(),
26860 vec![to_ts, fmt_e],
26861 ))))
26862 }
26863 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26864 // DATE_FORMAT(FROM_UNIXTIME(x), y)
26865 let from_unix = Expression::Function(Box::new(Function::new(
26866 "FROM_UNIXTIME".to_string(),
26867 vec![this],
26868 )));
26869 Ok(Expression::Function(Box::new(Function::new(
26870 "DATE_FORMAT".to_string(),
26871 vec![from_unix, fmt_e],
26872 ))))
26873 }
26874 DialectType::Hive
26875 | DialectType::Spark
26876 | DialectType::Databricks
26877 | DialectType::Doris
26878 | DialectType::StarRocks => {
26879 // FROM_UNIXTIME(x, y)
26880 Ok(Expression::Function(Box::new(Function::new(
26881 "FROM_UNIXTIME".to_string(),
26882 vec![this, fmt_e],
26883 ))))
26884 }
26885 _ => {
26886 // Default: keep as UNIX_TO_STR(x, y)
26887 Ok(Expression::Function(Box::new(Function::new(
26888 "UNIX_TO_STR".to_string(),
26889 vec![this, fmt_e],
26890 ))))
26891 }
26892 }
26893 } else {
26894 Ok(Expression::UnixToStr(Box::new(
26895 crate::expressions::UnixToStr {
26896 this: Box::new(this),
26897 format: None,
26898 },
26899 )))
26900 }
26901 } else {
26902 Ok(e)
26903 }
26904 }
26905
26906 Action::UnixToTimeConvert => {
26907 // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
26908 if let Expression::Function(f) = e {
26909 let arg = f.args.into_iter().next().unwrap();
26910 Ok(Expression::UnixToTime(Box::new(
26911 crate::expressions::UnixToTime {
26912 this: Box::new(arg),
26913 scale: None,
26914 zone: None,
26915 hours: None,
26916 minutes: None,
26917 format: None,
26918 target_type: None,
26919 },
26920 )))
26921 } else {
26922 Ok(e)
26923 }
26924 }
26925
26926 Action::UnixToTimeStrConvert => {
26927 // UNIX_TO_TIME_STR(x) -> dialect-specific
26928 if let Expression::Function(f) = e {
26929 let arg = f.args.into_iter().next().unwrap();
26930 match target {
26931 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
26932 // FROM_UNIXTIME(x)
26933 Ok(Expression::Function(Box::new(Function::new(
26934 "FROM_UNIXTIME".to_string(),
26935 vec![arg],
26936 ))))
26937 }
26938 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26939 // CAST(FROM_UNIXTIME(x) AS VARCHAR)
26940 let from_unix = Expression::Function(Box::new(Function::new(
26941 "FROM_UNIXTIME".to_string(),
26942 vec![arg],
26943 )));
26944 Ok(Expression::Cast(Box::new(Cast {
26945 this: from_unix,
26946 to: DataType::VarChar {
26947 length: None,
26948 parenthesized_length: false,
26949 },
26950 double_colon_syntax: false,
26951 trailing_comments: Vec::new(),
26952 format: None,
26953 default: None,
26954 inferred_type: None,
26955 })))
26956 }
26957 DialectType::DuckDB => {
26958 // CAST(TO_TIMESTAMP(x) AS TEXT)
26959 let to_ts = Expression::Function(Box::new(Function::new(
26960 "TO_TIMESTAMP".to_string(),
26961 vec![arg],
26962 )));
26963 Ok(Expression::Cast(Box::new(Cast {
26964 this: to_ts,
26965 to: DataType::Text,
26966 double_colon_syntax: false,
26967 trailing_comments: Vec::new(),
26968 format: None,
26969 default: None,
26970 inferred_type: None,
26971 })))
26972 }
26973 _ => Ok(Expression::Function(Box::new(Function::new(
26974 "UNIX_TO_TIME_STR".to_string(),
26975 vec![arg],
26976 )))),
26977 }
26978 } else {
26979 Ok(e)
26980 }
26981 }
26982
26983 Action::TimeToUnixConvert => {
26984 // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
26985 if let Expression::Function(f) = e {
26986 let arg = f.args.into_iter().next().unwrap();
26987 Ok(Expression::TimeToUnix(Box::new(
26988 crate::expressions::UnaryFunc {
26989 this: arg,
26990 original_name: None,
26991 inferred_type: None,
26992 },
26993 )))
26994 } else {
26995 Ok(e)
26996 }
26997 }
26998
26999 Action::TimeToStrConvert => {
27000 // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
27001 if let Expression::Function(f) = e {
27002 let mut args = f.args;
27003 let this = args.remove(0);
27004 let fmt = match args.remove(0) {
27005 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => { let Literal::String(s) = lit.as_ref() else { unreachable!() }; s.clone() },
27006 other => {
27007 return Ok(Expression::Function(Box::new(Function::new(
27008 "TIME_TO_STR".to_string(),
27009 vec![this, other],
27010 ))));
27011 }
27012 };
27013 Ok(Expression::TimeToStr(Box::new(
27014 crate::expressions::TimeToStr {
27015 this: Box::new(this),
27016 format: fmt,
27017 culture: None,
27018 zone: None,
27019 },
27020 )))
27021 } else {
27022 Ok(e)
27023 }
27024 }
27025
27026 Action::StrToUnixConvert => {
27027 // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
27028 if let Expression::Function(f) = e {
27029 let mut args = f.args;
27030 let this = args.remove(0);
27031 let fmt = match args.remove(0) {
27032 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => { let Literal::String(s) = lit.as_ref() else { unreachable!() }; s.clone() },
27033 other => {
27034 return Ok(Expression::Function(Box::new(Function::new(
27035 "STR_TO_UNIX".to_string(),
27036 vec![this, other],
27037 ))));
27038 }
27039 };
27040 Ok(Expression::StrToUnix(Box::new(
27041 crate::expressions::StrToUnix {
27042 this: Some(Box::new(this)),
27043 format: Some(fmt),
27044 },
27045 )))
27046 } else {
27047 Ok(e)
27048 }
27049 }
27050
27051 Action::TimeStrToUnixConvert => {
27052 // TIME_STR_TO_UNIX(x) -> dialect-specific
27053 if let Expression::Function(f) = e {
27054 let arg = f.args.into_iter().next().unwrap();
27055 match target {
27056 DialectType::DuckDB => {
27057 // EPOCH(CAST(x AS TIMESTAMP))
27058 let cast_ts = Expression::Cast(Box::new(Cast {
27059 this: arg,
27060 to: DataType::Timestamp {
27061 timezone: false,
27062 precision: None,
27063 },
27064 double_colon_syntax: false,
27065 trailing_comments: Vec::new(),
27066 format: None,
27067 default: None,
27068 inferred_type: None,
27069 }));
27070 Ok(Expression::Function(Box::new(Function::new(
27071 "EPOCH".to_string(),
27072 vec![cast_ts],
27073 ))))
27074 }
27075 DialectType::Hive
27076 | DialectType::Doris
27077 | DialectType::StarRocks
27078 | DialectType::MySQL => {
27079 // UNIX_TIMESTAMP(x)
27080 Ok(Expression::Function(Box::new(Function::new(
27081 "UNIX_TIMESTAMP".to_string(),
27082 vec![arg],
27083 ))))
27084 }
27085 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
27086 // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
27087 let date_parse = Expression::Function(Box::new(Function::new(
27088 "DATE_PARSE".to_string(),
27089 vec![arg, Expression::string("%Y-%m-%d %T")],
27090 )));
27091 Ok(Expression::Function(Box::new(Function::new(
27092 "TO_UNIXTIME".to_string(),
27093 vec![date_parse],
27094 ))))
27095 }
27096 _ => Ok(Expression::Function(Box::new(Function::new(
27097 "TIME_STR_TO_UNIX".to_string(),
27098 vec![arg],
27099 )))),
27100 }
27101 } else {
27102 Ok(e)
27103 }
27104 }
27105
27106 Action::TimeToTimeStrConvert => {
27107 // TIME_TO_TIME_STR(x) -> CAST(x AS str_type) per dialect
27108 if let Expression::Function(f) = e {
27109 let arg = f.args.into_iter().next().unwrap();
27110 let str_type = match target {
27111 DialectType::DuckDB => DataType::Text,
27112 DialectType::Hive
27113 | DialectType::Spark
27114 | DialectType::Databricks
27115 | DialectType::Doris
27116 | DialectType::StarRocks => DataType::Custom {
27117 name: "STRING".to_string(),
27118 },
27119 DialectType::Redshift => DataType::Custom {
27120 name: "VARCHAR(MAX)".to_string(),
27121 },
27122 _ => DataType::VarChar {
27123 length: None,
27124 parenthesized_length: false,
27125 },
27126 };
27127 Ok(Expression::Cast(Box::new(Cast {
27128 this: arg,
27129 to: str_type,
27130 double_colon_syntax: false,
27131 trailing_comments: Vec::new(),
27132 format: None,
27133 default: None,
27134 inferred_type: None,
27135 })))
27136 } else {
27137 Ok(e)
27138 }
27139 }
27140
            Action::DateTruncSwapArgs => {
                // DATE_TRUNC('unit', x) from Generic -> target-specific:
                // several dialects flip the argument order and/or change the
                // function name, and MySQL needs a full expression expansion.
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        let unit_arg = f.args[0].clone();
                        let expr_arg = f.args[1].clone();
                        // Extract unit string from the first arg.
                        // Only a string-literal unit can be rewritten; normalize
                        // its case so the per-dialect match below is uniform.
                        let unit_str = match &unit_arg {
                            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => { let Literal::String(s) = lit.as_ref() else { unreachable!() }; s.to_ascii_uppercase() },
                            // Non-literal unit: leave the call untouched.
                            _ => return Ok(Expression::Function(f)),
                        };
                        match target {
                            DialectType::BigQuery => {
                                // BigQuery: DATE_TRUNC(x, UNIT) - unquoted unit.
                                // The unit is wrapped as a bare column reference
                                // so the generator renders it unquoted.
                                let unit_ident =
                                    Expression::Column(Box::new(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                        span: None,
                                        inferred_type: None,
                                    }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, unit_ident],
                                ))))
                            }
                            DialectType::Doris => {
                                // Doris: DATE_TRUNC(x, 'UNIT') - swapped order,
                                // quoted unit.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::StarRocks => {
                                // StarRocks: DATE_TRUNC('UNIT', x) - keep standard order
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: TRUNC(x, 'UNIT') - different function name.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::MySQL => {
                                // MySQL: complex expansion based on unit
                                // (delegated to the date_trunc_to_mysql helper).
                                Self::date_trunc_to_mysql(&unit_str, &expr_arg)
                            }
                            // Targets with a standard DATE_TRUNC: leave as-is.
                            _ => Ok(Expression::Function(f)),
                        }
                    } else {
                        // Wrong arity: leave the call untouched.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
27203
            Action::TimestampTruncConvert => {
                // TIMESTAMP_TRUNC(x, UNIT[, tz]) from Generic -> target-specific:
                // dialects differ in function name, argument order, unit quoting
                // and whether/how the optional timezone is honored.
                if let Expression::Function(f) = e {
                    if f.args.len() >= 2 {
                        let expr_arg = f.args[0].clone();
                        let unit_arg = f.args[1].clone();
                        // Optional third argument is a timezone.
                        let tz_arg = if f.args.len() >= 3 {
                            Some(f.args[2].clone())
                        } else {
                            None
                        };
                        // Extract unit string: either a string literal or a bare
                        // identifier (e.g. BigQuery's unquoted DAY) is accepted.
                        let unit_str = match &unit_arg {
                            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => { let Literal::String(s) = lit.as_ref() else { unreachable!() }; s.to_ascii_uppercase() },
                            Expression::Column(c) => c.name.name.to_ascii_uppercase(),
                            _ => {
                                // Dynamic unit: leave the call untouched.
                                return Ok(Expression::Function(f));
                            }
                        };
                        match target {
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: DATE_TRUNC('UNIT', x)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Doris | DialectType::StarRocks => {
                                // Doris: DATE_TRUNC(x, 'UNIT') - swapped order.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::BigQuery => {
                                // BigQuery: TIMESTAMP_TRUNC(x, UNIT) - keep the
                                // function but render the unit unquoted (as a
                                // bare column reference); timezone is preserved.
                                let unit_ident =
                                    Expression::Column(Box::new(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                        span: None,
                                        inferred_type: None,
                                    }));
                                let mut args = vec![expr_arg, unit_ident];
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TIMESTAMP_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                            DialectType::DuckDB => {
                                // DuckDB with timezone:
                                // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                if let Some(tz) = tz_arg {
                                    // Non-literal timezones fall back to 'UTC'.
                                    let tz_str = match &tz {
                                        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => { let Literal::String(s) = lit.as_ref() else { unreachable!() }; s.clone() },
                                        _ => "UTC".to_string(),
                                    };
                                    // x AT TIME ZONE 'tz'
                                    let at_tz = Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: expr_arg,
                                            zone: Expression::string(&tz_str),
                                        },
                                    ));
                                    // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
                                    let trunc = Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), at_tz],
                                    )));
                                    // DATE_TRUNC(...) AT TIME ZONE 'tz'
                                    Ok(Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: trunc,
                                            zone: Expression::string(&tz_str),
                                        },
                                    )))
                                } else {
                                    // No timezone: plain DATE_TRUNC('UNIT', x).
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), expr_arg],
                                    ))))
                                }
                            }
                            DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Snowflake => {
                                // Presto/Snowflake: DATE_TRUNC('UNIT', x) - the
                                // timezone argument is dropped entirely.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            _ => {
                                // For most dialects: DATE_TRUNC('UNIT', x), and the
                                // timezone (if any) is passed through as a third arg.
                                let mut args = vec![Expression::string(&unit_str), expr_arg];
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                        }
                    } else {
                        // Wrong arity: leave the call untouched.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
27320
            Action::StrToDateConvert => {
                // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date
                // parsing. Three paths: (1) the format is the "default" ISO
                // date/datetime, (2) it is some other string literal, (3) it is
                // a dynamic expression (kept as-is).
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        let mut args = f.args;
                        let this = args.remove(0);
                        let fmt_expr = args.remove(0);
                        // Pull the format out only when it is a string literal.
                        let fmt_str = match &fmt_expr {
                            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => { let Literal::String(s) = lit.as_ref() else { unreachable!() }; Some(s.clone()) },
                            _ => None,
                        };
                        // ISO date / datetime formats get a cheaper path below.
                        let default_date = "%Y-%m-%d";
                        let default_time = "%Y-%m-%d %H:%M:%S";
                        let is_default = fmt_str
                            .as_ref()
                            .map_or(false, |f| f == default_date || f == default_time);

                        if is_default {
                            // Default format: handle per-dialect
                            match target {
                                DialectType::MySQL
                                | DialectType::Doris
                                | DialectType::StarRocks => {
                                    // Keep STR_TO_DATE(x, fmt) as-is
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "STR_TO_DATE".to_string(),
                                        vec![this, fmt_expr],
                                    ))))
                                }
                                DialectType::Hive => {
                                    // Hive: CAST(x AS DATE)
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this,
                                        to: DataType::Date,
                                        double_colon_syntax: false,
                                        trailing_comments: Vec::new(),
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    })))
                                }
                                DialectType::Presto
                                | DialectType::Trino
                                | DialectType::Athena => {
                                    // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
                                    let date_parse =
                                        Expression::Function(Box::new(Function::new(
                                            "DATE_PARSE".to_string(),
                                            vec![this, fmt_expr],
                                        )));
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: date_parse,
                                        to: DataType::Date,
                                        double_colon_syntax: false,
                                        trailing_comments: Vec::new(),
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    })))
                                }
                                _ => {
                                    // Others: TsOrDsToDate (delegates to generator)
                                    Ok(Expression::TsOrDsToDate(Box::new(
                                        crate::expressions::TsOrDsToDate {
                                            this: Box::new(this),
                                            format: None,
                                            safe: None,
                                        },
                                    )))
                                }
                            }
                        } else if let Some(fmt) = fmt_str {
                            // Literal but non-default format.
                            match target {
                                DialectType::Doris
                                | DialectType::StarRocks
                                | DialectType::MySQL => {
                                    // Keep STR_TO_DATE but with normalized format (%H:%M:%S -> %T, %-d -> %e)
                                    let mut normalized = fmt.clone();
                                    normalized = normalized.replace("%-d", "%e");
                                    normalized = normalized.replace("%-m", "%c");
                                    normalized = normalized.replace("%H:%M:%S", "%T");
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "STR_TO_DATE".to_string(),
                                        vec![this, Expression::string(&normalized)],
                                    ))))
                                }
                                DialectType::Hive => {
                                    // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
                                    // (the format is translated from strftime to
                                    // Java SimpleDateFormat style first).
                                    let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
                                    let unix_ts =
                                        Expression::Function(Box::new(Function::new(
                                            "UNIX_TIMESTAMP".to_string(),
                                            vec![this, Expression::string(&java_fmt)],
                                        )));
                                    let from_unix =
                                        Expression::Function(Box::new(Function::new(
                                            "FROM_UNIXTIME".to_string(),
                                            vec![unix_ts],
                                        )));
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: from_unix,
                                        to: DataType::Date,
                                        double_colon_syntax: false,
                                        trailing_comments: Vec::new(),
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    })))
                                }
                                DialectType::Spark | DialectType::Databricks => {
                                    // Spark: TO_DATE(x, java_fmt)
                                    let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TO_DATE".to_string(),
                                        vec![this, Expression::string(&java_fmt)],
                                    ))))
                                }
                                DialectType::Drill => {
                                    // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
                                    // The generator's string literal escaping will double the quotes: 'T' -> ''T''
                                    let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
                                    let java_fmt = java_fmt.replace('T', "'T'");
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TO_DATE".to_string(),
                                        vec![this, Expression::string(&java_fmt)],
                                    ))))
                                }
                                _ => {
                                    // For other dialects: use TsOrDsToDate which delegates to generator
                                    Ok(Expression::TsOrDsToDate(Box::new(
                                        crate::expressions::TsOrDsToDate {
                                            this: Box::new(this),
                                            format: Some(fmt),
                                            safe: None,
                                        },
                                    )))
                                }
                            }
                        } else {
                            // Non-string format - keep as-is
                            let mut new_args = Vec::new();
                            new_args.push(this);
                            new_args.push(fmt_expr);
                            Ok(Expression::Function(Box::new(Function::new(
                                "STR_TO_DATE".to_string(),
                                new_args,
                            ))))
                        }
                    } else {
                        // Wrong arity: leave the call untouched.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
27476
            Action::TsOrDsAddConvert => {
                // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific
                // DATE_ADD / interval arithmetic. The same unit-name ->
                // IntervalUnit mapping appears in several arms below.
                if let Expression::Function(f) = e {
                    if f.args.len() == 3 {
                        let mut args = f.args;
                        let x = args.remove(0);
                        let n = args.remove(0);
                        let unit_expr = args.remove(0);
                        // Unit defaults to DAY when it is not a string literal.
                        let unit_str = match &unit_expr {
                            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => { let Literal::String(s) = lit.as_ref() else { unreachable!() }; s.to_ascii_uppercase() },
                            _ => "DAY".to_string(),
                        };

                        match target {
                            DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks => {
                                // DATE_ADD(x, n) - only supports DAY unit
                                // (the unit argument is dropped here).
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(),
                                    vec![x, n],
                                ))))
                            }
                            DialectType::MySQL => {
                                // DATE_ADD(x, INTERVAL n UNIT)
                                // Map the unit name onto the IntervalUnit enum;
                                // unknown units fall back to Day.
                                let iu = match unit_str.as_str() {
                                    "YEAR" => crate::expressions::IntervalUnit::Year,
                                    "QUARTER" => crate::expressions::IntervalUnit::Quarter,
                                    "MONTH" => crate::expressions::IntervalUnit::Month,
                                    "WEEK" => crate::expressions::IntervalUnit::Week,
                                    "HOUR" => crate::expressions::IntervalUnit::Hour,
                                    "MINUTE" => crate::expressions::IntervalUnit::Minute,
                                    "SECOND" => crate::expressions::IntervalUnit::Second,
                                    _ => crate::expressions::IntervalUnit::Day,
                                };
                                let interval = Expression::Interval(Box::new(
                                    crate::expressions::Interval {
                                        this: Some(n),
                                        unit: Some(
                                            crate::expressions::IntervalUnitSpec::Simple {
                                                unit: iu,
                                                use_plural: false,
                                            },
                                        ),
                                    },
                                ));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(),
                                    vec![x, interval],
                                ))))
                            }
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
                                // The double cast coerces either a timestamp or a
                                // date-string operand down to DATE.
                                let cast_ts = Expression::Cast(Box::new(Cast {
                                    this: x,
                                    to: DataType::Timestamp {
                                        precision: None,
                                        timezone: false,
                                    },
                                    double_colon_syntax: false,
                                    trailing_comments: Vec::new(),
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                let cast_date = Expression::Cast(Box::new(Cast {
                                    this: cast_ts,
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: Vec::new(),
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(),
                                    vec![Expression::string(&unit_str), n, cast_date],
                                ))))
                            }
                            DialectType::DuckDB => {
                                // CAST(x AS DATE) + INTERVAL n UNIT
                                let cast_date = Expression::Cast(Box::new(Cast {
                                    this: x,
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: Vec::new(),
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                // Same unit mapping as the MySQL arm above.
                                let iu = match unit_str.as_str() {
                                    "YEAR" => crate::expressions::IntervalUnit::Year,
                                    "QUARTER" => crate::expressions::IntervalUnit::Quarter,
                                    "MONTH" => crate::expressions::IntervalUnit::Month,
                                    "WEEK" => crate::expressions::IntervalUnit::Week,
                                    "HOUR" => crate::expressions::IntervalUnit::Hour,
                                    "MINUTE" => crate::expressions::IntervalUnit::Minute,
                                    "SECOND" => crate::expressions::IntervalUnit::Second,
                                    _ => crate::expressions::IntervalUnit::Day,
                                };
                                let interval = Expression::Interval(Box::new(
                                    crate::expressions::Interval {
                                        this: Some(n),
                                        unit: Some(
                                            crate::expressions::IntervalUnitSpec::Simple {
                                                unit: iu,
                                                use_plural: false,
                                            },
                                        ),
                                    },
                                ));
                                Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
                                    left: cast_date,
                                    right: interval,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                })))
                            }
                            DialectType::Drill => {
                                // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
                                let cast_date = Expression::Cast(Box::new(Cast {
                                    this: x,
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: Vec::new(),
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                // Same unit mapping as the MySQL arm above.
                                let iu = match unit_str.as_str() {
                                    "YEAR" => crate::expressions::IntervalUnit::Year,
                                    "QUARTER" => crate::expressions::IntervalUnit::Quarter,
                                    "MONTH" => crate::expressions::IntervalUnit::Month,
                                    "WEEK" => crate::expressions::IntervalUnit::Week,
                                    "HOUR" => crate::expressions::IntervalUnit::Hour,
                                    "MINUTE" => crate::expressions::IntervalUnit::Minute,
                                    "SECOND" => crate::expressions::IntervalUnit::Second,
                                    _ => crate::expressions::IntervalUnit::Day,
                                };
                                let interval = Expression::Interval(Box::new(
                                    crate::expressions::Interval {
                                        this: Some(n),
                                        unit: Some(
                                            crate::expressions::IntervalUnitSpec::Simple {
                                                unit: iu,
                                                use_plural: false,
                                            },
                                        ),
                                    },
                                ));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(),
                                    vec![cast_date, interval],
                                ))))
                            }
                            _ => {
                                // Default: keep as TS_OR_DS_ADD
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TS_OR_DS_ADD".to_string(),
                                    vec![x, n, unit_expr],
                                ))))
                            }
                        }
                    } else {
                        // Wrong arity: leave the call untouched.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
27649
27650 Action::DateFromUnixDateConvert => {
27651 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
27652 if let Expression::Function(f) = e {
27653 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
27654 if matches!(
27655 target,
27656 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
27657 ) {
27658 return Ok(Expression::Function(Box::new(Function::new(
27659 "DATE_FROM_UNIX_DATE".to_string(),
27660 f.args,
27661 ))));
27662 }
27663 let n = f.args.into_iter().next().unwrap();
27664 let epoch_date = Expression::Cast(Box::new(Cast {
27665 this: Expression::string("1970-01-01"),
27666 to: DataType::Date,
27667 double_colon_syntax: false,
27668 trailing_comments: Vec::new(),
27669 format: None,
27670 default: None,
27671 inferred_type: None,
27672 }));
27673 match target {
27674 DialectType::DuckDB => {
27675 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
27676 let interval =
27677 Expression::Interval(Box::new(crate::expressions::Interval {
27678 this: Some(n),
27679 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27680 unit: crate::expressions::IntervalUnit::Day,
27681 use_plural: false,
27682 }),
27683 }));
27684 Ok(Expression::Add(Box::new(
27685 crate::expressions::BinaryOp::new(epoch_date, interval),
27686 )))
27687 }
27688 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
27689 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
27690 Ok(Expression::Function(Box::new(Function::new(
27691 "DATE_ADD".to_string(),
27692 vec![Expression::string("DAY"), n, epoch_date],
27693 ))))
27694 }
27695 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
27696 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
27697 Ok(Expression::Function(Box::new(Function::new(
27698 "DATEADD".to_string(),
27699 vec![
27700 Expression::Identifier(Identifier::new("DAY")),
27701 n,
27702 epoch_date,
27703 ],
27704 ))))
27705 }
27706 DialectType::BigQuery => {
27707 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
27708 let interval =
27709 Expression::Interval(Box::new(crate::expressions::Interval {
27710 this: Some(n),
27711 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27712 unit: crate::expressions::IntervalUnit::Day,
27713 use_plural: false,
27714 }),
27715 }));
27716 Ok(Expression::Function(Box::new(Function::new(
27717 "DATE_ADD".to_string(),
27718 vec![epoch_date, interval],
27719 ))))
27720 }
27721 DialectType::MySQL
27722 | DialectType::Doris
27723 | DialectType::StarRocks
27724 | DialectType::Drill => {
27725 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
27726 let interval =
27727 Expression::Interval(Box::new(crate::expressions::Interval {
27728 this: Some(n),
27729 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
27730 unit: crate::expressions::IntervalUnit::Day,
27731 use_plural: false,
27732 }),
27733 }));
27734 Ok(Expression::Function(Box::new(Function::new(
27735 "DATE_ADD".to_string(),
27736 vec![epoch_date, interval],
27737 ))))
27738 }
27739 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
27740 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
27741 Ok(Expression::Function(Box::new(Function::new(
27742 "DATE_ADD".to_string(),
27743 vec![epoch_date, n],
27744 ))))
27745 }
27746 DialectType::PostgreSQL
27747 | DialectType::Materialize
27748 | DialectType::RisingWave => {
27749 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
27750 let n_str = match &n {
27751 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => { let Literal::Number(s) = lit.as_ref() else { unreachable!() }; s.clone() },
27752 _ => Self::expr_to_string_static(&n),
27753 };
27754 let interval =
27755 Expression::Interval(Box::new(crate::expressions::Interval {
27756 this: Some(Expression::string(&format!("{} DAY", n_str))),
27757 unit: None,
27758 }));
27759 Ok(Expression::Add(Box::new(
27760 crate::expressions::BinaryOp::new(epoch_date, interval),
27761 )))
27762 }
27763 _ => {
27764 // Default: keep as-is
27765 Ok(Expression::Function(Box::new(Function::new(
27766 "DATE_FROM_UNIX_DATE".to_string(),
27767 vec![n],
27768 ))))
27769 }
27770 }
27771 } else {
27772 Ok(e)
27773 }
27774 }
27775
            Action::ArrayRemoveConvert => {
                // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter, or a
                // filtering subquery for BigQuery. The lambda/subquery binds a
                // synthetic variable `_u` to each element.
                if let Expression::ArrayRemove(bf) = e {
                    let arr = bf.this;
                    let target_val = bf.expression;
                    match target {
                        DialectType::DuckDB => {
                            // LIST_FILTER(arr, _u -> _u <> target)
                            let u_id = crate::expressions::Identifier::new("_u");
                            let lambda =
                                Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                    parameters: vec![u_id.clone()],
                                    body: Expression::Neq(Box::new(BinaryOp {
                                        left: Expression::Identifier(u_id),
                                        right: target_val,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    })),
                                    colon: false,
                                    parameter_types: Vec::new(),
                                }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "LIST_FILTER".to_string(),
                                vec![arr, lambda],
                            ))))
                        }
                        DialectType::ClickHouse => {
                            // arrayFilter(_u -> _u <> target, arr) — note the
                            // lambda comes FIRST in ClickHouse's argument order.
                            let u_id = crate::expressions::Identifier::new("_u");
                            let lambda =
                                Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                    parameters: vec![u_id.clone()],
                                    body: Expression::Neq(Box::new(BinaryOp {
                                        left: Expression::Identifier(u_id),
                                        right: target_val,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    })),
                                    colon: false,
                                    parameter_types: Vec::new(),
                                }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "arrayFilter".to_string(),
                                vec![lambda, arr],
                            ))))
                        }
                        DialectType::BigQuery => {
                            // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
                            let u_id = crate::expressions::Identifier::new("_u");
                            // `_u` as a column reference for SELECT and WHERE.
                            let u_col = Expression::Column(Box::new(crate::expressions::Column {
                                name: u_id.clone(),
                                table: None,
                                join_mark: false,
                                trailing_comments: Vec::new(),
                                span: None,
                                inferred_type: None,
                            }));
                            // UNNEST(arr)
                            let unnest_expr =
                                Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
                                    this: arr,
                                    expressions: Vec::new(),
                                    with_ordinality: false,
                                    alias: None,
                                    offset_alias: None,
                                }));
                            // UNNEST(arr) AS _u
                            let aliased_unnest =
                                Expression::Alias(Box::new(crate::expressions::Alias {
                                    this: unnest_expr,
                                    alias: u_id.clone(),
                                    column_aliases: Vec::new(),
                                    pre_alias_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                            // WHERE _u <> target
                            let where_cond = Expression::Neq(Box::new(BinaryOp {
                                left: u_col.clone(),
                                right: target_val,
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            let subquery = Expression::Select(Box::new(
                                crate::expressions::Select::new()
                                    .column(u_col)
                                    .from(aliased_unnest)
                                    .where_(where_cond),
                            ));
                            // Wrap the subquery in an ARRAY(...) constructor.
                            Ok(Expression::ArrayFunc(Box::new(
                                crate::expressions::ArrayConstructor {
                                    expressions: vec![subquery],
                                    bracket_notation: false,
                                    use_list_keyword: false,
                                },
                            )))
                        }
                        // Other targets: rebuild the original ArrayRemove node.
                        _ => Ok(Expression::ArrayRemove(Box::new(
                            crate::expressions::BinaryFunc {
                                original_name: None,
                                this: arr,
                                expression: target_val,
                                inferred_type: None,
                            },
                        ))),
                    }
                } else {
                    Ok(e)
                }
            }
27887
27888 Action::ArrayReverseConvert => {
27889 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
27890 if let Expression::ArrayReverse(af) = e {
27891 Ok(Expression::Function(Box::new(Function::new(
27892 "arrayReverse".to_string(),
27893 vec![af.this],
27894 ))))
27895 } else {
27896 Ok(e)
27897 }
27898 }
27899
27900 Action::JsonKeysConvert => {
27901 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
27902 if let Expression::JsonKeys(uf) = e {
27903 match target {
27904 DialectType::Spark | DialectType::Databricks => {
27905 Ok(Expression::Function(Box::new(Function::new(
27906 "JSON_OBJECT_KEYS".to_string(),
27907 vec![uf.this],
27908 ))))
27909 }
27910 DialectType::Snowflake => Ok(Expression::Function(Box::new(
27911 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
27912 ))),
27913 _ => Ok(Expression::JsonKeys(uf)),
27914 }
27915 } else {
27916 Ok(e)
27917 }
27918 }
27919
27920 Action::ParseJsonStrip => {
27921 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
27922 if let Expression::ParseJson(uf) = e {
27923 Ok(uf.this)
27924 } else {
27925 Ok(e)
27926 }
27927 }
27928
27929 Action::ArraySizeDrill => {
27930 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
27931 if let Expression::ArraySize(uf) = e {
27932 Ok(Expression::Function(Box::new(Function::new(
27933 "REPEATED_COUNT".to_string(),
27934 vec![uf.this],
27935 ))))
27936 } else {
27937 Ok(e)
27938 }
27939 }
27940
27941 Action::WeekOfYearToWeekIso => {
27942 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
27943 if let Expression::WeekOfYear(uf) = e {
27944 Ok(Expression::Function(Box::new(Function::new(
27945 "WEEKISO".to_string(),
27946 vec![uf.this],
27947 ))))
27948 } else {
27949 Ok(e)
27950 }
27951 }
27952
27953 }
27954 })
27955 }
27956
27957 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
27958 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
27959 use crate::expressions::Function;
27960 match unit {
27961 "DAY" => {
27962 // DATE(x)
27963 Ok(Expression::Function(Box::new(Function::new(
27964 "DATE".to_string(),
27965 vec![expr.clone()],
27966 ))))
27967 }
27968 "WEEK" => {
27969 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
27970 let year_x = Expression::Function(Box::new(Function::new(
27971 "YEAR".to_string(),
27972 vec![expr.clone()],
27973 )));
27974 let week_x = Expression::Function(Box::new(Function::new(
27975 "WEEK".to_string(),
27976 vec![expr.clone(), Expression::number(1)],
27977 )));
27978 let concat_args = vec![
27979 year_x,
27980 Expression::string(" "),
27981 week_x,
27982 Expression::string(" 1"),
27983 ];
27984 let concat = Expression::Function(Box::new(Function::new(
27985 "CONCAT".to_string(),
27986 concat_args,
27987 )));
27988 Ok(Expression::Function(Box::new(Function::new(
27989 "STR_TO_DATE".to_string(),
27990 vec![concat, Expression::string("%Y %u %w")],
27991 ))))
27992 }
27993 "MONTH" => {
27994 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
27995 let year_x = Expression::Function(Box::new(Function::new(
27996 "YEAR".to_string(),
27997 vec![expr.clone()],
27998 )));
27999 let month_x = Expression::Function(Box::new(Function::new(
28000 "MONTH".to_string(),
28001 vec![expr.clone()],
28002 )));
28003 let concat_args = vec![
28004 year_x,
28005 Expression::string(" "),
28006 month_x,
28007 Expression::string(" 1"),
28008 ];
28009 let concat = Expression::Function(Box::new(Function::new(
28010 "CONCAT".to_string(),
28011 concat_args,
28012 )));
28013 Ok(Expression::Function(Box::new(Function::new(
28014 "STR_TO_DATE".to_string(),
28015 vec![concat, Expression::string("%Y %c %e")],
28016 ))))
28017 }
28018 "QUARTER" => {
28019 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
28020 let year_x = Expression::Function(Box::new(Function::new(
28021 "YEAR".to_string(),
28022 vec![expr.clone()],
28023 )));
28024 let quarter_x = Expression::Function(Box::new(Function::new(
28025 "QUARTER".to_string(),
28026 vec![expr.clone()],
28027 )));
28028 // QUARTER(x) * 3 - 2
28029 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
28030 left: quarter_x,
28031 right: Expression::number(3),
28032 left_comments: Vec::new(),
28033 operator_comments: Vec::new(),
28034 trailing_comments: Vec::new(),
28035 inferred_type: None,
28036 }));
28037 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
28038 left: mul,
28039 right: Expression::number(2),
28040 left_comments: Vec::new(),
28041 operator_comments: Vec::new(),
28042 trailing_comments: Vec::new(),
28043 inferred_type: None,
28044 }));
28045 let concat_args = vec![
28046 year_x,
28047 Expression::string(" "),
28048 sub,
28049 Expression::string(" 1"),
28050 ];
28051 let concat = Expression::Function(Box::new(Function::new(
28052 "CONCAT".to_string(),
28053 concat_args,
28054 )));
28055 Ok(Expression::Function(Box::new(Function::new(
28056 "STR_TO_DATE".to_string(),
28057 vec![concat, Expression::string("%Y %c %e")],
28058 ))))
28059 }
28060 "YEAR" => {
28061 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
28062 let year_x = Expression::Function(Box::new(Function::new(
28063 "YEAR".to_string(),
28064 vec![expr.clone()],
28065 )));
28066 let concat_args = vec![year_x, Expression::string(" 1 1")];
28067 let concat = Expression::Function(Box::new(Function::new(
28068 "CONCAT".to_string(),
28069 concat_args,
28070 )));
28071 Ok(Expression::Function(Box::new(Function::new(
28072 "STR_TO_DATE".to_string(),
28073 vec![concat, Expression::string("%Y %c %e")],
28074 ))))
28075 }
28076 _ => {
28077 // Unsupported unit -> keep as DATE_TRUNC
28078 Ok(Expression::Function(Box::new(Function::new(
28079 "DATE_TRUNC".to_string(),
28080 vec![Expression::string(unit), expr.clone()],
28081 ))))
28082 }
28083 }
28084 }
28085
28086 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
28087 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
28088 use crate::expressions::DataType;
28089 match dt {
28090 DataType::VarChar { .. } | DataType::Char { .. } => true,
28091 DataType::Struct { fields, .. } => fields
28092 .iter()
28093 .any(|f| Self::has_varchar_char_type(&f.data_type)),
28094 _ => false,
28095 }
28096 }
28097
28098 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
28099 fn normalize_varchar_to_string(
28100 dt: crate::expressions::DataType,
28101 ) -> crate::expressions::DataType {
28102 use crate::expressions::DataType;
28103 match dt {
28104 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
28105 name: "STRING".to_string(),
28106 },
28107 DataType::Struct { fields, nested } => {
28108 let fields = fields
28109 .into_iter()
28110 .map(|mut f| {
28111 f.data_type = Self::normalize_varchar_to_string(f.data_type);
28112 f
28113 })
28114 .collect();
28115 DataType::Struct { fields, nested }
28116 }
28117 other => other,
28118 }
28119 }
28120
28121 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
28122 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
28123 if let Expression::Literal(ref lit) = expr {
28124 if let crate::expressions::Literal::String(ref s) = lit.as_ref() {
28125 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
28126 let trimmed = s.trim();
28127
28128 // Find where digits end and unit text begins
28129 let digit_end = trimmed
28130 .find(|c: char| !c.is_ascii_digit())
28131 .unwrap_or(trimmed.len());
28132 if digit_end == 0 || digit_end == trimmed.len() {
28133 return expr;
28134 }
28135 let num = &trimmed[..digit_end];
28136 let unit_text = trimmed[digit_end..].trim().to_ascii_uppercase();
28137 if unit_text.is_empty() {
28138 return expr;
28139 }
28140
28141 let known_units = [
28142 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS", "WEEK",
28143 "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
28144 ];
28145 if !known_units.contains(&unit_text.as_str()) {
28146 return expr;
28147 }
28148
28149 let unit_str = unit_text.clone();
28150 // Singularize
28151 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
28152 &unit_str[..unit_str.len() - 1]
28153 } else {
28154 &unit_str
28155 };
28156 let unit = unit_singular;
28157
28158 match target {
28159 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28160 // INTERVAL '2' DAY
28161 let iu = match unit {
28162 "DAY" => crate::expressions::IntervalUnit::Day,
28163 "HOUR" => crate::expressions::IntervalUnit::Hour,
28164 "MINUTE" => crate::expressions::IntervalUnit::Minute,
28165 "SECOND" => crate::expressions::IntervalUnit::Second,
28166 "WEEK" => crate::expressions::IntervalUnit::Week,
28167 "MONTH" => crate::expressions::IntervalUnit::Month,
28168 "YEAR" => crate::expressions::IntervalUnit::Year,
28169 _ => return expr,
28170 };
28171 return Expression::Interval(Box::new(crate::expressions::Interval {
28172 this: Some(Expression::string(num)),
28173 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28174 unit: iu,
28175 use_plural: false,
28176 }),
28177 }));
28178 }
28179 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
28180 // INTERVAL '2 DAYS'
28181 let plural = if num != "1" && !unit_str.ends_with('S') {
28182 format!("{} {}S", num, unit)
28183 } else if unit_str.ends_with('S') {
28184 format!("{} {}", num, unit_str)
28185 } else {
28186 format!("{} {}", num, unit)
28187 };
28188 return Expression::Interval(Box::new(crate::expressions::Interval {
28189 this: Some(Expression::string(&plural)),
28190 unit: None,
28191 }));
28192 }
28193 _ => {
28194 // Spark/Databricks/Hive: INTERVAL '1' DAY
28195 let iu = match unit {
28196 "DAY" => crate::expressions::IntervalUnit::Day,
28197 "HOUR" => crate::expressions::IntervalUnit::Hour,
28198 "MINUTE" => crate::expressions::IntervalUnit::Minute,
28199 "SECOND" => crate::expressions::IntervalUnit::Second,
28200 "WEEK" => crate::expressions::IntervalUnit::Week,
28201 "MONTH" => crate::expressions::IntervalUnit::Month,
28202 "YEAR" => crate::expressions::IntervalUnit::Year,
28203 _ => return expr,
28204 };
28205 return Expression::Interval(Box::new(crate::expressions::Interval {
28206 this: Some(Expression::string(num)),
28207 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28208 unit: iu,
28209 use_plural: false,
28210 }),
28211 }));
28212 }
28213 }
28214 }
28215 }
28216 // If it's already an INTERVAL expression, pass through
28217 expr
28218 }
28219
/// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
/// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
/// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
/// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
///
/// Overall shape of the rewrite: a shared "position series" (0-based or
/// 1-based depending on the dialect's array indexing) is cross-joined with
/// each unnested array. The projection then selects each array's element
/// only on the row where the series position matches that array's offset
/// column, and the WHERE clause keeps rows up to the longest array (shorter
/// arrays are pinned to their last offset so they pad out with NULLs).
///
/// Returns `None` when no SELECT expression contains an UNNEST, or when
/// `target` is not BigQuery / Presto / Trino / Snowflake.
fn rewrite_unnest_expansion(
    select: &crate::expressions::Select,
    target: DialectType,
) -> Option<crate::expressions::Select> {
    use crate::expressions::{
        Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
        UnnestFunc,
    };

    // Presto/Trino arrays are 1-based; BigQuery/Snowflake offsets start at 0.
    let index_offset: i64 = match target {
        DialectType::Presto | DialectType::Trino => 1,
        _ => 0, // BigQuery, Snowflake
    };

    // Snowflake spells conditional selection as IFF; the others use IF.
    let if_func_name = match target {
        DialectType::Snowflake => "IFF",
        _ => "IF",
    };

    // Dialect-specific name of the array-length function.
    let array_length_func = match target {
        DialectType::BigQuery => "ARRAY_LENGTH",
        DialectType::Presto | DialectType::Trino => "CARDINALITY",
        DialectType::Snowflake => "ARRAY_SIZE",
        _ => "ARRAY_LENGTH",
    };

    // Presto/Trino/Snowflake qualify the generated columns with table
    // aliases (e.g. `_u.pos`); BigQuery exposes them unqualified.
    let use_table_aliases = matches!(
        target,
        DialectType::Presto | DialectType::Trino | DialectType::Snowflake
    );
    // BigQuery IF / Snowflake IFF get an explicit NULL else-argument.
    let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);

    // Build a column reference, optionally qualified with a table alias.
    fn make_col(name: &str, table: Option<&str>) -> Expression {
        if let Some(tbl) = table {
            Expression::boxed_column(Column {
                name: Identifier::new(name.to_string()),
                table: Some(Identifier::new(tbl.to_string())),
                join_mark: false,
                trailing_comments: Vec::new(),
                span: None,
                inferred_type: None,
            })
        } else {
            Expression::Identifier(Identifier::new(name.to_string()))
        }
    }

    // Wrap an expression as a bare CROSS JOIN operand.
    fn make_join(this: Expression) -> Join {
        Join {
            this,
            on: None,
            using: Vec::new(),
            kind: JoinKind::Cross,
            use_inner_keyword: false,
            use_outer_keyword: false,
            deferred_condition: false,
            join_hint: None,
            match_condition: None,
            pivots: Vec::new(),
            comments: Vec::new(),
            nesting_group: 0,
            directed: false,
        }
    }

    // Collect UNNEST info from SELECT expressions
    struct UnnestInfo {
        arr_expr: Expression,            // the array being unnested
        col_alias: String,               // generated element alias (col, col_2, ...)
        pos_alias: String,               // generated offset alias (pos_2, pos_3, ...)
        source_alias: String,            // generated table alias (_u_2, _u_3, ...)
        original_expr: Expression,       // full SELECT expression wrapping the UNNEST
        has_outer_alias: Option<String>, // user-supplied alias, if any
    }

    let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
    let mut col_counter = 0usize;
    // pos/source counters start at 1 so the first generated aliases are
    // `pos_2` / `_u_2` — `pos` / `_u` are reserved for the shared series.
    let mut pos_counter = 1usize;
    let mut source_counter = 1usize;

    // Pull the array argument out of an UNNEST at the top of a SELECT
    // expression, looking through aliases and basic arithmetic wrappers.
    fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
        match expr {
            Expression::Unnest(u) => Some(u.this.clone()),
            Expression::Function(f)
                if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
            {
                Some(f.args[0].clone())
            }
            Expression::Alias(a) => extract_unnest_arg(&a.this),
            Expression::Add(op)
            | Expression::Sub(op)
            | Expression::Mul(op)
            | Expression::Div(op) => {
                extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
            }
            _ => None,
        }
    }

    // The user-written alias, if the expression is `... AS name`.
    fn get_alias_name(expr: &Expression) -> Option<String> {
        if let Expression::Alias(a) = expr {
            Some(a.alias.name.clone())
        } else {
            None
        }
    }

    // Pass 1: record every SELECT expression that contains an UNNEST.
    for sel_expr in &select.expressions {
        if let Some(arr) = extract_unnest_arg(sel_expr) {
            col_counter += 1;
            pos_counter += 1;
            source_counter += 1;

            let col_alias = if col_counter == 1 {
                "col".to_string()
            } else {
                format!("col_{}", col_counter)
            };
            let pos_alias = format!("pos_{}", pos_counter);
            let source_alias = format!("_u_{}", source_counter);
            let has_outer_alias = get_alias_name(sel_expr);

            unnest_infos.push(UnnestInfo {
                arr_expr: arr,
                col_alias,
                pos_alias,
                source_alias,
                original_expr: sel_expr.clone(),
                has_outer_alias,
            });
        }
    }

    // Nothing to rewrite -> leave the SELECT untouched.
    if unnest_infos.is_empty() {
        return None;
    }

    let series_alias = "pos".to_string();
    let series_source_alias = "_u".to_string();
    let tbl_ref = if use_table_aliases {
        Some(series_source_alias.as_str())
    } else {
        None
    };

    // Build new SELECT expressions:
    //   IF(pos = pos_N, col_N [, NULL]) AS <alias>
    let mut new_select_exprs = Vec::new();
    for info in &unnest_infos {
        let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
        let src_ref = if use_table_aliases {
            Some(info.source_alias.as_str())
        } else {
            None
        };

        let pos_col = make_col(&series_alias, tbl_ref);
        let unnest_pos_col = make_col(&info.pos_alias, src_ref);
        let col_ref = make_col(actual_col_name, src_ref);

        let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
            pos_col.clone(),
            unnest_pos_col.clone(),
        )));
        let mut if_args = vec![eq_cond, col_ref];
        if null_third_arg {
            if_args.push(Expression::Null(crate::expressions::Null));
        }

        let if_expr =
            Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
        // Splice the IF back into the original expression (preserving any
        // arithmetic that wrapped the UNNEST), then re-alias the result.
        let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);

        new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
            final_expr,
            Identifier::new(actual_col_name.clone()),
        ))));
    }

    // Build array size expressions for GREATEST
    let size_exprs: Vec<Expression> = unnest_infos
        .iter()
        .map(|info| {
            Expression::Function(Box::new(Function::new(
                array_length_func.to_string(),
                vec![info.arr_expr.clone()],
            )))
        })
        .collect();

    // The series must span the longest of all unnested arrays.
    let greatest =
        Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));

    // 0-based targets iterate 0..len-1; 1-based targets iterate 1..len.
    let series_end = if index_offset == 0 {
        Expression::Sub(Box::new(BinaryOp::new(
            greatest,
            Expression::Literal(Box::new(Literal::Number("1".to_string()))),
        )))
    } else {
        greatest
    };

    // Build the position array source
    let series_unnest_expr = match target {
        DialectType::BigQuery => {
            // UNNEST(GENERATE_ARRAY(0, <series_end>))
            let gen_array = Expression::Function(Box::new(Function::new(
                "GENERATE_ARRAY".to_string(),
                vec![
                    Expression::Literal(Box::new(Literal::Number("0".to_string()))),
                    series_end,
                ],
            )));
            Expression::Unnest(Box::new(UnnestFunc {
                this: gen_array,
                expressions: Vec::new(),
                with_ordinality: false,
                alias: None,
                offset_alias: None,
            }))
        }
        DialectType::Presto | DialectType::Trino => {
            // UNNEST(SEQUENCE(1, <series_end>))
            let sequence = Expression::Function(Box::new(Function::new(
                "SEQUENCE".to_string(),
                vec![
                    Expression::Literal(Box::new(Literal::Number("1".to_string()))),
                    series_end,
                ],
            )));
            Expression::Unnest(Box::new(UnnestFunc {
                this: sequence,
                expressions: Vec::new(),
                with_ordinality: false,
                alias: None,
                offset_alias: None,
            }))
        }
        DialectType::Snowflake => {
            // TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (<series_end>) + 1)));
            // the +1 compensates for ARRAY_GENERATE_RANGE's exclusive upper bound.
            let range_end = Expression::Add(Box::new(BinaryOp::new(
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: series_end,
                    trailing_comments: Vec::new(),
                })),
                Expression::Literal(Box::new(Literal::Number("1".to_string()))),
            )));
            let gen_range = Expression::Function(Box::new(Function::new(
                "ARRAY_GENERATE_RANGE".to_string(),
                vec![
                    Expression::Literal(Box::new(Literal::Number("0".to_string()))),
                    range_end,
                ],
            )));
            let flatten_arg =
                Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                    name: Identifier::new("INPUT".to_string()),
                    value: gen_range,
                    separator: crate::expressions::NamedArgSeparator::DArrow,
                }));
            let flatten = Expression::Function(Box::new(Function::new(
                "FLATTEN".to_string(),
                vec![flatten_arg],
            )));
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
        }
        _ => return None,
    };

    // Build series alias expression
    let series_alias_expr = if use_table_aliases {
        // Snowflake's FLATTEN emits six fixed output columns; `pos` is
        // placed on the slot this rewrite reads back.
        let col_aliases = if matches!(target, DialectType::Snowflake) {
            vec![
                Identifier::new("seq".to_string()),
                Identifier::new("key".to_string()),
                Identifier::new("path".to_string()),
                Identifier::new("index".to_string()),
                Identifier::new(series_alias.clone()),
                Identifier::new("this".to_string()),
            ]
        } else {
            vec![Identifier::new(series_alias.clone())]
        };
        Expression::Alias(Box::new(Alias {
            this: series_unnest_expr,
            alias: Identifier::new(series_source_alias.clone()),
            column_aliases: col_aliases,
            pre_alias_comments: Vec::new(),
            trailing_comments: Vec::new(),
            inferred_type: None,
        }))
    } else {
        Expression::Alias(Box::new(Alias::new(
            series_unnest_expr,
            Identifier::new(series_alias.clone()),
        )))
    };

    // Build CROSS JOINs for each UNNEST
    let mut joins = Vec::new();
    for info in &unnest_infos {
        let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);

        let unnest_join_expr = match target {
            DialectType::BigQuery => {
                // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
                let unnest = UnnestFunc {
                    this: info.arr_expr.clone(),
                    expressions: Vec::new(),
                    with_ordinality: true,
                    alias: Some(Identifier::new(actual_col_name.clone())),
                    offset_alias: Some(Identifier::new(info.pos_alias.clone())),
                };
                Expression::Unnest(Box::new(unnest))
            }
            DialectType::Presto | DialectType::Trino => {
                // UNNEST(arr) WITH ORDINALITY AS _u_2(col, pos_2)
                let unnest = UnnestFunc {
                    this: info.arr_expr.clone(),
                    expressions: Vec::new(),
                    with_ordinality: true,
                    alias: None,
                    offset_alias: None,
                };
                Expression::Alias(Box::new(Alias {
                    this: Expression::Unnest(Box::new(unnest)),
                    alias: Identifier::new(info.source_alias.clone()),
                    column_aliases: vec![
                        Identifier::new(actual_col_name.clone()),
                        Identifier::new(info.pos_alias.clone()),
                    ],
                    pre_alias_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                }))
            }
            DialectType::Snowflake => {
                // TABLE(FLATTEN(INPUT => arr)) AS _u_2(seq, key, path, pos_2, col, this)
                let flatten_arg =
                    Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                        name: Identifier::new("INPUT".to_string()),
                        value: info.arr_expr.clone(),
                        separator: crate::expressions::NamedArgSeparator::DArrow,
                    }));
                let flatten = Expression::Function(Box::new(Function::new(
                    "FLATTEN".to_string(),
                    vec![flatten_arg],
                )));
                let table_fn = Expression::Function(Box::new(Function::new(
                    "TABLE".to_string(),
                    vec![flatten],
                )));
                Expression::Alias(Box::new(Alias {
                    this: table_fn,
                    alias: Identifier::new(info.source_alias.clone()),
                    column_aliases: vec![
                        Identifier::new("seq".to_string()),
                        Identifier::new("key".to_string()),
                        Identifier::new("path".to_string()),
                        Identifier::new(info.pos_alias.clone()),
                        Identifier::new(actual_col_name.clone()),
                        Identifier::new("this".to_string()),
                    ],
                    pre_alias_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                }))
            }
            _ => return None,
        };

        joins.push(make_join(unnest_join_expr));
    }

    // Build WHERE clause. One condition per array:
    //   pos = pos_N OR (pos > size AND pos_N = size)
    // i.e. follow the array while it still has elements, then stay on its
    // last offset so the projection's IF yields NULL padding.
    let mut where_conditions: Vec<Expression> = Vec::new();
    for info in &unnest_infos {
        let src_ref = if use_table_aliases {
            Some(info.source_alias.as_str())
        } else {
            None
        };
        let pos_col = make_col(&series_alias, tbl_ref);
        let unnest_pos_col = make_col(&info.pos_alias, src_ref);

        let arr_size = Expression::Function(Box::new(Function::new(
            array_length_func.to_string(),
            vec![info.arr_expr.clone()],
        )));

        // 0-based targets compare against (len - 1); 1-based against len.
        let size_ref = if index_offset == 0 {
            Expression::Paren(Box::new(crate::expressions::Paren {
                this: Expression::Sub(Box::new(BinaryOp::new(
                    arr_size,
                    Expression::Literal(Box::new(Literal::Number("1".to_string()))),
                ))),
                trailing_comments: Vec::new(),
            }))
        } else {
            arr_size
        };

        let eq = Expression::Eq(Box::new(BinaryOp::new(
            pos_col.clone(),
            unnest_pos_col.clone(),
        )));
        let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
        let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
        let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
        let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
            this: and_cond,
            trailing_comments: Vec::new(),
        }));
        let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));

        where_conditions.push(or_cond);
    }

    let where_expr = if where_conditions.len() == 1 {
        // Single condition: no parens needed
        where_conditions.into_iter().next().unwrap()
    } else {
        // Multiple conditions: wrap each OR in parens, then combine with AND
        let wrap = |e: Expression| {
            Expression::Paren(Box::new(crate::expressions::Paren {
                this: e,
                trailing_comments: Vec::new(),
            }))
        };
        let mut iter = where_conditions.into_iter();
        // The first pair is additionally parenthesized as a unit; later
        // conditions are ANDed on left-associatively.
        let first = wrap(iter.next().unwrap());
        let second = wrap(iter.next().unwrap());
        let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
            this: Expression::And(Box::new(BinaryOp::new(first, second))),
            trailing_comments: Vec::new(),
        }));
        for cond in iter {
            combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
        }
        combined
    };

    // Build the new SELECT
    let mut new_select = select.clone();
    new_select.expressions = new_select_exprs;

    // Keep any existing FROM and append our sources as CROSS JOINs; with no
    // FROM, the position series itself becomes the FROM source.
    if new_select.from.is_some() {
        let mut all_joins = vec![make_join(series_alias_expr)];
        all_joins.extend(joins);
        new_select.joins.extend(all_joins);
    } else {
        new_select.from = Some(From {
            expressions: vec![series_alias_expr],
        });
        new_select.joins.extend(joins);
    }

    // AND the padding conditions onto any pre-existing WHERE clause.
    if let Some(ref existing_where) = new_select.where_clause {
        let combined = Expression::And(Box::new(BinaryOp::new(
            existing_where.this.clone(),
            where_expr,
        )));
        new_select.where_clause = Some(crate::expressions::Where { this: combined });
    } else {
        new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
    }

    Some(new_select)
}
28687
28688 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
28689 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
28690 match original {
28691 Expression::Unnest(_) => replacement.clone(),
28692 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
28693 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
28694 Expression::Add(op) => {
28695 let left = Self::replace_unnest_with_if(&op.left, replacement);
28696 let right = Self::replace_unnest_with_if(&op.right, replacement);
28697 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
28698 }
28699 Expression::Sub(op) => {
28700 let left = Self::replace_unnest_with_if(&op.left, replacement);
28701 let right = Self::replace_unnest_with_if(&op.right, replacement);
28702 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
28703 }
28704 Expression::Mul(op) => {
28705 let left = Self::replace_unnest_with_if(&op.left, replacement);
28706 let right = Self::replace_unnest_with_if(&op.right, replacement);
28707 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
28708 }
28709 Expression::Div(op) => {
28710 let left = Self::replace_unnest_with_if(&op.left, replacement);
28711 let right = Self::replace_unnest_with_if(&op.right, replacement);
28712 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
28713 }
28714 _ => original.clone(),
28715 }
28716 }
28717
/// Decompose a JSON path like `$.y[0].z` into individual parts: `["y", "0", "z"]`.
///
/// The leading `$` (or `$.`) is stripped, dot and bracket segments are split
/// out, quotes inside brackets are unwrapped, and `[*]` wildcards are
/// dropped entirely.
fn decompose_json_path(path: &str) -> Vec<String> {
    // Drop the root marker first.
    let body = path
        .strip_prefix("$.")
        .or_else(|| path.strip_prefix('$'))
        .unwrap_or(path);
    if body.is_empty() {
        return Vec::new();
    }

    let mut parts = Vec::new();
    let mut segment = String::new();
    let mut stream = body.chars().peekable();
    while let Some(ch) = stream.next() {
        match ch {
            '.' => {
                // Segment boundary; consecutive dots yield nothing extra.
                if !segment.is_empty() {
                    parts.push(std::mem::take(&mut segment));
                }
            }
            '[' => {
                if !segment.is_empty() {
                    parts.push(std::mem::take(&mut segment));
                }
                // Consume up to the matching ']', unwrapping quoted strings.
                let mut inner = String::new();
                while let Some(&c) = stream.peek() {
                    if c == ']' {
                        break;
                    }
                    stream.next();
                    if c == '"' || c == '\'' {
                        // Copy the quoted body verbatim; the loop below also
                        // consumes the closing quote.
                        while let Some(q) = stream.next() {
                            if q == c {
                                break;
                            }
                            inner.push(q);
                        }
                    } else {
                        inner.push(c);
                    }
                }
                stream.next(); // consume the ']' if present
                // `[*]` wildcards are discarded rather than kept as parts.
                if inner != "*" {
                    parts.push(inner);
                }
            }
            other => segment.push(other),
        }
    }
    if !segment.is_empty() {
        parts.push(segment);
    }
    parts
}
28785
/// Strip a leading `$` (or `$.`) from a JSON path, keeping the rest.
/// `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`; paths without a root
/// marker are returned unchanged.
fn strip_json_dollar_prefix(path: &str) -> String {
    path.strip_prefix("$.")
        .or_else(|| path.strip_prefix('$'))
        .unwrap_or(path)
        .to_string()
}
28797
/// Strip `[*]` wildcards from a JSON path.
/// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`
fn strip_json_wildcards(path: &str) -> String {
    // Removing a wildcard can leave doubled or trailing dots behind;
    // clean both up after the removal.
    let without_wildcards = path.replace("[*]", "");
    let collapsed = without_wildcards.replace("..", ".");
    collapsed.trim_end_matches('.').to_string()
}
28806
/// Convert bracket notation to dot notation for JSON paths.
///
/// Quoted brackets become double-quoted dot segments (`$["a b"]` -> `$."a b"`,
/// `$["key"]` -> `$."key"`), unquoted (typically numeric) indexes stay in
/// brackets (`$[0]` -> `$[0]`), and `[*]` wildcards are kept as-is.
fn bracket_to_dot_notation(path: &str) -> String {
    let mut out = String::new();
    let mut stream = path.chars().peekable();
    while let Some(ch) = stream.next() {
        if ch != '[' {
            out.push(ch);
            continue;
        }
        // Collect the bracket body, noting whether any part was quoted.
        let mut body = String::new();
        let mut quoted = false;
        while let Some(&c) = stream.peek() {
            if c == ']' {
                break;
            }
            stream.next();
            if c == '"' || c == '\'' {
                quoted = true;
                // Copy the quoted text, dropping the surrounding quotes.
                while let Some(q) = stream.next() {
                    if q == c {
                        break;
                    }
                    body.push(q);
                }
            } else {
                body.push(c);
            }
        }
        stream.next(); // consume the closing ']' when present
        if body == "*" {
            // Keep wildcard as-is.
            out.push_str("[*]");
        } else if quoted {
            // Quoted key -> dot notation with double quotes.
            out.push('.');
            out.push('"');
            out.push_str(&body);
            out.push('"');
        } else {
            // Unquoted index -> keep as bracket.
            out.push('[');
            out.push_str(&body);
            out.push(']');
        }
    }
    out
}
28862
/// Rewrite double-quoted JSON-path brackets to use single quotes:
/// `$["a b"]` -> `$['a b']`. Brackets that are not double-quoted (numeric
/// indexes, wildcards, already-single-quoted keys) pass through unchanged.
fn bracket_to_single_quotes(path: &str) -> String {
    let mut out = String::new();
    let source: Vec<char> = path.chars().collect();
    let mut idx = 0;
    while idx < source.len() {
        // Only the exact `["` opening sequence triggers a rewrite.
        let opens_quoted_bracket =
            source[idx] == '[' && source.get(idx + 1) == Some(&'"');
        if !opens_quoted_bracket {
            out.push(source[idx]);
            idx += 1;
            continue;
        }
        out.push('[');
        out.push('\'');
        idx += 2; // step over `["`
        while idx < source.len() && source[idx] != '"' {
            out.push(source[idx]);
            idx += 1;
        }
        if idx < source.len() {
            idx += 1; // step over the closing `"`
        }
        out.push('\'');
    }
    out
}
28889
/// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
/// or PostgreSQL #temp -> TEMPORARY.
/// Also strips # from INSERT INTO #table for non-TSQL targets.
///
/// A leading `#` on the table name (TSQL's temp-table marker) implies a
/// temporary table; the marker itself is removed for every target other
/// than TSQL/Fabric since no other dialect accepts it.
fn transform_select_into(
    expr: Expression,
    _source: DialectType,
    target: DialectType,
) -> Expression {
    use crate::expressions::{CreateTable, Expression, TableRef};

    // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets
    if let Expression::Insert(ref insert) = expr {
        if insert.table.name.name.starts_with('#')
            && !matches!(target, DialectType::TSQL | DialectType::Fabric)
        {
            let mut new_insert = insert.clone();
            new_insert.table.name.name =
                insert.table.name.name.trim_start_matches('#').to_string();
            return Expression::Insert(new_insert);
        }
        // Other INSERTs (or TSQL/Fabric targets) pass through untouched.
        return expr;
    }

    if let Expression::Select(ref select) = expr {
        if let Some(ref into) = select.into {
            // The INTO target may have been parsed as a table reference or
            // as a bare identifier; anything else yields an empty name.
            let table_name_raw = match &into.this {
                Expression::Table(tr) => tr.name.name.clone(),
                Expression::Identifier(id) => id.name.clone(),
                _ => String::new(),
            };
            let is_temp = table_name_raw.starts_with('#') || into.temporary;
            let clean_name = table_name_raw.trim_start_matches('#').to_string();

            match target {
                DialectType::DuckDB | DialectType::Snowflake => {
                    // SELECT INTO -> CREATE TABLE AS SELECT
                    // (the SELECT is reused as-is, minus its INTO clause)
                    let mut new_select = select.clone();
                    new_select.into = None;
                    let ct = CreateTable {
                        name: TableRef::new(clean_name),
                        on_cluster: None,
                        columns: Vec::new(),
                        constraints: Vec::new(),
                        if_not_exists: false,
                        temporary: is_temp,
                        or_replace: false,
                        table_modifier: None,
                        as_select: Some(Expression::Select(new_select)),
                        as_select_parenthesized: false,
                        on_commit: None,
                        clone_source: None,
                        clone_at_clause: None,
                        shallow_clone: false,
                        is_copy: false,
                        leading_comments: Vec::new(),
                        with_properties: Vec::new(),
                        teradata_post_name_options: Vec::new(),
                        with_data: None,
                        with_statistics: None,
                        teradata_indexes: Vec::new(),
                        with_cte: None,
                        properties: Vec::new(),
                        partition_of: None,
                        post_table_properties: Vec::new(),
                        mysql_table_options: Vec::new(),
                        inherits: Vec::new(),
                        on_property: None,
                        copy_grants: false,
                        using_template: None,
                        rollup: None,
                    };
                    return Expression::CreateTable(Box::new(ct));
                }
                DialectType::PostgreSQL | DialectType::Redshift => {
                    // PostgreSQL: #foo -> INTO TEMPORARY foo
                    // (only rewrite when the TEMPORARY flag isn't already set)
                    if is_temp && !into.temporary {
                        let mut new_select = select.clone();
                        let mut new_into = into.clone();
                        new_into.temporary = true;
                        new_into.unlogged = false;
                        new_into.this = Expression::Table(Box::new(TableRef::new(clean_name)));
                        new_select.into = Some(new_into);
                        Expression::Select(new_select)
                    } else {
                        expr
                    }
                }
                _ => expr,
            }
        } else {
            expr
        }
    } else {
        expr
    }
}
28986
    /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
    /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
    ///
    /// Two passes over the `CreateTable` node:
    ///
    /// 1. `ct.with_properties` (Presto-style `WITH (k = v, ...)` pairs) are
    ///    drained and re-emitted per `target`:
    ///    - Presto/Trino/Athena: kept, with the `format` key lowercased and
    ///      `PARTITIONED_BY` normalized to an `ARRAY['col', ...]` value.
    ///    - Hive: FORMAT becomes a `FileFormatProperty` with `hive_format`
    ///      set (STORED AS); PARTITIONED_BY is delegated to
    ///      `apply_partitioned_by`; all other pairs become a `Properties`
    ///      node (TBLPROPERTIES).
    ///    - Spark/Databricks: same as Hive except `hive_format` is `None`
    ///      (USING syntax).
    ///    - DuckDB: every WITH property is dropped.
    ///    - Other targets: passed through unchanged.
    /// 2. `ct.properties` (Hive-style STORED AS / TBLPROPERTIES /
    ///    PARTITIONED BY nodes) are converted back into WITH properties when
    ///    targeting Presto/Trino/Athena, stripped for DuckDB, and for all
    ///    other targets quoted format names are unquoted
    ///    (`STORED AS 'PARQUET'` -> `STORED AS PARQUET`).
    ///
    /// `_source` is currently unused; the rewrite is driven entirely by the
    /// shape of the AST and the `target` dialect.
    fn transform_create_table_properties(
        ct: &mut crate::expressions::CreateTable,
        _source: DialectType,
        target: DialectType,
    ) {
        use crate::expressions::{
            BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
            Properties,
        };

        // Helper to convert a raw property value string to the correct Expression
        let value_to_expr = |v: &str| -> Expression {
            let trimmed = v.trim();
            // Check if it's a quoted string (starts and ends with ')
            if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
                Expression::Literal(Box::new(Literal::String(trimmed[1..trimmed.len() - 1].to_string())))
            }
            // Check if it's a number
            else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
                Expression::Literal(Box::new(Literal::Number(trimmed.to_string())))
            }
            // Check if it's ARRAY[...] or ARRAY(...)
            // NOTE(review): `trimmed[..5]` is a byte slice and assumes ASCII
            // input; a multi-byte character inside the first 5 bytes would
            // panic. Property values appear to be raw SQL tokens - confirm.
            else if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
                // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
                let inner = trimmed
                    .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
                    .trim_start_matches('[')
                    .trim_start_matches('(')
                    .trim_end_matches(']')
                    .trim_end_matches(')');
                let elements: Vec<Expression> = inner
                    .split(',')
                    .map(|e| {
                        let elem = e.trim().trim_matches('\'');
                        Expression::Literal(Box::new(Literal::String(elem.to_string())))
                    })
                    .collect();
                Expression::Function(Box::new(crate::expressions::Function::new(
                    "ARRAY".to_string(),
                    elements,
                )))
            }
            // Otherwise, just output as identifier (unquoted)
            else {
                Expression::Identifier(Identifier::new(trimmed.to_string()))
            }
        };

        // Fast exit: nothing to rewrite when the statement carries no
        // properties of either flavor.
        if ct.with_properties.is_empty() && ct.properties.is_empty() {
            return;
        }

        // Handle Presto-style WITH properties
        if !ct.with_properties.is_empty() {
            // Extract FORMAT property and remaining properties
            let mut format_value: Option<String> = None;
            let mut partitioned_by: Option<String> = None;
            let mut other_props: Vec<(String, String)> = Vec::new();

            for (key, value) in ct.with_properties.drain(..) {
                if key.eq_ignore_ascii_case("FORMAT") {
                    // Strip surrounding quotes from value if present
                    format_value = Some(value.trim_matches('\'').to_string());
                } else if key.eq_ignore_ascii_case("PARTITIONED_BY") {
                    partitioned_by = Some(value);
                } else {
                    other_props.push((key, value));
                }
            }

            match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Presto: keep WITH properties but lowercase 'format' key
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("format".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        // Convert (col1, col2) to ARRAY['col1', 'col2'] format
                        let trimmed = part.trim();
                        let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
                        // Also handle ARRAY['...'] format - keep as-is
                        // NOTE(review): same ASCII byte-slice assumption as in
                        // value_to_expr above.
                        if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), part));
                        } else {
                            // Parse column names from the parenthesized list
                            let cols: Vec<&str> = inner
                                .split(',')
                                .map(|c| c.trim().trim_matches('"').trim_matches('\''))
                                .collect();
                            let array_val = format!(
                                "ARRAY[{}]",
                                cols.iter()
                                    .map(|c| format!("'{}'", c))
                                    .collect::<Vec<_>>()
                                    .join(", ")
                            );
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), array_val));
                        }
                    }
                    ct.with_properties.extend(other_props);
                }
                DialectType::Hive => {
                    // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                // Some(true) selects the STORED AS rendering
                                // (contrast with the Spark arm below, where
                                // None selects USING).
                                hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
                                    value: true,
                                }))),
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        // PARTITIONED_BY handling is complex - move columns to partitioned by
                        // For now, the partition columns are extracted from the column list
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        // Remaining pairs become TBLPROPERTIES('k' = v, ...).
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Box::new(Literal::String(k))),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::Spark | DialectType::Databricks => {
                    // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                hive_format: None, // None means USING syntax
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Box::new(Literal::String(k))),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::DuckDB => {
                    // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
                    // Keep nothing
                }
                _ => {
                    // For other dialects, keep WITH properties as-is
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("FORMAT".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        ct.with_properties
                            .push(("PARTITIONED_BY".to_string(), part));
                    }
                    ct.with_properties.extend(other_props);
                }
            }
        }

        // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
        // and Hive STORED AS -> Presto WITH (format=...) conversion
        if !ct.properties.is_empty() {
            let is_presto_target = matches!(
                target,
                DialectType::Presto | DialectType::Trino | DialectType::Athena
            );
            let is_duckdb_target = matches!(target, DialectType::DuckDB);

            if is_presto_target || is_duckdb_target {
                let mut new_properties = Vec::new();
                for prop in ct.properties.drain(..) {
                    match &prop {
                        Expression::FileFormatProperty(ffp) => {
                            if is_presto_target {
                                // Convert STORED AS/USING to WITH (format=...)
                                if let Some(ref fmt_expr) = ffp.this {
                                    let fmt_str = match fmt_expr.as_ref() {
                                        Expression::Identifier(id) => id.name.clone(),
                                        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => { let Literal::String(s) = lit.as_ref() else { unreachable!() }; s.clone() },
                                        _ => {
                                            // Unrecognized format expression:
                                            // keep the property untouched.
                                            new_properties.push(prop);
                                            continue;
                                        }
                                    };
                                    ct.with_properties
                                        .push(("format".to_string(), format!("'{}'", fmt_str)));
                                }
                            }
                            // DuckDB: just strip file format properties
                        }
                        // Convert TBLPROPERTIES to WITH properties for Presto target
                        Expression::Properties(props) if is_presto_target => {
                            for expr in &props.expressions {
                                if let Expression::Eq(eq) = expr {
                                    // Extract key and value from the Eq expression
                                    // (non-Eq entries and unsupported operand
                                    // shapes are silently skipped).
                                    let key = match &eq.left {
                                        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => { let Literal::String(s) = lit.as_ref() else { unreachable!() }; s.clone() },
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    let value = match &eq.right {
                                        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
                                            let Literal::String(s) = lit.as_ref() else { unreachable!() };
                                            format!("'{}'", s)
                                        }
                                        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => { let Literal::Number(n) = lit.as_ref() else { unreachable!() }; n.clone() },
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    ct.with_properties.push((key, value));
                                }
                            }
                        }
                        // Convert PartitionedByProperty for Presto target
                        Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
                            // Check if it contains ColumnDef expressions (Hive-style with types)
                            if let Expression::Tuple(ref tuple) = *pbp.this {
                                let mut col_names: Vec<String> = Vec::new();
                                let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                                let mut has_col_defs = false;
                                for expr in &tuple.expressions {
                                    if let Expression::ColumnDef(ref cd) = expr {
                                        has_col_defs = true;
                                        col_names.push(cd.name.name.clone());
                                        col_defs.push(*cd.clone());
                                    } else if let Expression::Column(ref col) = expr {
                                        col_names.push(col.name.name.clone());
                                    } else if let Expression::Identifier(ref id) = expr {
                                        col_names.push(id.name.clone());
                                    } else {
                                        // For function expressions like MONTHS(y), serialize to SQL
                                        let generic = Dialect::get(DialectType::Generic);
                                        if let Ok(sql) = generic.generate(expr) {
                                            col_names.push(sql);
                                        }
                                    }
                                }
                                if has_col_defs {
                                    // Merge partition column defs into the main column list
                                    // (partition columns were Hive-only declarations).
                                    for cd in col_defs {
                                        ct.columns.push(cd);
                                    }
                                }
                                if !col_names.is_empty() {
                                    // Add PARTITIONED_BY property
                                    let array_val = format!(
                                        "ARRAY[{}]",
                                        col_names
                                            .iter()
                                            .map(|n| format!("'{}'", n))
                                            .collect::<Vec<_>>()
                                            .join(", ")
                                    );
                                    ct.with_properties
                                        .push(("PARTITIONED_BY".to_string(), array_val));
                                }
                            }
                            // Skip - don't keep in properties
                        }
                        _ => {
                            if !is_duckdb_target {
                                new_properties.push(prop);
                            }
                        }
                    }
                }
                ct.properties = new_properties;
            } else {
                // For Hive/Spark targets, unquote format names in STORED AS
                for prop in &mut ct.properties {
                    if let Expression::FileFormatProperty(ref mut ffp) = prop {
                        if let Some(ref mut fmt_expr) = ffp.this {
                            if let Expression::Literal(lit) = fmt_expr.as_ref() {
                                if let Literal::String(s) = lit.as_ref() {
                                    // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
                                    let unquoted = s.clone();
                                    *fmt_expr =
                                        Box::new(Expression::Identifier(Identifier::new(unquoted)));
                                }
                            }
                        }
                    }
                }
            }
        }
    }
29303
29304 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
29305 fn apply_partitioned_by(
29306 ct: &mut crate::expressions::CreateTable,
29307 partitioned_by_value: &str,
29308 target: DialectType,
29309 ) {
29310 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
29311
29312 // Parse the ARRAY['col1', 'col2'] value to extract column names
29313 let mut col_names: Vec<String> = Vec::new();
29314 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
29315 let inner = partitioned_by_value
29316 .trim()
29317 .trim_start_matches("ARRAY")
29318 .trim_start_matches('[')
29319 .trim_start_matches('(')
29320 .trim_end_matches(']')
29321 .trim_end_matches(')');
29322 for part in inner.split(',') {
29323 let col = part.trim().trim_matches('\'').trim_matches('"');
29324 if !col.is_empty() {
29325 col_names.push(col.to_string());
29326 }
29327 }
29328
29329 if col_names.is_empty() {
29330 return;
29331 }
29332
29333 if matches!(target, DialectType::Hive) {
29334 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
29335 let mut partition_col_defs = Vec::new();
29336 for col_name in &col_names {
29337 // Find and remove from columns
29338 if let Some(pos) = ct
29339 .columns
29340 .iter()
29341 .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
29342 {
29343 let col_def = ct.columns.remove(pos);
29344 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
29345 }
29346 }
29347 if !partition_col_defs.is_empty() {
29348 ct.properties
29349 .push(Expression::PartitionedByProperty(Box::new(
29350 PartitionedByProperty {
29351 this: Box::new(Expression::Tuple(Box::new(Tuple {
29352 expressions: partition_col_defs,
29353 }))),
29354 },
29355 )));
29356 }
29357 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
29358 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
29359 // Use quoted identifiers to match the quoting style of the original column definitions
29360 let partition_exprs: Vec<Expression> = col_names
29361 .iter()
29362 .map(|name| {
29363 // Check if the column exists in the column list and use its quoting
29364 let is_quoted = ct
29365 .columns
29366 .iter()
29367 .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
29368 let ident = if is_quoted {
29369 Identifier::quoted(name.clone())
29370 } else {
29371 Identifier::new(name.clone())
29372 };
29373 Expression::boxed_column(Column {
29374 name: ident,
29375 table: None,
29376 join_mark: false,
29377 trailing_comments: Vec::new(),
29378 span: None,
29379 inferred_type: None,
29380 })
29381 })
29382 .collect();
29383 ct.properties
29384 .push(Expression::PartitionedByProperty(Box::new(
29385 PartitionedByProperty {
29386 this: Box::new(Expression::Tuple(Box::new(Tuple {
29387 expressions: partition_exprs,
29388 }))),
29389 },
29390 )));
29391 }
29392 // DuckDB: strip partitioned_by entirely (already handled)
29393 }
29394
29395 /// Convert a DataType to Spark's type string format (using angle brackets)
29396 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
29397 use crate::expressions::DataType;
29398 match dt {
29399 DataType::Int { .. } => "INT".to_string(),
29400 DataType::BigInt { .. } => "BIGINT".to_string(),
29401 DataType::SmallInt { .. } => "SMALLINT".to_string(),
29402 DataType::TinyInt { .. } => "TINYINT".to_string(),
29403 DataType::Float { .. } => "FLOAT".to_string(),
29404 DataType::Double { .. } => "DOUBLE".to_string(),
29405 DataType::Decimal {
29406 precision: Some(p),
29407 scale: Some(s),
29408 } => format!("DECIMAL({}, {})", p, s),
29409 DataType::Decimal {
29410 precision: Some(p), ..
29411 } => format!("DECIMAL({})", p),
29412 DataType::Decimal { .. } => "DECIMAL".to_string(),
29413 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
29414 "STRING".to_string()
29415 }
29416 DataType::Char { .. } => "STRING".to_string(),
29417 DataType::Boolean => "BOOLEAN".to_string(),
29418 DataType::Date => "DATE".to_string(),
29419 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
29420 DataType::Json | DataType::JsonB => "STRING".to_string(),
29421 DataType::Binary { .. } => "BINARY".to_string(),
29422 DataType::Array { element_type, .. } => {
29423 format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
29424 }
29425 DataType::Map {
29426 key_type,
29427 value_type,
29428 } => format!(
29429 "MAP<{}, {}>",
29430 Self::data_type_to_spark_string(key_type),
29431 Self::data_type_to_spark_string(value_type)
29432 ),
29433 DataType::Struct { fields, .. } => {
29434 let field_strs: Vec<String> = fields
29435 .iter()
29436 .map(|f| {
29437 if f.name.is_empty() {
29438 Self::data_type_to_spark_string(&f.data_type)
29439 } else {
29440 format!(
29441 "{}: {}",
29442 f.name,
29443 Self::data_type_to_spark_string(&f.data_type)
29444 )
29445 }
29446 })
29447 .collect();
29448 format!("STRUCT<{}>", field_strs.join(", "))
29449 }
29450 DataType::Custom { name } => name.clone(),
29451 _ => format!("{:?}", dt),
29452 }
29453 }
29454
29455 /// Extract value and unit from an Interval expression
29456 /// Returns (value_expression, IntervalUnit)
29457 fn extract_interval_parts(
29458 interval_expr: &Expression,
29459 ) -> (Expression, crate::expressions::IntervalUnit) {
29460 use crate::expressions::{IntervalUnit, IntervalUnitSpec};
29461
29462 if let Expression::Interval(iv) = interval_expr {
29463 let val = iv.this.clone().unwrap_or(Expression::number(0));
29464 let unit = match &iv.unit {
29465 Some(IntervalUnitSpec::Simple { unit, .. }) => *unit,
29466 None => {
29467 // Unit might be embedded in the string value (Snowflake format: '5 DAY')
29468 if let Expression::Literal(lit) = &val {
29469 if let crate::expressions::Literal::String(s) = lit.as_ref() {
29470 let parts: Vec<&str> = s.trim().splitn(2, ' ').collect();
29471 if parts.len() == 2 {
29472 let unit_str = parts[1].trim().to_ascii_uppercase();
29473 let parsed_unit = match unit_str.as_str() {
29474 "YEAR" | "YEARS" => IntervalUnit::Year,
29475 "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
29476 "MONTH" | "MONTHS" => IntervalUnit::Month,
29477 "WEEK" | "WEEKS" | "ISOWEEK" => IntervalUnit::Week,
29478 "DAY" | "DAYS" => IntervalUnit::Day,
29479 "HOUR" | "HOURS" => IntervalUnit::Hour,
29480 "MINUTE" | "MINUTES" => IntervalUnit::Minute,
29481 "SECOND" | "SECONDS" => IntervalUnit::Second,
29482 "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
29483 "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
29484 _ => IntervalUnit::Day,
29485 };
29486 // Return just the numeric part as value and parsed unit
29487 return (
29488 Expression::Literal(Box::new(crate::expressions::Literal::String(
29489 parts[0].to_string(),
29490 ))),
29491 parsed_unit,
29492 );
29493 }
29494 IntervalUnit::Day
29495 } else { IntervalUnit::Day }
29496 } else {
29497 IntervalUnit::Day
29498 }
29499 }
29500 _ => IntervalUnit::Day,
29501 };
29502 (val, unit)
29503 } else {
29504 // Not an interval - pass through
29505 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
29506 }
29507 }
29508
29509 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
29510 fn normalize_bigquery_function(
29511 e: Expression,
29512 source: DialectType,
29513 target: DialectType,
29514 ) -> Result<Expression> {
29515 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
29516
29517 let f = if let Expression::Function(f) = e {
29518 *f
29519 } else {
29520 return Ok(e);
29521 };
29522 let name = f.name.to_ascii_uppercase();
29523 let mut args = f.args;
29524
29525 /// Helper to extract unit string from an identifier, column, or literal expression
29526 fn get_unit_str(expr: &Expression) -> String {
29527 match expr {
29528 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
29529 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => { let Literal::String(s) = lit.as_ref() else { unreachable!() }; s.to_ascii_uppercase() },
29530 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
29531 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
29532 Expression::Function(f) => {
29533 let base = f.name.to_ascii_uppercase();
29534 if !f.args.is_empty() {
29535 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
29536 let inner = get_unit_str(&f.args[0]);
29537 format!("{}({})", base, inner)
29538 } else {
29539 base
29540 }
29541 }
29542 _ => "DAY".to_string(),
29543 }
29544 }
29545
29546 /// Parse unit string to IntervalUnit
29547 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
29548 match s {
29549 "YEAR" => crate::expressions::IntervalUnit::Year,
29550 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
29551 "MONTH" => crate::expressions::IntervalUnit::Month,
29552 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
29553 "DAY" => crate::expressions::IntervalUnit::Day,
29554 "HOUR" => crate::expressions::IntervalUnit::Hour,
29555 "MINUTE" => crate::expressions::IntervalUnit::Minute,
29556 "SECOND" => crate::expressions::IntervalUnit::Second,
29557 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
29558 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
29559 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
29560 _ => crate::expressions::IntervalUnit::Day,
29561 }
29562 }
29563
29564 match name.as_str() {
29565 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
29566 // (BigQuery: result = date1 - date2, Standard: result = end - start)
29567 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
29568 let date1 = args.remove(0);
29569 let date2 = args.remove(0);
29570 let unit_expr = args.remove(0);
29571 let unit_str = get_unit_str(&unit_expr);
29572
29573 if matches!(target, DialectType::BigQuery) {
29574 // BigQuery -> BigQuery: just uppercase the unit
29575 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
29576 return Ok(Expression::Function(Box::new(Function::new(
29577 f.name,
29578 vec![date1, date2, unit],
29579 ))));
29580 }
29581
29582 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
29583 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
29584 if matches!(target, DialectType::Snowflake) {
29585 return Ok(Expression::TimestampDiff(Box::new(
29586 crate::expressions::TimestampDiff {
29587 this: Box::new(date2),
29588 expression: Box::new(date1),
29589 unit: Some(unit_str),
29590 },
29591 )));
29592 }
29593
29594 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
29595 if matches!(target, DialectType::DuckDB) {
29596 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
29597 // CAST to TIME
29598 let cast_fn = |e: Expression| -> Expression {
29599 match e {
29600 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
29601 let Literal::String(s) = lit.as_ref() else { unreachable!() };
29602 Expression::Cast(Box::new(Cast {
29603 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
29604 to: DataType::Custom {
29605 name: "TIME".to_string(),
29606 },
29607 trailing_comments: vec![],
29608 double_colon_syntax: false,
29609 format: None,
29610 default: None,
29611 inferred_type: None,
29612 }))
29613 }
29614 other => other,
29615 }
29616 };
29617 (cast_fn(date1), cast_fn(date2))
29618 } else if name == "DATETIME_DIFF" {
29619 // CAST to TIMESTAMP
29620 (
29621 Self::ensure_cast_timestamp(date1),
29622 Self::ensure_cast_timestamp(date2),
29623 )
29624 } else {
29625 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
29626 (
29627 Self::ensure_cast_timestamptz(date1),
29628 Self::ensure_cast_timestamptz(date2),
29629 )
29630 };
29631 return Ok(Expression::Function(Box::new(Function::new(
29632 "DATE_DIFF".to_string(),
29633 vec![
29634 Expression::Literal(Box::new(Literal::String(unit_str))),
29635 cast_d2,
29636 cast_d1,
29637 ],
29638 ))));
29639 }
29640
29641 // Convert to standard TIMESTAMPDIFF(unit, start, end)
29642 let unit = Expression::Identifier(Identifier::new(unit_str));
29643 Ok(Expression::Function(Box::new(Function::new(
29644 "TIMESTAMPDIFF".to_string(),
29645 vec![unit, date2, date1],
29646 ))))
29647 }
29648
29649 // DATEDIFF(unit, start, end) -> target-specific form
29650 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
29651 "DATEDIFF" if args.len() == 3 => {
29652 let arg0 = args.remove(0);
29653 let arg1 = args.remove(0);
29654 let arg2 = args.remove(0);
29655 let unit_str = get_unit_str(&arg0);
29656
29657 // Redshift DATEDIFF(unit, start, end) order: result = end - start
29658 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
29659 // TSQL DATEDIFF(unit, start, end) order: result = end - start
29660
29661 if matches!(target, DialectType::Snowflake) {
29662 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
29663 let unit = Expression::Identifier(Identifier::new(unit_str));
29664 return Ok(Expression::Function(Box::new(Function::new(
29665 "DATEDIFF".to_string(),
29666 vec![unit, arg1, arg2],
29667 ))));
29668 }
29669
29670 if matches!(target, DialectType::DuckDB) {
29671 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
29672 let cast_d1 = Self::ensure_cast_timestamp(arg1);
29673 let cast_d2 = Self::ensure_cast_timestamp(arg2);
29674 return Ok(Expression::Function(Box::new(Function::new(
29675 "DATE_DIFF".to_string(),
29676 vec![
29677 Expression::Literal(Box::new(Literal::String(unit_str))),
29678 cast_d1,
29679 cast_d2,
29680 ],
29681 ))));
29682 }
29683
29684 if matches!(target, DialectType::BigQuery) {
29685 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
29686 let cast_d1 = Self::ensure_cast_datetime(arg1);
29687 let cast_d2 = Self::ensure_cast_datetime(arg2);
29688 let unit = Expression::Identifier(Identifier::new(unit_str));
29689 return Ok(Expression::Function(Box::new(Function::new(
29690 "DATE_DIFF".to_string(),
29691 vec![cast_d2, cast_d1, unit],
29692 ))));
29693 }
29694
29695 if matches!(target, DialectType::Spark | DialectType::Databricks) {
29696 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
29697 let unit = Expression::Identifier(Identifier::new(unit_str));
29698 return Ok(Expression::Function(Box::new(Function::new(
29699 "DATEDIFF".to_string(),
29700 vec![unit, arg1, arg2],
29701 ))));
29702 }
29703
29704 if matches!(target, DialectType::Hive) {
29705 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
29706 match unit_str.as_str() {
29707 "MONTH" => {
29708 return Ok(Expression::Function(Box::new(Function::new(
29709 "CAST".to_string(),
29710 vec![Expression::Function(Box::new(Function::new(
29711 "MONTHS_BETWEEN".to_string(),
29712 vec![arg2, arg1],
29713 )))],
29714 ))));
29715 }
29716 "WEEK" => {
29717 return Ok(Expression::Cast(Box::new(Cast {
29718 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
29719 Expression::Function(Box::new(Function::new(
29720 "DATEDIFF".to_string(),
29721 vec![arg2, arg1],
29722 ))),
29723 Expression::Literal(Box::new(Literal::Number("7".to_string()))),
29724 ))),
29725 to: DataType::Int {
29726 length: None,
29727 integer_spelling: false,
29728 },
29729 trailing_comments: vec![],
29730 double_colon_syntax: false,
29731 format: None,
29732 default: None,
29733 inferred_type: None,
29734 })));
29735 }
29736 _ => {
29737 // Default: DATEDIFF(end, start) for DAY
29738 return Ok(Expression::Function(Box::new(Function::new(
29739 "DATEDIFF".to_string(),
29740 vec![arg2, arg1],
29741 ))));
29742 }
29743 }
29744 }
29745
29746 if matches!(
29747 target,
29748 DialectType::Presto | DialectType::Trino | DialectType::Athena
29749 ) {
29750 // Presto/Trino: DATE_DIFF('UNIT', start, end)
29751 return Ok(Expression::Function(Box::new(Function::new(
29752 "DATE_DIFF".to_string(),
29753 vec![Expression::Literal(Box::new(Literal::String(unit_str))), arg1, arg2],
29754 ))));
29755 }
29756
29757 if matches!(target, DialectType::TSQL) {
29758 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
29759 let cast_d2 = Self::ensure_cast_datetime2(arg2);
29760 let unit = Expression::Identifier(Identifier::new(unit_str));
29761 return Ok(Expression::Function(Box::new(Function::new(
29762 "DATEDIFF".to_string(),
29763 vec![unit, arg1, cast_d2],
29764 ))));
29765 }
29766
29767 if matches!(target, DialectType::PostgreSQL) {
29768 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
29769 // For now, use DATEDIFF (passthrough) with uppercased unit
29770 let unit = Expression::Identifier(Identifier::new(unit_str));
29771 return Ok(Expression::Function(Box::new(Function::new(
29772 "DATEDIFF".to_string(),
29773 vec![unit, arg1, arg2],
29774 ))));
29775 }
29776
29777 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
29778 let unit = Expression::Identifier(Identifier::new(unit_str));
29779 Ok(Expression::Function(Box::new(Function::new(
29780 "DATEDIFF".to_string(),
29781 vec![unit, arg1, arg2],
29782 ))))
29783 }
29784
29785 // DATE_DIFF(date1, date2, unit) -> standard form
29786 "DATE_DIFF" if args.len() == 3 => {
29787 let date1 = args.remove(0);
29788 let date2 = args.remove(0);
29789 let unit_expr = args.remove(0);
29790 let unit_str = get_unit_str(&unit_expr);
29791
29792 if matches!(target, DialectType::BigQuery) {
29793 // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
29794 let norm_unit = if unit_str == "WEEK(SUNDAY)" {
29795 "WEEK".to_string()
29796 } else {
29797 unit_str
29798 };
29799 let norm_d1 = Self::date_literal_to_cast(date1);
29800 let norm_d2 = Self::date_literal_to_cast(date2);
29801 let unit = Expression::Identifier(Identifier::new(norm_unit));
29802 return Ok(Expression::Function(Box::new(Function::new(
29803 f.name,
29804 vec![norm_d1, norm_d2, unit],
29805 ))));
29806 }
29807
29808 if matches!(target, DialectType::MySQL) {
29809 // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
29810 let norm_d1 = Self::date_literal_to_cast(date1);
29811 let norm_d2 = Self::date_literal_to_cast(date2);
29812 return Ok(Expression::Function(Box::new(Function::new(
29813 "DATEDIFF".to_string(),
29814 vec![norm_d1, norm_d2],
29815 ))));
29816 }
29817
29818 if matches!(target, DialectType::StarRocks) {
29819 // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
29820 let norm_d1 = Self::date_literal_to_cast(date1);
29821 let norm_d2 = Self::date_literal_to_cast(date2);
29822 return Ok(Expression::Function(Box::new(Function::new(
29823 "DATE_DIFF".to_string(),
29824 vec![
29825 Expression::Literal(Box::new(Literal::String(unit_str))),
29826 norm_d1,
29827 norm_d2,
29828 ],
29829 ))));
29830 }
29831
29832 if matches!(target, DialectType::DuckDB) {
29833 // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
29834 let norm_d1 = Self::ensure_cast_date(date1);
29835 let norm_d2 = Self::ensure_cast_date(date2);
29836
29837 // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
29838 let is_week_variant = unit_str == "WEEK"
29839 || unit_str.starts_with("WEEK(")
29840 || unit_str == "ISOWEEK";
29841 if is_week_variant {
29842 // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
29843 // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
29844 // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
29845 // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
29846 let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
29847 None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
29848 } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
29849 Some("1") // Shift Sunday to Monday alignment
29850 } else if unit_str == "WEEK(SATURDAY)" {
29851 Some("-5")
29852 } else if unit_str == "WEEK(TUESDAY)" {
29853 Some("-1")
29854 } else if unit_str == "WEEK(WEDNESDAY)" {
29855 Some("-2")
29856 } else if unit_str == "WEEK(THURSDAY)" {
29857 Some("-3")
29858 } else if unit_str == "WEEK(FRIDAY)" {
29859 Some("-4")
29860 } else {
29861 Some("1") // default to Sunday
29862 };
29863
29864 let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
29865 let shifted = if let Some(off) = offset {
29866 let interval =
29867 Expression::Interval(Box::new(crate::expressions::Interval {
29868 this: Some(Expression::Literal(Box::new(Literal::String(
29869 off.to_string(),
29870 )))),
29871 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
29872 unit: crate::expressions::IntervalUnit::Day,
29873 use_plural: false,
29874 }),
29875 }));
29876 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
29877 date, interval,
29878 )))
29879 } else {
29880 date
29881 };
29882 Expression::Function(Box::new(Function::new(
29883 "DATE_TRUNC".to_string(),
29884 vec![
29885 Expression::Literal(Box::new(Literal::String("WEEK".to_string()))),
29886 shifted,
29887 ],
29888 )))
29889 };
29890
29891 let trunc_d2 = make_trunc(norm_d2, day_offset);
29892 let trunc_d1 = make_trunc(norm_d1, day_offset);
29893 return Ok(Expression::Function(Box::new(Function::new(
29894 "DATE_DIFF".to_string(),
29895 vec![
29896 Expression::Literal(Box::new(Literal::String("WEEK".to_string()))),
29897 trunc_d2,
29898 trunc_d1,
29899 ],
29900 ))));
29901 }
29902
29903 return Ok(Expression::Function(Box::new(Function::new(
29904 "DATE_DIFF".to_string(),
29905 vec![
29906 Expression::Literal(Box::new(Literal::String(unit_str))),
29907 norm_d2,
29908 norm_d1,
29909 ],
29910 ))));
29911 }
29912
29913 // Default: DATEDIFF(unit, date2, date1)
29914 let unit = Expression::Identifier(Identifier::new(unit_str));
29915 Ok(Expression::Function(Box::new(Function::new(
29916 "DATEDIFF".to_string(),
29917 vec![unit, date2, date1],
29918 ))))
29919 }
29920
            // TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
            // BigQuery-family additive functions: the second argument is an
            // INTERVAL expression that is decomposed into (value, unit) and
            // re-emitted in whatever shape the target dialect expects.
            "TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
                let ts = args.remove(0);
                let interval_expr = args.remove(0);
                // Split `INTERVAL n UNIT` into its numeric value and unit.
                let (val, unit) = Self::extract_interval_parts(&interval_expr);

                match target {
                    DialectType::Snowflake => {
                        // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
                        // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
                        // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
                        let unit_str = Self::interval_unit_to_string(&unit);
                        let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
                        Ok(Expression::TimestampAdd(Box::new(
                            crate::expressions::TimestampAdd {
                                this: Box::new(val),
                                expression: Box::new(cast_ts),
                                unit: Some(unit_str.to_string()),
                            },
                        )))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
                            // Spark DATETIME_ADD: ts + INTERVAL val UNIT
                            let interval =
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(val),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit,
                                        use_plural: false,
                                    }),
                                }));
                            Ok(Expression::Add(Box::new(
                                crate::expressions::BinaryOp::new(ts, interval),
                            )))
                        } else if name == "DATETIME_ADD"
                            && matches!(target, DialectType::Databricks)
                        {
                            // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
                            let unit_str = Self::interval_unit_to_string(&unit);
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMESTAMPADD".to_string(),
                                vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
                            ))))
                        } else {
                            // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
                            let unit_str = Self::interval_unit_to_string(&unit);
                            // Only TIMESTAMP_*/DATETIME_* variants need the timestamp
                            // cast; TIME_ADD keeps its operand untouched.
                            let cast_ts =
                                if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
                                    Self::maybe_cast_ts(ts)
                                } else {
                                    ts
                                };
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![
                                    Expression::Identifier(Identifier::new(unit_str)),
                                    val,
                                    cast_ts,
                                ],
                            ))))
                        }
                    }
                    DialectType::MySQL => {
                        // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
                        let mysql_ts = if name.starts_with("TIMESTAMP") {
                            // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                            match &ts {
                                Expression::Function(ref inner_f)
                                    if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
                                {
                                    // Already wrapped, keep as-is
                                    ts
                                }
                                _ => {
                                    // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
                                    let unwrapped = match ts {
                                        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
                                            let Literal::Timestamp(s) = lit.as_ref() else { unreachable!() };
                                            Expression::Literal(Box::new(Literal::String(s.clone())))
                                        }
                                        other => other,
                                    };
                                    Expression::Function(Box::new(Function::new(
                                        "TIMESTAMP".to_string(),
                                        vec![unwrapped],
                                    )))
                                }
                            }
                        } else {
                            ts
                        };
                        Ok(Expression::DateAdd(Box::new(
                            crate::expressions::DateAddFunc {
                                this: mysql_ts,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                    _ => {
                        // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
                        let cast_ts = if matches!(target, DialectType::DuckDB) {
                            if name == "DATETIME_ADD" {
                                Self::ensure_cast_timestamp(ts)
                            } else if name.starts_with("TIMESTAMP") {
                                Self::maybe_cast_ts_to_tz(ts, &name)
                            } else {
                                ts
                            }
                        } else {
                            ts
                        };
                        Ok(Expression::DateAdd(Box::new(
                            crate::expressions::DateAddFunc {
                                this: cast_ts,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                }
            }
30044
            // TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
            // Mirror of the *_ADD arm above: subtraction is rendered either
            // natively (DateSub / binary `-`) or as an ADD of `val * -1`.
            "TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
                let ts = args.remove(0);
                let interval_expr = args.remove(0);
                let (val, unit) = Self::extract_interval_parts(&interval_expr);

                match target {
                    DialectType::Snowflake => {
                        // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
                        let unit_str = Self::interval_unit_to_string(&unit);
                        let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
                        // Negate via multiplication so arbitrary value
                        // expressions (not just literals) stay correct.
                        let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                            val,
                            Expression::Neg(Box::new(crate::expressions::UnaryOp {
                                this: Expression::number(1),
                                inferred_type: None,
                            })),
                        )));
                        Ok(Expression::TimestampAdd(Box::new(
                            crate::expressions::TimestampAdd {
                                this: Box::new(neg_val),
                                expression: Box::new(cast_ts),
                                unit: Some(unit_str.to_string()),
                            },
                        )))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
                            || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
                        {
                            // Spark: ts - INTERVAL val UNIT
                            let cast_ts = if name.starts_with("TIMESTAMP") {
                                Self::maybe_cast_ts(ts)
                            } else {
                                ts
                            };
                            let interval =
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(val),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit,
                                        use_plural: false,
                                    }),
                                }));
                            Ok(Expression::Sub(Box::new(
                                crate::expressions::BinaryOp::new(cast_ts, interval),
                            )))
                        } else {
                            // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
                            let unit_str = Self::interval_unit_to_string(&unit);
                            let neg_val =
                                Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                                    val,
                                    Expression::Neg(Box::new(crate::expressions::UnaryOp {
                                        this: Expression::number(1),
                                        inferred_type: None,
                                    })),
                                )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMESTAMPADD".to_string(),
                                vec![
                                    Expression::Identifier(Identifier::new(unit_str)),
                                    neg_val,
                                    ts,
                                ],
                            ))))
                        }
                    }
                    DialectType::MySQL => {
                        let mysql_ts = if name.starts_with("TIMESTAMP") {
                            // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                            match &ts {
                                Expression::Function(ref inner_f)
                                    if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
                                {
                                    // Already wrapped, keep as-is
                                    ts
                                }
                                _ => {
                                    // Unwrap typed literals: TIMESTAMP '...' -> '...'
                                    // before re-wrapping in TIMESTAMP().
                                    let unwrapped = match ts {
                                        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
                                            let Literal::Timestamp(s) = lit.as_ref() else { unreachable!() };
                                            Expression::Literal(Box::new(Literal::String(s.clone())))
                                        }
                                        other => other,
                                    };
                                    Expression::Function(Box::new(Function::new(
                                        "TIMESTAMP".to_string(),
                                        vec![unwrapped],
                                    )))
                                }
                            }
                        } else {
                            ts
                        };
                        Ok(Expression::DateSub(Box::new(
                            crate::expressions::DateAddFunc {
                                this: mysql_ts,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                    _ => {
                        // DuckDB needs explicit casts so `- INTERVAL` type-checks.
                        let cast_ts = if matches!(target, DialectType::DuckDB) {
                            if name == "DATETIME_SUB" {
                                Self::ensure_cast_timestamp(ts)
                            } else if name.starts_with("TIMESTAMP") {
                                Self::maybe_cast_ts_to_tz(ts, &name)
                            } else {
                                ts
                            }
                        } else {
                            ts
                        };
                        Ok(Expression::DateSub(Box::new(
                            crate::expressions::DateAddFunc {
                                this: cast_ts,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                }
            }
30170
            // DATE_SUB(date, INTERVAL n UNIT) -> target-specific
            "DATE_SUB" if args.len() == 2 => {
                let date = args.remove(0);
                let interval_expr = args.remove(0);
                let (val, unit) = Self::extract_interval_parts(&interval_expr);

                match target {
                    DialectType::Databricks | DialectType::Spark => {
                        // Databricks/Spark: DATE_ADD(date, -val)
                        // Use DateAdd expression with negative val so it generates correctly
                        // The generator will output DATE_ADD(date, INTERVAL -val DAY)
                        // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
                        // Instead, we directly output as a simple negated DateSub
                        Ok(Expression::DateSub(Box::new(
                            crate::expressions::DateAddFunc {
                                this: date,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
                        let cast_date = Self::ensure_cast_date(date);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Sub(Box::new(
                            crate::expressions::BinaryOp::new(cast_date, interval),
                        )))
                    }
                    DialectType::Snowflake => {
                        // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
                        // Just ensure the date is cast properly
                        let cast_date = Self::ensure_cast_date(date);
                        Ok(Expression::DateSub(Box::new(
                            crate::expressions::DateAddFunc {
                                this: cast_date,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                    DialectType::PostgreSQL => {
                        // PostgreSQL: date - INTERVAL 'val UNIT'
                        // Value and unit are folded into one quoted string literal.
                        let unit_str = Self::interval_unit_to_string(&unit);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(Literal::String(format!(
                                    "{} {}",
                                    Self::expr_to_string(&val),
                                    unit_str
                                ))))),
                                unit: None,
                            }));
                        Ok(Expression::Sub(Box::new(
                            crate::expressions::BinaryOp::new(date, interval),
                        )))
                    }
                    // Other targets: keep a normalized DateSub node and let the
                    // target generator render it.
                    _ => Ok(Expression::DateSub(Box::new(
                        crate::expressions::DateAddFunc {
                            this: date,
                            interval: val,
                            unit,
                        },
                    ))),
                }
            }
30244
            // DATEADD(unit, val, date) -> target-specific form
            // Used by: Redshift, Snowflake, TSQL, ClickHouse
            "DATEADD" if args.len() == 3 => {
                let arg0 = args.remove(0);
                let arg1 = args.remove(0);
                let arg2 = args.remove(0);
                // arg0 = unit, arg1 = amount, arg2 = date/timestamp operand.
                let unit_str = get_unit_str(&arg0);

                if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
                    // Keep DATEADD(UNIT, val, date) with uppercased unit
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
                    let date = if matches!(target, DialectType::TSQL)
                        && !matches!(
                            source,
                            DialectType::Spark | DialectType::Databricks | DialectType::Hive
                        ) {
                        Self::ensure_cast_datetime2(arg2)
                    } else {
                        arg2
                    };
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![unit, arg1, date],
                    ))));
                }

                if matches!(target, DialectType::DuckDB) {
                    // DuckDB: date + INTERVAL 'val' UNIT
                    let iu = parse_interval_unit(&unit_str);
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(arg1),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        }),
                    }));
                    let cast_date = Self::ensure_cast_timestamp(arg2);
                    return Ok(Expression::Add(Box::new(
                        crate::expressions::BinaryOp::new(cast_date, interval),
                    )));
                }

                if matches!(target, DialectType::BigQuery) {
                    // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
                    let iu = parse_interval_unit(&unit_str);
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(arg1),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        }),
                    }));
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![arg2, interval],
                    ))));
                }

                if matches!(target, DialectType::Databricks) {
                    // Databricks: keep DATEADD(UNIT, val, date) format
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![unit, arg1, arg2],
                    ))));
                }

                if matches!(target, DialectType::Spark) {
                    // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
30315 fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
30316 if let Expression::Literal(lit) = &expr {
30317 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
30318 if let Ok(val) = n.parse::<i64>() {
30319 return Expression::Literal(Box::new(crate::expressions::Literal::Number(
30320 (val * factor).to_string(),
30321 )));
30322 }
30323 }
30324 }
30325 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
30326 expr,
30327 Expression::Literal(Box::new(crate::expressions::Literal::Number(
30328 factor.to_string(),
30329 ))),
30330 )))
30331 }
                    match unit_str.as_str() {
                        "YEAR" => {
                            // 1 year == 12 months for ADD_MONTHS.
                            let months = multiply_expr_dateadd(arg1, 12);
                            return Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ))));
                        }
                        "QUARTER" => {
                            // 1 quarter == 3 months for ADD_MONTHS.
                            let months = multiply_expr_dateadd(arg1, 3);
                            return Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, months],
                            ))));
                        }
                        "MONTH" => {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, arg1],
                            ))));
                        }
                        "WEEK" => {
                            // 1 week == 7 days for DATE_ADD.
                            let days = multiply_expr_dateadd(arg1, 7);
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![arg2, days],
                            ))));
                        }
                        "DAY" => {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![arg2, arg1],
                            ))));
                        }
                        _ => {
                            // Remaining units: keep the 3-arg DATE_ADD(UNIT, val, date).
                            let unit = Expression::Identifier(Identifier::new(unit_str));
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![unit, arg1, arg2],
                            ))));
                        }
                    }
                }

                if matches!(target, DialectType::Hive) {
                    // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
                    match unit_str.as_str() {
                        "DAY" => {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![arg2, arg1],
                            ))));
                        }
                        "MONTH" => {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, arg1],
                            ))));
                        }
                        _ => {
                            let iu = parse_interval_unit(&unit_str);
                            let interval =
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(arg1),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit: iu,
                                        use_plural: false,
                                    }),
                                }));
                            return Ok(Expression::Add(Box::new(
                                crate::expressions::BinaryOp::new(arg2, interval),
                            )));
                        }
                    }
                }

                if matches!(target, DialectType::PostgreSQL) {
                    // PostgreSQL: date + INTERVAL 'val UNIT'
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(Expression::Literal(Box::new(Literal::String(format!(
                            "{} {}",
                            Self::expr_to_string(&arg1),
                            unit_str
                        ))))),
                        unit: None,
                    }));
                    return Ok(Expression::Add(Box::new(
                        crate::expressions::BinaryOp::new(arg2, interval),
                    )));
                }

                if matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    // Presto/Trino: DATE_ADD('UNIT', val, date)
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![Expression::Literal(Box::new(Literal::String(unit_str))), arg1, arg2],
                    ))));
                }

                if matches!(target, DialectType::ClickHouse) {
                    // ClickHouse: DATE_ADD(UNIT, val, date)
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![unit, arg1, arg2],
                    ))));
                }

                // Default: keep DATEADD with uppercased unit
                let unit = Expression::Identifier(Identifier::new(unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
30450
            // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
            "DATE_ADD" if args.len() == 3 => {
                let arg0 = args.remove(0);
                let arg1 = args.remove(0);
                let arg2 = args.remove(0);
                let unit_str = get_unit_str(&arg0);

                if matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    // Presto/Trino: DATE_ADD('UNIT', val, date) — the unit stays
                    // a string literal, not a bare identifier.
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![Expression::Literal(Box::new(Literal::String(unit_str))), arg1, arg2],
                    ))));
                }

                if matches!(
                    target,
                    DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
                ) {
                    // DATEADD(UNIT, val, date)
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    let date = if matches!(target, DialectType::TSQL) {
                        Self::ensure_cast_datetime2(arg2)
                    } else {
                        arg2
                    };
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![unit, arg1, date],
                    ))));
                }

                if matches!(target, DialectType::DuckDB) {
                    // DuckDB: date + INTERVAL val UNIT
                    let iu = parse_interval_unit(&unit_str);
                    let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(arg1),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        }),
                    }));
                    return Ok(Expression::Add(Box::new(
                        crate::expressions::BinaryOp::new(arg2, interval),
                    )));
                }

                if matches!(target, DialectType::Spark | DialectType::Databricks) {
                    // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
                    let unit = Expression::Identifier(Identifier::new(unit_str));
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![unit, arg1, arg2],
                    ))));
                }

                // Default: DATE_ADD(UNIT, val, date)
                let unit = Expression::Identifier(Identifier::new(unit_str));
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))))
            }
30517
            // DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
            "DATE_ADD" if args.len() == 2 => {
                let date = args.remove(0);
                let interval_expr = args.remove(0);
                let (val, unit) = Self::extract_interval_parts(&interval_expr);
                let unit_str = Self::interval_unit_to_string(&unit);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
                        let cast_date = Self::ensure_cast_date(date);
                        let quoted_val = Self::quote_interval_val(&val);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(cast_date, interval),
                        )))
                    }
                    DialectType::PostgreSQL => {
                        // PostgreSQL: date + INTERVAL 'val UNIT'
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(Literal::String(format!(
                                    "{} {}",
                                    Self::expr_to_string(&val),
                                    unit_str
                                ))))),
                                unit: None,
                            }));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(date, interval),
                        )))
                    }
                    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                        // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
                        let val_str = Self::expr_to_string(&val);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![
                                Expression::Literal(Box::new(Literal::String(unit_str.to_string()))),
                                Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(Box::new(Literal::String(val_str))),
                                    to: DataType::BigInt { length: None },
                                    trailing_comments: vec![],
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })),
                                date,
                            ],
                        ))))
                    }
                    DialectType::Spark | DialectType::Hive => {
                        // Spark/Hive: DATE_ADD(date, val) for DAY
                        match unit_str {
                            "DAY" => Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![date, val],
                            )))),
                            "MONTH" => Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![date, val],
                            )))),
                            _ => {
                                // Other units: 2-arg DATE_ADD(date, INTERVAL val UNIT).
                                let iu = parse_interval_unit(&unit_str);
                                let interval =
                                    Expression::Interval(Box::new(crate::expressions::Interval {
                                        this: Some(val),
                                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                            unit: iu,
                                            use_plural: false,
                                        }),
                                    }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(),
                                    vec![date, interval],
                                ))))
                            }
                        }
                    }
                    DialectType::Snowflake => {
                        // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
                        let cast_date = Self::ensure_cast_date(date);
                        let val_str = Self::expr_to_string(&val);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new(unit_str)),
                                Expression::Literal(Box::new(Literal::String(val_str))),
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::TSQL | DialectType::Fabric => {
                        // TSQL/Fabric: DATEADD(UNIT, val, CAST(date AS DATETIME2))
                        let cast_date = Self::ensure_cast_datetime2(date);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new(unit_str)),
                                val,
                                cast_date,
                            ],
                        ))))
                    }
                    // Redshift: DATEADD(UNIT, val, date), no cast needed.
                    DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
                    )))),
                    DialectType::MySQL => {
                        // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
                        let quoted_val = Self::quote_interval_val(&val);
                        let iu = parse_interval_unit(&unit_str);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: iu,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT)
                        let quoted_val = Self::quote_interval_val(&val);
                        let iu = parse_interval_unit(&unit_str);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(quoted_val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: iu,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval],
                        ))))
                    }
                    // Databricks: DATEADD(UNIT, val, date).
                    DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
                        "DATEADD".to_string(),
                        vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
                    )))),
                    _ => {
                        // Default: keep as DATE_ADD with decomposed interval
                        Ok(Expression::DateAdd(Box::new(
                            crate::expressions::DateAddFunc {
                                this: date,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                }
            }
30683
30684 // ADD_MONTHS(date, val) -> target-specific form
30685 "ADD_MONTHS" if args.len() == 2 => {
30686 let date = args.remove(0);
30687 let val = args.remove(0);
30688
30689 if matches!(target, DialectType::TSQL) {
30690 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
30691 let cast_date = Self::ensure_cast_datetime2(date);
30692 return Ok(Expression::Function(Box::new(Function::new(
30693 "DATEADD".to_string(),
30694 vec![
30695 Expression::Identifier(Identifier::new("MONTH")),
30696 val,
30697 cast_date,
30698 ],
30699 ))));
30700 }
30701
30702 if matches!(target, DialectType::DuckDB) {
30703 // DuckDB: date + INTERVAL val MONTH
30704 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
30705 this: Some(val),
30706 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30707 unit: crate::expressions::IntervalUnit::Month,
30708 use_plural: false,
30709 }),
30710 }));
30711 return Ok(Expression::Add(Box::new(
30712 crate::expressions::BinaryOp::new(date, interval),
30713 )));
30714 }
30715
30716 if matches!(target, DialectType::Snowflake) {
30717 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
30718 if matches!(source, DialectType::Snowflake) {
30719 return Ok(Expression::Function(Box::new(Function::new(
30720 "ADD_MONTHS".to_string(),
30721 vec![date, val],
30722 ))));
30723 }
30724 return Ok(Expression::Function(Box::new(Function::new(
30725 "DATEADD".to_string(),
30726 vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
30727 ))));
30728 }
30729
30730 if matches!(target, DialectType::Spark | DialectType::Databricks) {
30731 // Spark: ADD_MONTHS(date, val) - keep as is
30732 return Ok(Expression::Function(Box::new(Function::new(
30733 "ADD_MONTHS".to_string(),
30734 vec![date, val],
30735 ))));
30736 }
30737
30738 if matches!(target, DialectType::Hive) {
30739 return Ok(Expression::Function(Box::new(Function::new(
30740 "ADD_MONTHS".to_string(),
30741 vec![date, val],
30742 ))));
30743 }
30744
30745 if matches!(
30746 target,
30747 DialectType::Presto | DialectType::Trino | DialectType::Athena
30748 ) {
30749 // Presto: DATE_ADD('MONTH', val, date)
30750 return Ok(Expression::Function(Box::new(Function::new(
30751 "DATE_ADD".to_string(),
30752 vec![
30753 Expression::Literal(Box::new(Literal::String("MONTH".to_string()))),
30754 val,
30755 date,
30756 ],
30757 ))));
30758 }
30759
30760 // Default: keep ADD_MONTHS
30761 Ok(Expression::Function(Box::new(Function::new(
30762 "ADD_MONTHS".to_string(),
30763 vec![date, val],
30764 ))))
30765 }
30766
            // SAFE_DIVIDE(x, y) -> target-specific form directly
            // BigQuery's SAFE_DIVIDE returns NULL on division by zero instead
            // of erroring, so every target output carries a `y <> 0` guard.
            "SAFE_DIVIDE" if args.len() == 2 => {
                let x = args.remove(0);
                let y = args.remove(0);
                // Wrap x and y in parens if they're complex expressions
                let y_ref = match &y {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        y.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: y.clone(),
                        trailing_comments: vec![],
                    })),
                };
                let x_ref = match &x {
                    Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
                        x.clone()
                    }
                    _ => Expression::Paren(Box::new(Paren {
                        this: x.clone(),
                        trailing_comments: vec![],
                    })),
                };
                // Shared building blocks: the `y <> 0` guard and plain `x / y`.
                let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
                    y_ref.clone(),
                    Expression::number(0),
                )));
                let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                    x_ref.clone(),
                    y_ref.clone(),
                )));

                match target {
                    DialectType::DuckDB | DialectType::PostgreSQL => {
                        // CASE WHEN y <> 0 THEN x / y ELSE NULL END
                        let result_div = if matches!(target, DialectType::PostgreSQL) {
                            // Postgres `/` truncates for integers, so cast the
                            // numerator to force float division like BigQuery.
                            let cast_x = Expression::Cast(Box::new(Cast {
                                this: x_ref,
                                to: DataType::Custom {
                                    name: "DOUBLE PRECISION".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                                cast_x, y_ref,
                            )))
                        } else {
                            div_expr
                        };
                        Ok(Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens: vec![(condition, result_div)],
                            else_: Some(Expression::Null(crate::expressions::Null)),
                            comments: Vec::new(),
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // IFF(y <> 0, x / y, NULL)
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: Some("IFF".to_string()),
                            inferred_type: None,
                        })))
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
                        let cast_x = Expression::Cast(Box::new(Cast {
                            this: x_ref,
                            to: DataType::Double {
                                precision: None,
                                scale: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let cast_div = Expression::Div(Box::new(
                            crate::expressions::BinaryOp::new(cast_x, y_ref),
                        ));
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: cast_div,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                            inferred_type: None,
                        })))
                    }
                    _ => {
                        // IF(y <> 0, x / y, NULL)
                        Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                            condition,
                            true_value: div_expr,
                            false_value: Some(Expression::Null(crate::expressions::Null)),
                            original_name: None,
                            inferred_type: None,
                        })))
                    }
                }
            }
30875
30876 // GENERATE_UUID() -> UUID() with CAST to string
30877 "GENERATE_UUID" => {
30878 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
30879 this: None,
30880 name: None,
30881 is_string: None,
30882 }));
30883 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
30884 let cast_type = match target {
30885 DialectType::DuckDB => Some(DataType::Text),
30886 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
30887 length: None,
30888 parenthesized_length: false,
30889 }),
30890 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
30891 Some(DataType::String { length: None })
30892 }
30893 _ => None,
30894 };
30895 if let Some(dt) = cast_type {
30896 Ok(Expression::Cast(Box::new(Cast {
30897 this: uuid_expr,
30898 to: dt,
30899 trailing_comments: vec![],
30900 double_colon_syntax: false,
30901 format: None,
30902 default: None,
30903 inferred_type: None,
30904 })))
30905 } else {
30906 Ok(uuid_expr)
30907 }
30908 }
30909
30910 // COUNTIF(x) -> CountIf expression
30911 "COUNTIF" if args.len() == 1 => {
30912 let arg = args.remove(0);
30913 Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
30914 this: arg,
30915 distinct: false,
30916 filter: None,
30917 order_by: vec![],
30918 name: None,
30919 ignore_nulls: None,
30920 having_max: None,
30921 limit: None,
30922 inferred_type: None,
30923 })))
30924 }
30925
// EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression.
// BigQuery also accepts a named `max_distance => N` argument; named
// arguments are flattened to positional form before dispatching.
"EDIT_DISTANCE" => {
    // Strip named arguments (max_distance => N) and pass as positional.
    let mut positional_args: Vec<Expression> = vec![];
    for arg in args {
        match arg {
            Expression::NamedArgument(na) => {
                positional_args.push(na.value);
            }
            other => positional_args.push(other),
        }
    }
    if positional_args.len() >= 2 {
        let col1 = positional_args.remove(0);
        let col2 = positional_args.remove(0);
        let levenshtein = crate::expressions::BinaryFunc {
            this: col1,
            expression: col2,
            original_name: None,
            inferred_type: None,
        };
        // A third argument (max_distance) forces a function wrapper because
        // the plain Levenshtein node only carries two operands.
        if !positional_args.is_empty() {
            let max_dist = positional_args.remove(0);
            // DuckDB has no max-distance form, so emulate it:
            // CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL
            //      ELSE LEAST(LEVENSHTEIN(a, b), max) END
            if matches!(target, DialectType::DuckDB) {
                let lev = Expression::Function(Box::new(Function::new(
                    "LEVENSHTEIN".to_string(),
                    vec![levenshtein.this, levenshtein.expression],
                )));
                let lev_is_null =
                    Expression::IsNull(Box::new(crate::expressions::IsNull {
                        this: lev.clone(),
                        not: false,
                        postfix_form: false,
                    }));
                let max_is_null =
                    Expression::IsNull(Box::new(crate::expressions::IsNull {
                        this: max_dist.clone(),
                        not: false,
                        postfix_form: false,
                    }));
                // NULL propagation: if either operand is NULL the whole
                // expression yields NULL instead of LEAST's NULL handling.
                let null_check =
                    Expression::Or(Box::new(crate::expressions::BinaryOp {
                        left: lev_is_null,
                        right: max_is_null,
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                        inferred_type: None,
                    }));
                let least =
                    Expression::Least(Box::new(crate::expressions::VarArgFunc {
                        expressions: vec![lev, max_dist],
                        original_name: None,
                        inferred_type: None,
                    }));
                return Ok(Expression::Case(Box::new(crate::expressions::Case {
                    operand: None,
                    whens: vec![(
                        null_check,
                        Expression::Null(crate::expressions::Null),
                    )],
                    else_: Some(least),
                    comments: Vec::new(),
                    inferred_type: None,
                })));
            }
            // Other targets: pass all args positionally to a LEVENSHTEIN-
            // family function (any args beyond max_dist ride along).
            let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
            all_args.extend(positional_args);
            // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided.
            let func_name = if matches!(target, DialectType::PostgreSQL) {
                "LEVENSHTEIN_LESS_EQUAL"
            } else {
                "LEVENSHTEIN"
            };
            return Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                all_args,
            ))));
        }
        Ok(Expression::Levenshtein(Box::new(levenshtein)))
    } else {
        // Fewer than two usable arguments: emit the call unchanged and let
        // the target dialect surface any arity error.
        Ok(Expression::Function(Box::new(Function::new(
            "EDIT_DISTANCE".to_string(),
            positional_args,
        ))))
    }
}
31015
// TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0.
// The three TIMESTAMP_* epoch constructors all lower to the generic
// UnixToTime node; `scale` records the input's fractional decimal digits
// (0 = seconds, 3 = millis, 6 = micros) so each generator can emit the
// dialect-native epoch-to-timestamp conversion.
"TIMESTAMP_SECONDS" if args.len() == 1 => {
    let arg = args.remove(0);
    Ok(Expression::UnixToTime(Box::new(
        crate::expressions::UnixToTime {
            this: Box::new(arg),
            scale: Some(0),
            zone: None,
            hours: None,
            minutes: None,
            format: None,
            target_type: None,
        },
    )))
}

// TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3.
"TIMESTAMP_MILLIS" if args.len() == 1 => {
    let arg = args.remove(0);
    Ok(Expression::UnixToTime(Box::new(
        crate::expressions::UnixToTime {
            this: Box::new(arg),
            scale: Some(3),
            zone: None,
            hours: None,
            minutes: None,
            format: None,
            target_type: None,
        },
    )))
}

// TIMESTAMP_MICROS(x) -> UnixToTime with scale 6.
"TIMESTAMP_MICROS" if args.len() == 1 => {
    let arg = args.remove(0);
    Ok(Expression::UnixToTime(Box::new(
        crate::expressions::UnixToTime {
            this: Box::new(arg),
            scale: Some(6),
            zone: None,
            hours: None,
            minutes: None,
            format: None,
            target_type: None,
        },
    )))
}
31063
31064 // DIV(x, y) -> IntDiv expression
31065 "DIV" if args.len() == 2 => {
31066 let x = args.remove(0);
31067 let y = args.remove(0);
31068 Ok(Expression::IntDiv(Box::new(
31069 crate::expressions::BinaryFunc {
31070 this: x,
31071 expression: y,
31072 original_name: None,
31073 inferred_type: None,
31074 },
31075 )))
31076 }
31077
// TO_HEX(x) -> target-specific form.
"TO_HEX" if args.len() == 1 => {
    let arg = args.remove(0);
    // Detect whether the inner function already returns a hex string on
    // some targets (used by the DuckDB and Snowflake branches below).
    // NOTE(review): this check is case-sensitive while the Snowflake branch
    // uppercases the name — a lowercase `md5(x)` would skip both hex-aware
    // branches; confirm whether function names are normalized upstream.
    let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: keep as TO_HEX.
        Ok(Expression::Function(Box::new(Function::new(
            "TO_HEX".to_string(),
            vec![arg],
        ))))
    } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
        // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant.
        Ok(arg)
    } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
        // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
        // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
        // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
        // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
        if let Expression::Function(ref inner_f) = arg {
            let inner_args = inner_f.args.clone();
            let binary_func = match inner_f.name.to_ascii_uppercase().as_str() {
                "SHA1" => Expression::Function(Box::new(Function::new(
                    "SHA1_BINARY".to_string(),
                    inner_args,
                ))),
                "MD5" => Expression::Function(Box::new(Function::new(
                    "MD5_BINARY".to_string(),
                    inner_args,
                ))),
                "SHA256" => {
                    let mut a = inner_args;
                    a.push(Expression::number(256));
                    Expression::Function(Box::new(Function::new(
                        "SHA2_BINARY".to_string(),
                        a,
                    )))
                }
                "SHA512" => {
                    let mut a = inner_args;
                    a.push(Expression::number(512));
                    Expression::Function(Box::new(Function::new(
                        "SHA2_BINARY".to_string(),
                        a,
                    )))
                }
                // Defensive fallback; unreachable in practice because
                // inner_returns_hex restricts the name to the four above.
                _ => arg.clone(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "TO_CHAR".to_string(),
                vec![binary_func],
            ))))
        } else {
            // Defensive fallback (inner_returns_hex guarantees `arg` is a
            // Function, so this branch should be unreachable): LOWER(HEX(x)).
            let inner = Expression::Function(Box::new(Function::new(
                "HEX".to_string(),
                vec![arg],
            )));
            Ok(Expression::Lower(Box::new(
                crate::expressions::UnaryFunc::new(inner),
            )))
        }
    } else if matches!(target, DialectType::Presto | DialectType::Trino) {
        // Presto/Trino keep TO_HEX but the result is wrapped in LOWER so
        // the output case matches BigQuery's lowercase hex.
        let inner = Expression::Function(Box::new(Function::new(
            "TO_HEX".to_string(),
            vec![arg],
        )));
        Ok(Expression::Lower(Box::new(
            crate::expressions::UnaryFunc::new(inner),
        )))
    } else {
        // Generic fallback: LOWER(HEX(x)).
        let inner =
            Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
        Ok(Expression::Lower(Box::new(
            crate::expressions::UnaryFunc::new(inner),
        )))
    }
}
31155
// LAST_DAY(date, unit) -> LAST_DAY(date): the unit argument is dropped.
// NOTE(review): the unit is discarded unconditionally, which is only
// correct when it is MONTH (BigQuery's default); a WEEK/QUARTER/YEAR unit
// would silently change meaning — confirm whether callers pre-filter that.
"LAST_DAY" if args.len() == 2 => {
    let date = args.remove(0);
    let _unit = args.remove(0); // Strip the unit (MONTH is default)
    Ok(Expression::Function(Box::new(Function::new(
        "LAST_DAY".to_string(),
        vec![date],
    ))))
}
31165
31166 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
31167 "GENERATE_ARRAY" => {
31168 let start = args.get(0).cloned();
31169 let end = args.get(1).cloned();
31170 let step = args.get(2).cloned();
31171 Ok(Expression::GenerateSeries(Box::new(
31172 crate::expressions::GenerateSeries {
31173 start: start.map(Box::new),
31174 end: end.map(Box::new),
31175 step: step.map(Box::new),
31176 is_end_exclusive: None,
31177 },
31178 )))
31179 }
31180
// GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression.
"GENERATE_TIMESTAMP_ARRAY" => {
    let start = args.get(0).cloned();
    let end = args.get(1).cloned();
    let step = args.get(2).cloned();

    if matches!(target, DialectType::DuckDB) {
        // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step).
        // Only cast string literals — leave columns/expressions as-is, since
        // those are presumably already timestamp-typed (TODO confirm).
        let maybe_cast_ts = |expr: Expression| -> Expression {
            if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))) {
                Expression::Cast(Box::new(Cast {
                    this: expr,
                    to: DataType::Timestamp {
                        precision: None,
                        timezone: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                expr
            }
        };
        let cast_start = start.map(maybe_cast_ts);
        let cast_end = end.map(maybe_cast_ts);
        Ok(Expression::GenerateSeries(Box::new(
            crate::expressions::GenerateSeries {
                start: cast_start.map(Box::new),
                end: cast_end.map(Box::new),
                step: step.map(Box::new),
                is_end_exclusive: None,
            },
        )))
    } else {
        // All other targets: lower to the generic series node unchanged.
        Ok(Expression::GenerateSeries(Box::new(
            crate::expressions::GenerateSeries {
                start: start.map(Box::new),
                end: end.map(Box::new),
                step: step.map(Box::new),
                is_end_exclusive: None,
            },
        )))
    }
}
31229
// TO_JSON(x) -> target-specific (from Spark/Hive).
"TO_JSON" => {
    match target {
        DialectType::Presto | DialectType::Trino => {
            // JSON_FORMAT(CAST(x AS JSON)).
            // A missing argument degrades to NULL rather than panicking.
            let arg = args
                .into_iter()
                .next()
                .unwrap_or(Expression::Null(crate::expressions::Null));
            // JSON is not a first-class DataType variant, so a Custom type
            // carries the target-side type name verbatim.
            let cast_json = Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Custom {
                    name: "JSON".to_string(),
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_FORMAT".to_string(),
                vec![cast_json],
            ))))
        }
        DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
            "TO_JSON_STRING".to_string(),
            args,
        )))),
        DialectType::DuckDB => {
            // CAST(TO_JSON(x) AS TEXT): keep DuckDB's TO_JSON but pin the
            // result to a plain string type.
            let arg = args
                .into_iter()
                .next()
                .unwrap_or(Expression::Null(crate::expressions::Null));
            let to_json = Expression::Function(Box::new(Function::new(
                "TO_JSON".to_string(),
                vec![arg],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: to_json,
                to: DataType::Text,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        // Everything else: pass TO_JSON through unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "TO_JSON".to_string(),
            args,
        )))),
    }
}
31285
// TO_JSON_STRING(x) -> target-specific.
// Mirrors the "TO_JSON" arm above for the Presto/Trino and DuckDB cases.
"TO_JSON_STRING" => {
    match target {
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
            Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
        ),
        DialectType::Presto | DialectType::Trino => {
            // JSON_FORMAT(CAST(x AS JSON)).
            // A missing argument degrades to NULL rather than panicking.
            let arg = args
                .into_iter()
                .next()
                .unwrap_or(Expression::Null(crate::expressions::Null));
            // JSON is not a first-class DataType variant, so a Custom type
            // carries the target-side type name verbatim.
            let cast_json = Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Custom {
                    name: "JSON".to_string(),
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_FORMAT".to_string(),
                vec![cast_json],
            ))))
        }
        DialectType::DuckDB => {
            // CAST(TO_JSON(x) AS TEXT): keep DuckDB's TO_JSON but pin the
            // result to a plain string type.
            let arg = args
                .into_iter()
                .next()
                .unwrap_or(Expression::Null(crate::expressions::Null));
            let to_json = Expression::Function(Box::new(Function::new(
                "TO_JSON".to_string(),
                vec![arg],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: to_json,
                to: DataType::Text,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::Snowflake => {
            // Snowflake spells this TO_JSON(x).
            Ok(Expression::Function(Box::new(Function::new(
                "TO_JSON".to_string(),
                args,
            ))))
        }
        // Everything else: pass TO_JSON_STRING through unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "TO_JSON_STRING".to_string(),
            args,
        )))),
    }
}
31347
31348 // SAFE_ADD(x, y) -> SafeAdd expression
31349 "SAFE_ADD" if args.len() == 2 => {
31350 let x = args.remove(0);
31351 let y = args.remove(0);
31352 Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
31353 this: Box::new(x),
31354 expression: Box::new(y),
31355 })))
31356 }
31357
31358 // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
31359 "SAFE_SUBTRACT" if args.len() == 2 => {
31360 let x = args.remove(0);
31361 let y = args.remove(0);
31362 Ok(Expression::SafeSubtract(Box::new(
31363 crate::expressions::SafeSubtract {
31364 this: Box::new(x),
31365 expression: Box::new(y),
31366 },
31367 )))
31368 }
31369
31370 // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
31371 "SAFE_MULTIPLY" if args.len() == 2 => {
31372 let x = args.remove(0);
31373 let y = args.remove(0);
31374 Ok(Expression::SafeMultiply(Box::new(
31375 crate::expressions::SafeMultiply {
31376 this: Box::new(x),
31377 expression: Box::new(y),
31378 },
31379 )))
31380 }
31381
31382 // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
31383 "REGEXP_CONTAINS" if args.len() == 2 => {
31384 let str_expr = args.remove(0);
31385 let pattern = args.remove(0);
31386 Ok(Expression::RegexpLike(Box::new(
31387 crate::expressions::RegexpFunc {
31388 this: str_expr,
31389 pattern,
31390 flags: None,
31391 },
31392 )))
31393 }
31394
// CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b)).
// Both operands are lower-cased to emulate a case-insensitive search with
// a plain CONTAINS on the target side.
"CONTAINS_SUBSTR" if args.len() == 2 => {
    let a = args.remove(0);
    let b = args.remove(0);
    let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
    let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
    Ok(Expression::Function(Box::new(Function::new(
        "CONTAINS".to_string(),
        vec![lower_a, lower_b],
    ))))
}
31406
31407 // INT64(x) -> CAST(x AS BIGINT)
31408 "INT64" if args.len() == 1 => {
31409 let arg = args.remove(0);
31410 Ok(Expression::Cast(Box::new(Cast {
31411 this: arg,
31412 to: DataType::BigInt { length: None },
31413 trailing_comments: vec![],
31414 double_colon_syntax: false,
31415 format: None,
31416 default: None,
31417 inferred_type: None,
31418 })))
31419 }
31420
31421 // INSTR(str, substr) -> target-specific
31422 "INSTR" if args.len() >= 2 => {
31423 let str_expr = args.remove(0);
31424 let substr = args.remove(0);
31425 if matches!(target, DialectType::Snowflake) {
31426 // CHARINDEX(substr, str)
31427 Ok(Expression::Function(Box::new(Function::new(
31428 "CHARINDEX".to_string(),
31429 vec![substr, str_expr],
31430 ))))
31431 } else if matches!(target, DialectType::BigQuery) {
31432 // Keep as INSTR
31433 Ok(Expression::Function(Box::new(Function::new(
31434 "INSTR".to_string(),
31435 vec![str_expr, substr],
31436 ))))
31437 } else {
31438 // Default: keep as INSTR
31439 Ok(Expression::Function(Box::new(Function::new(
31440 "INSTR".to_string(),
31441 vec![str_expr, substr],
31442 ))))
31443 }
31444 }
31445
// BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL.
"DATE_TRUNC" if args.len() == 2 => {
    let expr = args.remove(0);
    let unit_expr = args.remove(0);
    let unit_str = get_unit_str(&unit_expr);

    match target {
        // These targets take the unit first, rendered as a string literal.
        // NOTE(review): TSQL's DATETRUNC expects a bare datepart keyword,
        // not a quoted string — presumably the TSQL generator unquotes the
        // unit literal downstream; confirm.
        DialectType::DuckDB
        | DialectType::Snowflake
        | DialectType::PostgreSQL
        | DialectType::Presto
        | DialectType::Trino
        | DialectType::Databricks
        | DialectType::Spark
        | DialectType::Redshift
        | DialectType::ClickHouse
        | DialectType::TSQL => {
            // Standard: DATE_TRUNC('UNIT', expr)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![Expression::Literal(Box::new(Literal::String(unit_str))), expr],
            ))))
        }
        _ => {
            // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![expr, unit_expr],
            ))))
        }
    }
}
31478
// TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific.
"TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
    // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone).
    let ts = args.remove(0);
    let unit_expr = args.remove(0);
    let tz = if !args.is_empty() {
        Some(args.remove(0))
    } else {
        None
    };
    let unit_str = get_unit_str(&unit_expr);

    match target {
        DialectType::DuckDB => {
            // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ)).
            // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity).
            // Without timezone for MINUTE+ granularity: just DATE_TRUNC.
            // "Coarse" units are the ones where the truncation boundary
            // depends on the local calendar date, so the timezone matters.
            let is_coarse = matches!(
                unit_str.as_str(),
                "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
            );
            // For DATETIME_TRUNC, cast string arguments to plain TIMESTAMP
            // (no timezone); everything else goes through the shared
            // maybe_cast_ts_to_tz helper.
            let cast_ts = if name == "DATETIME_TRUNC" {
                match ts {
                    Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
                        Expression::Cast(Box::new(Cast {
                            this: ts,
                            to: DataType::Timestamp {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }))
                    }
                    _ => Self::maybe_cast_ts_to_tz(ts, &name),
                }
            } else {
                Self::maybe_cast_ts_to_tz(ts, &name)
            };

            if let Some(tz_arg) = tz {
                if is_coarse {
                    // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz':
                    // shift into the zone, truncate on the local boundary,
                    // then shift back.
                    let at_tz = Expression::AtTimeZone(Box::new(
                        crate::expressions::AtTimeZone {
                            this: cast_ts,
                            zone: tz_arg.clone(),
                        },
                    ));
                    let date_trunc = Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![Expression::Literal(Box::new(Literal::String(unit_str))), at_tz],
                    )));
                    Ok(Expression::AtTimeZone(Box::new(
                        crate::expressions::AtTimeZone {
                            this: date_trunc,
                            zone: tz_arg,
                        },
                    )))
                } else {
                    // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts).
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_TRUNC".to_string(),
                        vec![Expression::Literal(Box::new(Literal::String(unit_str))), cast_ts],
                    ))))
                }
            } else {
                // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ)).
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    vec![Expression::Literal(Box::new(Literal::String(unit_str))), cast_ts],
                ))))
            }
        }
        DialectType::Databricks | DialectType::Spark => {
            // Databricks/Spark: DATE_TRUNC('UNIT', ts); any timezone
            // argument is dropped here — TODO confirm that is intended.
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![Expression::Literal(Box::new(Literal::String(unit_str))), ts],
            ))))
        }
        _ => {
            // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz]).
            let unit = Expression::Literal(Box::new(Literal::String(unit_str)));
            let mut date_trunc_args = vec![unit, ts];
            if let Some(tz_arg) = tz {
                date_trunc_args.push(tz_arg);
            }
            Ok(Expression::Function(Box::new(Function::new(
                "TIMESTAMP_TRUNC".to_string(),
                date_trunc_args,
            ))))
        }
    }
}
31578
// TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME).
// Dispatches on arity: 3 = constructor from parts, 1 = parse/convert,
// 2 = value plus timezone, anything else passes through unchanged.
"TIME" => {
    if args.len() == 3 {
        // TIME(h, m, s) constructor.
        match target {
            DialectType::TSQL => {
                // TIMEFROMPARTS(h, m, s, 0, 0): pad with zero fractional
                // seconds and zero precision.
                args.push(Expression::number(0));
                args.push(Expression::number(0));
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMEFROMPARTS".to_string(),
                    args,
                ))))
            }
            DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
                "MAKETIME".to_string(),
                args,
            )))),
            DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                Function::new("MAKE_TIME".to_string(), args),
            ))),
            _ => Ok(Expression::Function(Box::new(Function::new(
                "TIME".to_string(),
                args,
            )))),
        }
    } else if args.len() == 1 {
        let arg = args.remove(0);
        if matches!(target, DialectType::Spark) {
            // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME —
            // presumably because Spark lacks a TIME type; confirm).
            Ok(Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        } else {
            // Most targets: CAST(x AS TIME).
            Ok(Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Time {
                    precision: None,
                    timezone: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
    } else if args.len() == 2 {
        // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME).
        let expr = args.remove(0);
        let tz = args.remove(0);
        let cast_tstz = Expression::Cast(Box::new(Cast {
            this: expr,
            to: DataType::Timestamp {
                timezone: true,
                precision: None,
            },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }));
        let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
            this: cast_tstz,
            zone: tz,
        }));
        Ok(Expression::Cast(Box::new(Cast {
            this: at_tz,
            to: DataType::Time {
                precision: None,
                timezone: false,
            },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        })))
    } else {
        // Unknown arity: emit the call unchanged.
        Ok(Expression::Function(Box::new(Function::new(
            "TIME".to_string(),
            args,
        ))))
    }
}
31675
// DATETIME('string') -> CAST('string' AS TIMESTAMP)
// DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
// DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
// DATETIME(y, m, d, h, min, s) -> target-specific
"DATETIME" => {
    // For BigQuery target: keep DATETIME function but convert a TIME
    // literal second argument to CAST('…' AS TIME) so it round-trips.
    if matches!(target, DialectType::BigQuery) {
        if args.len() == 2 {
            let has_time_literal =
                matches!(&args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
            if has_time_literal {
                let first = args.remove(0);
                let second = args.remove(0);
                let time_as_cast = match second {
                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)) => {
                        // Guard above guarantees the Time variant here.
                        let Literal::Time(s) = lit.as_ref() else { unreachable!() };
                        Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Box::new(Literal::String(s.clone()))),
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }))
                    }
                    other => other,
                };
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATETIME".to_string(),
                    vec![first, time_as_cast],
                ))));
            }
        }
        // All other BigQuery arities: pass through untouched.
        return Ok(Expression::Function(Box::new(Function::new(
            "DATETIME".to_string(),
            args,
        ))));
    }

    if args.len() == 1 {
        // DATETIME('string') -> CAST('string' AS TIMESTAMP).
        let arg = args.remove(0);
        Ok(Expression::Cast(Box::new(Cast {
            this: arg,
            to: DataType::Timestamp {
                timezone: false,
                precision: None,
            },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        })))
    } else if args.len() == 2 {
        let first = args.remove(0);
        let second = args.remove(0);
        // The second argument disambiguates the two 2-arg forms: a TIME
        // literal means date+time composition, anything else a timezone.
        let is_time_literal = matches!(&second, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
        if is_time_literal {
            // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
            let cast_date = Expression::Cast(Box::new(Cast {
                this: first,
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            // Convert TIME 'x' literal to string 'x' so CAST produces
            // CAST('x' AS TIME) not CAST(TIME 'x' AS TIME).
            let time_as_string = match second {
                Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)) => {
                    // is_time_literal guard guarantees the Time variant.
                    let Literal::Time(s) = lit.as_ref() else { unreachable!() };
                    Expression::Literal(Box::new(Literal::String(s.clone())))
                }
                other => other,
            };
            let cast_time = Expression::Cast(Box::new(Cast {
                this: time_as_string,
                to: DataType::Time {
                    precision: None,
                    timezone: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let add_expr =
                Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
            Ok(Expression::Cast(Box::new(Cast {
                this: add_expr,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        } else {
            // DATETIME('string', 'timezone'):
            // CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP).
            let cast_tstz = Expression::Cast(Box::new(Cast {
                this: first,
                to: DataType::Timestamp {
                    timezone: true,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let at_tz =
                Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                    this: cast_tstz,
                    zone: second,
                }));
            Ok(Expression::Cast(Box::new(Cast {
                this: at_tz,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
    } else if args.len() >= 3 {
        // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake.
        // For other targets the call is left unchanged (no MAKE_TIMESTAMP
        // mapping implemented here yet).
        if matches!(target, DialectType::Snowflake) {
            Ok(Expression::Function(Box::new(Function::new(
                "TIMESTAMP_FROM_PARTS".to_string(),
                args,
            ))))
        } else {
            Ok(Expression::Function(Box::new(Function::new(
                "DATETIME".to_string(),
                args,
            ))))
        }
    } else {
        // Zero arguments: pass through unchanged.
        Ok(Expression::Function(Box::new(Function::new(
            "DATETIME".to_string(),
            args,
        ))))
    }
}
31836
// TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE)
// TIMESTAMP(x, tz) -> CONVERT_TIMEZONE(tz, CAST(x AS TIMESTAMP)) for Snowflake,
//                     CAST(x AS TIMESTAMP) AT TIME ZONE tz for everything else.
// NOTE(review): the 1-arg cast applies to every target, not just Presto as
// the earlier comment implied.
"TIMESTAMP" => {
    if args.len() == 1 {
        let arg = args.remove(0);
        Ok(Expression::Cast(Box::new(Cast {
            this: arg,
            to: DataType::Timestamp {
                timezone: true,
                precision: None,
            },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        })))
    } else if args.len() == 2 {
        let arg = args.remove(0);
        let tz = args.remove(0);
        let cast_ts = Expression::Cast(Box::new(Cast {
            this: arg,
            to: DataType::Timestamp {
                timezone: false,
                precision: None,
            },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }));
        if matches!(target, DialectType::Snowflake) {
            // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
            Ok(Expression::Function(Box::new(Function::new(
                "CONVERT_TIMEZONE".to_string(),
                vec![tz, cast_ts],
            ))))
        } else {
            Ok(Expression::AtTimeZone(Box::new(
                crate::expressions::AtTimeZone {
                    this: cast_ts,
                    zone: tz,
                },
            )))
        }
    } else {
        // Unknown arity: emit the call unchanged.
        Ok(Expression::Function(Box::new(Function::new(
            "TIMESTAMP".to_string(),
            args,
        ))))
    }
}
31890
// STRING(x) -> CAST(x AS VARCHAR/TEXT)
// STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
"STRING" => {
    if args.len() == 1 {
        let arg = args.remove(0);
        // DuckDB spells its string type TEXT; everyone else gets VARCHAR.
        let cast_type = match target {
            DialectType::DuckDB => DataType::Text,
            _ => DataType::VarChar {
                length: None,
                parenthesized_length: false,
            },
        };
        Ok(Expression::Cast(Box::new(Cast {
            this: arg,
            to: cast_type,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        })))
    } else if args.len() == 2 {
        let arg = args.remove(0);
        let tz = args.remove(0);
        let cast_type = match target {
            DialectType::DuckDB => DataType::Text,
            _ => DataType::VarChar {
                length: None,
                parenthesized_length: false,
            },
        };
        if matches!(target, DialectType::Snowflake) {
            // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
            let convert_tz = Expression::Function(Box::new(Function::new(
                "CONVERT_TIMEZONE".to_string(),
                vec![
                    Expression::Literal(Box::new(Literal::String("UTC".to_string()))),
                    tz,
                    arg,
                ],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: convert_tz,
                to: cast_type,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        } else {
            // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR):
            // interpret the naive timestamp as UTC, then shift into tz.
            let cast_ts = Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let at_utc =
                Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                    this: cast_ts,
                    zone: Expression::Literal(Box::new(Literal::String("UTC".to_string()))),
                }));
            let at_tz =
                Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                    this: at_utc,
                    zone: tz,
                }));
            Ok(Expression::Cast(Box::new(Cast {
                this: at_tz,
                to: cast_type,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
    } else {
        // Unknown arity: emit the call unchanged.
        Ok(Expression::Function(Box::new(Function::new(
            "STRING".to_string(),
            args,
        ))))
    }
}
31982
// UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
//
// NOTE(review): arms with *identical* patterns and guards appear a second time
// near the end of this match (the "UNIX_SECONDS(ts) -> target-specific" group).
// Rust tries match arms top-down and emits no unreachable-pattern warning for
// guarded arms, so these first arms always win and the later copies are dead
// code. The later copies differ subtly (they normalize literals via
// Self::ts_literal_to_cast_tz and emit a plain TIMESTAMPDIFF(...) call) —
// decide which implementation is intended and delete the other.
"UNIX_SECONDS" if args.len() == 1 => {
    // BigQuery UNIX_SECONDS(ts): whole seconds since the Unix epoch.
    let ts = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
            // EPOCH returns a DOUBLE in DuckDB, hence the outer BIGINT cast.
            let cast_ts = Self::ensure_cast_timestamptz(ts);
            let epoch = Expression::Function(Box::new(Function::new(
                "EPOCH".to_string(),
                vec![cast_ts],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: epoch,
                to: DataType::BigInt { length: None },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::Snowflake => {
            // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
            let epoch = Expression::Cast(Box::new(Cast {
                this: Expression::Literal(Box::new(Literal::String(
                    "1970-01-01 00:00:00+00".to_string(),
                ))),
                to: DataType::Timestamp {
                    timezone: true,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            Ok(Expression::TimestampDiff(Box::new(
                crate::expressions::TimestampDiff {
                    this: Box::new(epoch),
                    expression: Box::new(ts),
                    unit: Some("SECONDS".to_string()),
                },
            )))
        }
        // Other targets: pass through unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "UNIX_SECONDS".to_string(),
            vec![ts],
        )))),
    }
}

"UNIX_MILLIS" if args.len() == 1 => {
    // BigQuery UNIX_MILLIS(ts): milliseconds since the Unix epoch.
    let ts = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // EPOCH_MS(CAST(ts AS TIMESTAMPTZ)) — already integral, no cast needed.
            let cast_ts = Self::ensure_cast_timestamptz(ts);
            Ok(Expression::Function(Box::new(Function::new(
                "EPOCH_MS".to_string(),
                vec![cast_ts],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "UNIX_MILLIS".to_string(),
            vec![ts],
        )))),
    }
}

"UNIX_MICROS" if args.len() == 1 => {
    // BigQuery UNIX_MICROS(ts): microseconds since the Unix epoch.
    let ts = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
            let cast_ts = Self::ensure_cast_timestamptz(ts);
            Ok(Expression::Function(Box::new(Function::new(
                "EPOCH_US".to_string(),
                vec![cast_ts],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "UNIX_MICROS".to_string(),
            vec![ts],
        )))),
    }
}
32070
// ARRAY_CONCAT / LIST_CONCAT -> target-specific
//
// BigQuery's ARRAY_CONCAT (and DuckDB's LIST_CONCAT alias) is variadic; some
// targets only offer a binary concat, so those fold N args into a
// right-associated chain of binary calls.
"ARRAY_CONCAT" | "LIST_CONCAT" => {
    match target {
        // NOTE(review): this branch and the Presto/Trino branch below have
        // identical bodies and could be merged into one or-pattern arm.
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            // CONCAT(arr1, arr2, ...)
            Ok(Expression::Function(Box::new(Function::new(
                "CONCAT".to_string(),
                args,
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            // CONCAT(arr1, arr2, ...)
            Ok(Expression::Function(Box::new(Function::new(
                "CONCAT".to_string(),
                args,
            ))))
        }
        DialectType::Snowflake => {
            // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
            if args.len() == 1 {
                // ARRAY_CAT requires 2 args, add empty array as []
                // (concatenating with [] is the identity, preserving semantics).
                let empty_arr = Expression::ArrayFunc(Box::new(
                    crate::expressions::ArrayConstructor {
                        expressions: vec![],
                        bracket_notation: true,
                        use_list_keyword: false,
                    },
                ));
                let mut new_args = args;
                new_args.push(empty_arr);
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_CAT".to_string(),
                    new_args,
                ))))
            } else if args.is_empty() {
                // Zero-arg call: pass through as-is (nothing sensible to fold).
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_CAT".to_string(),
                    args,
                ))))
            } else {
                // Fold right-to-left: start with the last array and wrap each
                // preceding one, yielding ARRAY_CAT(a, ARRAY_CAT(b, c)).
                let mut it = args.into_iter().rev();
                let mut result = it.next().unwrap();
                for arr in it {
                    result = Expression::Function(Box::new(Function::new(
                        "ARRAY_CAT".to_string(),
                        vec![arr, result],
                    )));
                }
                Ok(result)
            }
        }
        DialectType::PostgreSQL => {
            // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
            // PostgreSQL accepts a unary call as-is, so only fold for 2+ args.
            if args.len() <= 1 {
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_CAT".to_string(),
                    args,
                ))))
            } else {
                let mut it = args.into_iter().rev();
                let mut result = it.next().unwrap();
                for arr in it {
                    result = Expression::Function(Box::new(Function::new(
                        "ARRAY_CAT".to_string(),
                        vec![arr, result],
                    )));
                }
                Ok(result)
            }
        }
        DialectType::Redshift => {
            // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
            // Binary ARRAY_CONCAT: only fold when more than two args.
            if args.len() <= 2 {
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_CONCAT".to_string(),
                    args,
                ))))
            } else {
                let mut it = args.into_iter().rev();
                let mut result = it.next().unwrap();
                for arr in it {
                    result = Expression::Function(Box::new(Function::new(
                        "ARRAY_CONCAT".to_string(),
                        vec![arr, result],
                    )));
                }
                Ok(result)
            }
        }
        DialectType::DuckDB => {
            // LIST_CONCAT supports multiple args natively in DuckDB
            Ok(Expression::Function(Box::new(Function::new(
                "LIST_CONCAT".to_string(),
                args,
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ARRAY_CONCAT".to_string(),
            args,
        )))),
    }
}
32173
// ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
// BigQuery's aggregate "concatenate arrays across rows" has no direct
// Snowflake equivalent, so it is rebuilt as aggregate-then-flatten.
"ARRAY_CONCAT_AGG" if args.len() == 1 => {
    let arg = args.remove(0);
    match target {
        DialectType::Snowflake => {
            // Inner ARRAY_AGG with all optional clauses left at their defaults.
            let array_agg =
                Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
                    this: arg,
                    distinct: false,
                    filter: None,
                    order_by: vec![],
                    name: None,
                    ignore_nulls: None,
                    having_max: None,
                    limit: None,
                    inferred_type: None,
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_FLATTEN".to_string(),
                vec![array_agg],
            ))))
        }
        // Other targets: pass through unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "ARRAY_CONCAT_AGG".to_string(),
            vec![arg],
        )))),
    }
}
32202
// MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
//
// The UNHEX(...) wrappers and *_BINARY variants keep the result in a binary
// form rather than the target's hex-string default — presumably to match the
// source dialect's BYTES return type; TODO confirm against the BigQuery docs.
"MD5" if args.len() == 1 => {
    let arg = args.remove(0);
    match target {
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            // UNHEX(MD5(x)) — Spark/Hive MD5 returns a hex string.
            let md5 = Expression::Function(Box::new(Function::new(
                "MD5".to_string(),
                vec![arg],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "UNHEX".to_string(),
                vec![md5],
            ))))
        }
        DialectType::Snowflake => {
            // MD5_BINARY(x)
            Ok(Expression::Function(Box::new(Function::new(
                "MD5_BINARY".to_string(),
                vec![arg],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "MD5".to_string(),
            vec![arg],
        )))),
    }
}

"SHA1" if args.len() == 1 => {
    let arg = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // UNHEX(SHA1(x)) — DuckDB SHA1 returns a hex string.
            let sha1 = Expression::Function(Box::new(Function::new(
                "SHA1".to_string(),
                vec![arg],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "UNHEX".to_string(),
                vec![sha1],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "SHA1".to_string(),
            vec![arg],
        )))),
    }
}

"SHA256" if args.len() == 1 => {
    let arg = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // UNHEX(SHA256(x))
            let sha = Expression::Function(Box::new(Function::new(
                "SHA256".to_string(),
                vec![arg],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "UNHEX".to_string(),
                vec![sha],
            ))))
        }
        DialectType::Snowflake => {
            // SHA2_BINARY(x, 256) — digest length selected by second arg.
            Ok(Expression::Function(Box::new(Function::new(
                "SHA2_BINARY".to_string(),
                vec![arg, Expression::number(256)],
            ))))
        }
        DialectType::Redshift | DialectType::Spark => {
            // SHA2(x, 256)
            Ok(Expression::Function(Box::new(Function::new(
                "SHA2".to_string(),
                vec![arg, Expression::number(256)],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "SHA256".to_string(),
            vec![arg],
        )))),
    }
}

"SHA512" if args.len() == 1 => {
    let arg = args.remove(0);
    match target {
        DialectType::Snowflake => {
            // SHA2_BINARY(x, 512)
            Ok(Expression::Function(Box::new(Function::new(
                "SHA2_BINARY".to_string(),
                vec![arg, Expression::number(512)],
            ))))
        }
        DialectType::Redshift | DialectType::Spark => {
            // SHA2(x, 512)
            Ok(Expression::Function(Box::new(Function::new(
                "SHA2".to_string(),
                vec![arg, Expression::number(512)],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "SHA512".to_string(),
            vec![arg],
        )))),
    }
}
32311
// REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg
//
// Targets disagree on the default group index (BigQuery/Spark default to
// group 1 when the pattern has a capturing group; others default to the whole
// match), so an explicit group argument is added or omitted per target.
"REGEXP_EXTRACT_ALL" if args.len() == 2 => {
    let str_expr = args.remove(0);
    let pattern = args.remove(0);

    // Check if pattern contains capturing groups (parentheses)
    // NOTE(review): this heuristic only works for string-literal patterns and
    // also fires on non-capturing `(?:...)` and escaped `\(`...`\)` parens —
    // verify whether that over-approximation matters for the targets below.
    let has_groups = match &pattern {
        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => { let Literal::String(s) = lit.as_ref() else { unreachable!() }; s.contains('(') && s.contains(')') },
        _ => false,
    };

    match target {
        DialectType::DuckDB => {
            // DuckDB needs an explicit group: 1 for the first capture,
            // 0 for the whole match.
            let group = if has_groups {
                Expression::number(1)
            } else {
                Expression::number(0)
            };
            Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_EXTRACT_ALL".to_string(),
                vec![str_expr, pattern, group],
            ))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
            if has_groups {
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern],
                ))))
            } else {
                // No captures: force group 0 (whole match).
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern, Expression::number(0)],
                ))))
            }
        }
        DialectType::Presto | DialectType::Trino => {
            // Presto/Trino default to the whole match; pass group 1 explicitly
            // when the pattern captures.
            if has_groups {
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern, Expression::number(1)],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern],
                ))))
            }
        }
        DialectType::Snowflake => {
            if has_groups {
                // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
                // position=1, occurrence=1, params='c' (case-sensitive), group=1.
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![
                        str_expr,
                        pattern,
                        Expression::number(1),
                        Expression::number(1),
                        Expression::Literal(Box::new(Literal::String("c".to_string()))),
                        Expression::number(1),
                    ],
                ))))
            } else {
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![str_expr, pattern],
                ))))
            }
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT_ALL".to_string(),
            vec![str_expr, pattern],
        )))),
    }
}
32389
32390 // MOD(x, y) -> x % y for PostgreSQL/DuckDB
32391 "MOD" if args.len() == 2 => {
32392 match target {
32393 DialectType::PostgreSQL
32394 | DialectType::DuckDB
32395 | DialectType::Presto
32396 | DialectType::Trino
32397 | DialectType::Athena
32398 | DialectType::Snowflake => {
32399 let x = args.remove(0);
32400 let y = args.remove(0);
32401 // Wrap complex expressions in parens to preserve precedence
32402 let needs_paren = |e: &Expression| {
32403 matches!(
32404 e,
32405 Expression::Add(_)
32406 | Expression::Sub(_)
32407 | Expression::Mul(_)
32408 | Expression::Div(_)
32409 )
32410 };
32411 let x = if needs_paren(&x) {
32412 Expression::Paren(Box::new(crate::expressions::Paren {
32413 this: x,
32414 trailing_comments: vec![],
32415 }))
32416 } else {
32417 x
32418 };
32419 let y = if needs_paren(&y) {
32420 Expression::Paren(Box::new(crate::expressions::Paren {
32421 this: y,
32422 trailing_comments: vec![],
32423 }))
32424 } else {
32425 y
32426 };
32427 Ok(Expression::Mod(Box::new(
32428 crate::expressions::BinaryOp::new(x, y),
32429 )))
32430 }
32431 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
32432 // Hive/Spark: a % b
32433 let x = args.remove(0);
32434 let y = args.remove(0);
32435 let needs_paren = |e: &Expression| {
32436 matches!(
32437 e,
32438 Expression::Add(_)
32439 | Expression::Sub(_)
32440 | Expression::Mul(_)
32441 | Expression::Div(_)
32442 )
32443 };
32444 let x = if needs_paren(&x) {
32445 Expression::Paren(Box::new(crate::expressions::Paren {
32446 this: x,
32447 trailing_comments: vec![],
32448 }))
32449 } else {
32450 x
32451 };
32452 let y = if needs_paren(&y) {
32453 Expression::Paren(Box::new(crate::expressions::Paren {
32454 this: y,
32455 trailing_comments: vec![],
32456 }))
32457 } else {
32458 y
32459 };
32460 Ok(Expression::Mod(Box::new(
32461 crate::expressions::BinaryOp::new(x, y),
32462 )))
32463 }
32464 _ => Ok(Expression::Function(Box::new(Function::new(
32465 "MOD".to_string(),
32466 args,
32467 )))),
32468 }
32469 }
32470
32471 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, ARRAY_FILTER for StarRocks
32472 "ARRAY_FILTER" if args.len() == 2 => {
32473 let name = match target {
32474 DialectType::DuckDB => "LIST_FILTER",
32475 DialectType::StarRocks => "ARRAY_FILTER",
32476 _ => "FILTER",
32477 };
32478 Ok(Expression::Function(Box::new(Function::new(
32479 name.to_string(),
32480 args,
32481 ))))
32482 }
32483 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
32484 "FILTER" if args.len() == 2 => {
32485 let name = match target {
32486 DialectType::DuckDB => "LIST_FILTER",
32487 DialectType::StarRocks => "ARRAY_FILTER",
32488 _ => "FILTER",
32489 };
32490 Ok(Expression::Function(Box::new(Function::new(
32491 name.to_string(),
32492 args,
32493 ))))
32494 }
// REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
// (Spark spells the higher-order reduce AGGREGATE; the finish lambda is
// optional, hence the >= 3 guard.)
"REDUCE" if args.len() >= 3 => {
    let name = match target {
        DialectType::Spark | DialectType::Databricks => "AGGREGATE",
        _ => "REDUCE",
    };
    Ok(Expression::Function(Box::new(Function::new(
        name.to_string(),
        args,
    ))))
}
// ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
// Kept as-is here; the ClickHouse generator performs the casing rename.
"ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
    Function::new("ARRAY_REVERSE".to_string(), args),
))),

// CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
// Folded left-to-right into a chain of || (DPipe) operators.
"CONCAT" if args.len() > 2 => match target {
    DialectType::DuckDB => {
        let mut it = args.into_iter();
        let mut result = it.next().unwrap();
        for arg in it {
            result = Expression::DPipe(Box::new(crate::expressions::DPipe {
                this: Box::new(result),
                expression: Box::new(arg),
                safe: None,
            }));
        }
        Ok(result)
    }
    _ => Ok(Expression::Function(Box::new(Function::new(
        "CONCAT".to_string(),
        args,
    )))),
},
32530
// GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
//
// NOTE(review): the BigQuery and Snowflake branches below are identical
// (both just inject the default INTERVAL 1 DAY) and could share one path.
"GENERATE_DATE_ARRAY" => {
    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: add default interval if not present
        if args.len() == 2 {
            let start = args.remove(0);
            let end = args.remove(0);
            // Default step: INTERVAL '1' DAY.
            let default_interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(Expression::Literal(Box::new(Literal::String("1".to_string())))),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: crate::expressions::IntervalUnit::Day,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "GENERATE_DATE_ARRAY".to_string(),
                vec![start, end, default_interval],
            ))))
        } else {
            Ok(Expression::Function(Box::new(Function::new(
                "GENERATE_DATE_ARRAY".to_string(),
                args,
            ))))
        }
    } else if matches!(target, DialectType::DuckDB) {
        // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
        let start = args.get(0).cloned();
        let end = args.get(1).cloned();
        // Missing step defaults to INTERVAL '1' DAY.
        let step = args.get(2).cloned().or_else(|| {
            Some(Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::Literal(Box::new(Literal::String("1".to_string())))),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: crate::expressions::IntervalUnit::Day,
                        use_plural: false,
                    }),
                },
            )))
        });

        // Wrap start/end in CAST(... AS DATE) only for string literals
        // (non-literal expressions are assumed to already be date-typed).
        let maybe_cast_date = |expr: Expression| -> Expression {
            if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))) {
                Expression::Cast(Box::new(Cast {
                    this: expr,
                    to: DataType::Date,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                expr
            }
        };
        let cast_start = start.map(maybe_cast_date);
        let cast_end = end.map(maybe_cast_date);

        let gen_series =
            Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
                start: cast_start.map(Box::new),
                end: cast_end.map(Box::new),
                step: step.map(Box::new),
                is_end_exclusive: None,
            }));

        // Wrap in CAST(... AS DATE[]) — DuckDB's GENERATE_SERIES over dates
        // would otherwise yield timestamps.
        Ok(Expression::Cast(Box::new(Cast {
            this: gen_series,
            to: DataType::Array {
                element_type: Box::new(DataType::Date),
                dimension: None,
            },
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        })))
    } else if matches!(target, DialectType::Snowflake) {
        // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
        // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
        if args.len() == 2 {
            let start = args.remove(0);
            let end = args.remove(0);
            let default_interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(Expression::Literal(Box::new(Literal::String("1".to_string())))),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: crate::expressions::IntervalUnit::Day,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "GENERATE_DATE_ARRAY".to_string(),
                vec![start, end, default_interval],
            ))))
        } else {
            Ok(Expression::Function(Box::new(Function::new(
                "GENERATE_DATE_ARRAY".to_string(),
                args,
            ))))
        }
    } else {
        // Convert to GenerateSeries for other targets
        // (no DATE[] cast here, unlike DuckDB above).
        let start = args.get(0).cloned();
        let end = args.get(1).cloned();
        let step = args.get(2).cloned().or_else(|| {
            Some(Expression::Interval(Box::new(
                crate::expressions::Interval {
                    this: Some(Expression::Literal(Box::new(Literal::String("1".to_string())))),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: crate::expressions::IntervalUnit::Day,
                        use_plural: false,
                    }),
                },
            )))
        });
        Ok(Expression::GenerateSeries(Box::new(
            crate::expressions::GenerateSeries {
                start: start.map(Box::new),
                end: end.map(Box::new),
                step: step.map(Box::new),
                is_end_exclusive: None,
            },
        )))
    }
}
32661
// PARSE_DATE(format, str) -> target-specific
// (BigQuery puts the format FIRST; most targets take the string first, so the
// arguments are reordered in the rewrites below.)
"PARSE_DATE" if args.len() == 2 => {
    let format = args.remove(0);
    let str_expr = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // CAST(STRPTIME(str, duck_format) AS DATE)
            let duck_format = Self::bq_format_to_duckdb(&format);
            let strptime = Expression::Function(Box::new(Function::new(
                "STRPTIME".to_string(),
                vec![str_expr, duck_format],
            )));
            Ok(Expression::Cast(Box::new(Cast {
                this: strptime,
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::Snowflake => {
            // _POLYGLOT_DATE(str, snowflake_format)
            // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
            let sf_format = Self::bq_format_to_snowflake(&format);
            Ok(Expression::Function(Box::new(Function::new(
                "_POLYGLOT_DATE".to_string(),
                vec![str_expr, sf_format],
            ))))
        }
        // Other targets: keep BigQuery's (format, str) argument order.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "PARSE_DATE".to_string(),
            vec![format, str_expr],
        )))),
    }
}

// PARSE_TIMESTAMP(format, str) -> target-specific
// Accepts an optional third timezone argument (BigQuery signature).
"PARSE_TIMESTAMP" if args.len() >= 2 => {
    let format = args.remove(0);
    let str_expr = args.remove(0);
    let tz = if !args.is_empty() {
        Some(args.remove(0))
    } else {
        None
    };
    match target {
        DialectType::DuckDB => {
            // NOTE(review): the optional `tz` argument is silently discarded
            // on this path — PARSE_TIMESTAMP(fmt, s, 'America/...') loses its
            // timezone when targeting DuckDB. Confirm whether an AT TIME ZONE
            // wrapper was intended here.
            let duck_format = Self::bq_format_to_duckdb(&format);
            let strptime = Expression::Function(Box::new(Function::new(
                "STRPTIME".to_string(),
                vec![str_expr, duck_format],
            )));
            Ok(strptime)
        }
        _ => {
            // Reassemble (format, str[, tz]) unchanged for all other targets.
            let mut result_args = vec![format, str_expr];
            if let Some(tz_arg) = tz {
                result_args.push(tz_arg);
            }
            Ok(Expression::Function(Box::new(Function::new(
                "PARSE_TIMESTAMP".to_string(),
                result_args,
            ))))
        }
    }
}
32730
// FORMAT_DATE(format, date) -> target-specific
// NOTE(review): unlike FORMAT_DATETIME below, this branch passes the BigQuery
// format string to DuckDB's STRFTIME *without* running it through
// Self::bq_format_to_duckdb — confirm whether that conversion was intended
// here as well.
"FORMAT_DATE" if args.len() == 2 => {
    let format = args.remove(0);
    let date_expr = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // STRFTIME(CAST(date AS DATE), format)
            let cast_date = Expression::Cast(Box::new(Cast {
                this: date_expr,
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "STRFTIME".to_string(),
                vec![cast_date, format],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "FORMAT_DATE".to_string(),
            vec![format, date_expr],
        )))),
    }
}

// FORMAT_DATETIME(format, datetime) -> target-specific
"FORMAT_DATETIME" if args.len() == 2 => {
    let format = args.remove(0);
    let dt_expr = args.remove(0);

    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
        let norm_format = Self::bq_format_normalize_bq(&format);
        // Also strip DATETIME keyword from typed literals
        // (DATETIME '...' literal becomes CAST('...' AS DATETIME)).
        let norm_dt = match dt_expr {
            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
                let Literal::Timestamp(s) = lit.as_ref() else { unreachable!() };
                Expression::Cast(Box::new(Cast {
                    this: Expression::Literal(Box::new(Literal::String(s.clone()))),
                    to: DataType::Custom {
                        name: "DATETIME".to_string(),
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            }
            other => other,
        };
        // Early return: skips the target match below.
        return Ok(Expression::Function(Box::new(Function::new(
            "FORMAT_DATETIME".to_string(),
            vec![norm_format, norm_dt],
        ))));
    }

    match target {
        DialectType::DuckDB => {
            // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
            let cast_dt = Self::ensure_cast_timestamp(dt_expr);
            let duck_format = Self::bq_format_to_duckdb(&format);
            Ok(Expression::Function(Box::new(Function::new(
                "STRFTIME".to_string(),
                vec![cast_dt, duck_format],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "FORMAT_DATETIME".to_string(),
            vec![format, dt_expr],
        )))),
    }
}

// FORMAT_TIMESTAMP(format, ts) -> target-specific
// The double cast (TIMESTAMPTZ, then TIMESTAMP) normalizes the value to the
// session-local wall-clock time before formatting.
"FORMAT_TIMESTAMP" if args.len() == 2 => {
    let format = args.remove(0);
    let ts_expr = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
            let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
            let cast_ts = Expression::Cast(Box::new(Cast {
                this: cast_tstz,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "STRFTIME".to_string(),
                vec![cast_ts, format],
            ))))
        }
        DialectType::Snowflake => {
            // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
            let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
            let cast_ts = Expression::Cast(Box::new(Cast {
                this: cast_tstz,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let sf_format = Self::bq_format_to_snowflake(&format);
            Ok(Expression::Function(Box::new(Function::new(
                "TO_CHAR".to_string(),
                vec![cast_ts, sf_format],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "FORMAT_TIMESTAMP".to_string(),
            vec![format, ts_expr],
        )))),
    }
}
32860
// UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB
// (BigQuery UNIX_DATE returns the number of whole days since the epoch.)
"UNIX_DATE" if args.len() == 1 => {
    let date = args.remove(0);
    match target {
        DialectType::DuckDB => {
            // Epoch anchor: CAST('1970-01-01' AS DATE).
            let epoch = Expression::Cast(Box::new(Cast {
                this: Expression::Literal(Box::new(Literal::String("1970-01-01".to_string()))),
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
            // Need to convert DATE literal to CAST
            let norm_date = Self::date_literal_to_cast(date);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_DIFF".to_string(),
                vec![
                    Expression::Literal(Box::new(Literal::String("DAY".to_string()))),
                    epoch,
                    norm_date,
                ],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "UNIX_DATE".to_string(),
            vec![date],
        )))),
    }
}
32893
// NOTE(review): this spot previously held a second set of "UNIX_SECONDS" /
// "UNIX_MILLIS" / "UNIX_MICROS" arms with the *same* patterns and *same*
// `args.len() == 1` guards as the arms defined earlier in this match. Rust
// evaluates match arms top-down, and rustc emits no unreachable-pattern
// warning for guarded arms, so the duplicates here were silently dead code.
// They have been removed; runtime behavior is unchanged.
//
// The removed variants differed from the live ones in two ways — they
// normalized timestamp literals via Self::ts_literal_to_cast_tz instead of
// Self::ensure_cast_timestamptz, and the Snowflake branch emitted a plain
// TIMESTAMPDIFF(SECONDS, ..., ts) function call instead of an
// Expression::TimestampDiff node. If that was the intended implementation,
// fold those changes into the earlier arms rather than re-adding a duplicate
// here.
32982
// INSTR(str, substr) -> target-specific
// Snowflake has no INSTR; its CHARINDEX takes (substr, str), so the two
// arguments are swapped. Note that a Snowflake target with an argument count
// other than 2 falls into the final pass-through branch.
"INSTR" => {
    if matches!(target, DialectType::BigQuery) {
        // BQ->BQ: keep as INSTR
        Ok(Expression::Function(Box::new(Function::new(
            "INSTR".to_string(),
            args,
        ))))
    } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
        // Snowflake: CHARINDEX(substr, str) - swap args
        let str_expr = args.remove(0);
        let substr = args.remove(0);
        Ok(Expression::Function(Box::new(Function::new(
            "CHARINDEX".to_string(),
            vec![substr, str_expr],
        ))))
    } else {
        // Keep as INSTR for other targets
        Ok(Expression::Function(Box::new(Function::new(
            "INSTR".to_string(),
            args,
        ))))
    }
}
33007
33008 // CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone
33009 "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
33010 if matches!(target, DialectType::BigQuery) {
33011 // BQ->BQ: always output with parens (function form), keep any timezone arg
33012 Ok(Expression::Function(Box::new(Function::new(name, args))))
33013 } else if name == "CURRENT_DATE" && args.len() == 1 {
33014 // CURRENT_DATE('UTC') - has timezone arg
33015 let tz_arg = args.remove(0);
33016 match target {
33017 DialectType::DuckDB => {
33018 // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
33019 let ct = Expression::CurrentTimestamp(
33020 crate::expressions::CurrentTimestamp {
33021 precision: None,
33022 sysdate: false,
33023 },
33024 );
33025 let at_tz =
33026 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
33027 this: ct,
33028 zone: tz_arg,
33029 }));
33030 Ok(Expression::Cast(Box::new(Cast {
33031 this: at_tz,
33032 to: DataType::Date,
33033 trailing_comments: vec![],
33034 double_colon_syntax: false,
33035 format: None,
33036 default: None,
33037 inferred_type: None,
33038 })))
33039 }
33040 DialectType::Snowflake => {
33041 // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
33042 let ct = Expression::Function(Box::new(Function::new(
33043 "CURRENT_TIMESTAMP".to_string(),
33044 vec![],
33045 )));
33046 let convert = Expression::Function(Box::new(Function::new(
33047 "CONVERT_TIMEZONE".to_string(),
33048 vec![tz_arg, ct],
33049 )));
33050 Ok(Expression::Cast(Box::new(Cast {
33051 this: convert,
33052 to: DataType::Date,
33053 trailing_comments: vec![],
33054 double_colon_syntax: false,
33055 format: None,
33056 default: None,
33057 inferred_type: None,
33058 })))
33059 }
33060 _ => {
33061 // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
33062 let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
33063 Ok(Expression::AtTimeZone(Box::new(
33064 crate::expressions::AtTimeZone {
33065 this: cd,
33066 zone: tz_arg,
33067 },
33068 )))
33069 }
33070 }
33071 } else if (name == "CURRENT_TIMESTAMP"
33072 || name == "CURRENT_TIME"
33073 || name == "CURRENT_DATE")
33074 && args.is_empty()
33075 && matches!(
33076 target,
33077 DialectType::PostgreSQL
33078 | DialectType::DuckDB
33079 | DialectType::Presto
33080 | DialectType::Trino
33081 )
33082 {
33083 // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME
33084 if name == "CURRENT_TIMESTAMP" {
33085 Ok(Expression::CurrentTimestamp(
33086 crate::expressions::CurrentTimestamp {
33087 precision: None,
33088 sysdate: false,
33089 },
33090 ))
33091 } else if name == "CURRENT_DATE" {
33092 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
33093 } else {
33094 // CURRENT_TIME
33095 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
33096 precision: None,
33097 }))
33098 }
33099 } else {
33100 // All other targets: keep as function (with parens)
33101 Ok(Expression::Function(Box::new(Function::new(name, args))))
33102 }
33103 }
33104
33105 // JSON_QUERY(json, path) -> target-specific
33106 "JSON_QUERY" if args.len() == 2 => {
33107 match target {
33108 DialectType::DuckDB | DialectType::SQLite => {
33109 // json -> path syntax
33110 let json_expr = args.remove(0);
33111 let path = args.remove(0);
33112 Ok(Expression::JsonExtract(Box::new(
33113 crate::expressions::JsonExtractFunc {
33114 this: json_expr,
33115 path,
33116 returning: None,
33117 arrow_syntax: true,
33118 hash_arrow_syntax: false,
33119 wrapper_option: None,
33120 quotes_option: None,
33121 on_scalar_string: false,
33122 on_error: None,
33123 },
33124 )))
33125 }
33126 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
33127 Ok(Expression::Function(Box::new(Function::new(
33128 "GET_JSON_OBJECT".to_string(),
33129 args,
33130 ))))
33131 }
33132 DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
33133 Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
33134 )),
33135 _ => Ok(Expression::Function(Box::new(Function::new(
33136 "JSON_QUERY".to_string(),
33137 args,
33138 )))),
33139 }
33140 }
33141
33142 // JSON_VALUE_ARRAY(json, path) -> target-specific
33143 "JSON_VALUE_ARRAY" if args.len() == 2 => {
33144 match target {
33145 DialectType::DuckDB => {
33146 // CAST(json -> path AS TEXT[])
33147 let json_expr = args.remove(0);
33148 let path = args.remove(0);
33149 let arrow = Expression::JsonExtract(Box::new(
33150 crate::expressions::JsonExtractFunc {
33151 this: json_expr,
33152 path,
33153 returning: None,
33154 arrow_syntax: true,
33155 hash_arrow_syntax: false,
33156 wrapper_option: None,
33157 quotes_option: None,
33158 on_scalar_string: false,
33159 on_error: None,
33160 },
33161 ));
33162 Ok(Expression::Cast(Box::new(Cast {
33163 this: arrow,
33164 to: DataType::Array {
33165 element_type: Box::new(DataType::Text),
33166 dimension: None,
33167 },
33168 trailing_comments: vec![],
33169 double_colon_syntax: false,
33170 format: None,
33171 default: None,
33172 inferred_type: None,
33173 })))
33174 }
33175 DialectType::Snowflake => {
33176 let json_expr = args.remove(0);
33177 let path_expr = args.remove(0);
33178 // Convert JSON path from $.path to just path
33179 let sf_path = if let Expression::Literal(ref lit) = path_expr
33180 {
33181 if let Literal::String(ref s) = lit.as_ref() {
33182 let trimmed = s.trim_start_matches('$').trim_start_matches('.');
33183 Expression::Literal(Box::new(Literal::String(trimmed.to_string())))
33184 } else { path_expr.clone() }
33185 } else {
33186 path_expr
33187 };
33188 let parse_json = Expression::Function(Box::new(Function::new(
33189 "PARSE_JSON".to_string(),
33190 vec![json_expr],
33191 )));
33192 let get_path = Expression::Function(Box::new(Function::new(
33193 "GET_PATH".to_string(),
33194 vec![parse_json, sf_path],
33195 )));
33196 // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
33197 let cast_expr = Expression::Cast(Box::new(Cast {
33198 this: Expression::Identifier(Identifier::new("x")),
33199 to: DataType::VarChar {
33200 length: None,
33201 parenthesized_length: false,
33202 },
33203 trailing_comments: vec![],
33204 double_colon_syntax: false,
33205 format: None,
33206 default: None,
33207 inferred_type: None,
33208 }));
33209 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
33210 parameters: vec![Identifier::new("x")],
33211 body: cast_expr,
33212 colon: false,
33213 parameter_types: vec![],
33214 }));
33215 Ok(Expression::Function(Box::new(Function::new(
33216 "TRANSFORM".to_string(),
33217 vec![get_path, lambda],
33218 ))))
33219 }
33220 _ => Ok(Expression::Function(Box::new(Function::new(
33221 "JSON_VALUE_ARRAY".to_string(),
33222 args,
33223 )))),
33224 }
33225 }
33226
33227 // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
33228 // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
33229 // This is different from Hive/Spark where 3rd arg is "group_index"
33230 "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
33231 match target {
33232 DialectType::DuckDB
33233 | DialectType::Presto
33234 | DialectType::Trino
33235 | DialectType::Athena => {
33236 if args.len() == 2 {
33237 // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
33238 args.push(Expression::number(1));
33239 Ok(Expression::Function(Box::new(Function::new(
33240 "REGEXP_EXTRACT".to_string(),
33241 args,
33242 ))))
33243 } else if args.len() == 3 {
33244 let val = args.remove(0);
33245 let regex = args.remove(0);
33246 let position = args.remove(0);
33247 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
33248 if is_pos_1 {
33249 Ok(Expression::Function(Box::new(Function::new(
33250 "REGEXP_EXTRACT".to_string(),
33251 vec![val, regex, Expression::number(1)],
33252 ))))
33253 } else {
33254 let substring_expr = Expression::Function(Box::new(Function::new(
33255 "SUBSTRING".to_string(),
33256 vec![val, position],
33257 )));
33258 let nullif_expr = Expression::Function(Box::new(Function::new(
33259 "NULLIF".to_string(),
33260 vec![
33261 substring_expr,
33262 Expression::Literal(Box::new(Literal::String(String::new()))),
33263 ],
33264 )));
33265 Ok(Expression::Function(Box::new(Function::new(
33266 "REGEXP_EXTRACT".to_string(),
33267 vec![nullif_expr, regex, Expression::number(1)],
33268 ))))
33269 }
33270 } else if args.len() == 4 {
33271 let val = args.remove(0);
33272 let regex = args.remove(0);
33273 let position = args.remove(0);
33274 let occurrence = args.remove(0);
33275 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
33276 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
33277 if is_pos_1 && is_occ_1 {
33278 Ok(Expression::Function(Box::new(Function::new(
33279 "REGEXP_EXTRACT".to_string(),
33280 vec![val, regex, Expression::number(1)],
33281 ))))
33282 } else {
33283 let subject = if is_pos_1 {
33284 val
33285 } else {
33286 let substring_expr = Expression::Function(Box::new(
33287 Function::new("SUBSTRING".to_string(), vec![val, position]),
33288 ));
33289 Expression::Function(Box::new(Function::new(
33290 "NULLIF".to_string(),
33291 vec![
33292 substring_expr,
33293 Expression::Literal(Box::new(Literal::String(String::new()))),
33294 ],
33295 )))
33296 };
33297 let extract_all = Expression::Function(Box::new(Function::new(
33298 "REGEXP_EXTRACT_ALL".to_string(),
33299 vec![subject, regex, Expression::number(1)],
33300 )));
33301 Ok(Expression::Function(Box::new(Function::new(
33302 "ARRAY_EXTRACT".to_string(),
33303 vec![extract_all, occurrence],
33304 ))))
33305 }
33306 } else {
33307 Ok(Expression::Function(Box::new(Function {
33308 name: f.name,
33309 args,
33310 distinct: f.distinct,
33311 trailing_comments: f.trailing_comments,
33312 use_bracket_syntax: f.use_bracket_syntax,
33313 no_parens: f.no_parens,
33314 quoted: f.quoted,
33315 span: None,
33316 inferred_type: None,
33317 })))
33318 }
33319 }
33320 DialectType::Snowflake => {
33321 // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
33322 Ok(Expression::Function(Box::new(Function::new(
33323 "REGEXP_SUBSTR".to_string(),
33324 args,
33325 ))))
33326 }
33327 _ => {
33328 // For other targets (Hive/Spark/BigQuery): pass through as-is
33329 // BigQuery's default group behavior matches Hive/Spark for 2-arg case
33330 Ok(Expression::Function(Box::new(Function {
33331 name: f.name,
33332 args,
33333 distinct: f.distinct,
33334 trailing_comments: f.trailing_comments,
33335 use_bracket_syntax: f.use_bracket_syntax,
33336 no_parens: f.no_parens,
33337 quoted: f.quoted,
33338 span: None,
33339 inferred_type: None,
33340 })))
33341 }
33342 }
33343 }
33344
33345 // BigQuery STRUCT(args) -> target-specific struct expression
33346 "STRUCT" => {
33347 // Convert Function args to Struct fields
33348 let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
33349 for (i, arg) in args.into_iter().enumerate() {
33350 match arg {
33351 Expression::Alias(a) => {
33352 // Named field: expr AS name
33353 fields.push((Some(a.alias.name.clone()), a.this));
33354 }
33355 other => {
33356 // Unnamed field: for Spark/Hive, keep as None
33357 // For Snowflake, auto-name as _N
33358 // For DuckDB, use column name for column refs, _N for others
33359 if matches!(target, DialectType::Snowflake) {
33360 fields.push((Some(format!("_{}", i)), other));
33361 } else if matches!(target, DialectType::DuckDB) {
33362 let auto_name = match &other {
33363 Expression::Column(col) => col.name.name.clone(),
33364 _ => format!("_{}", i),
33365 };
33366 fields.push((Some(auto_name), other));
33367 } else {
33368 fields.push((None, other));
33369 }
33370 }
33371 }
33372 }
33373
33374 match target {
33375 DialectType::Snowflake => {
33376 // OBJECT_CONSTRUCT('name', value, ...)
33377 let mut oc_args = Vec::new();
33378 for (name, val) in &fields {
33379 if let Some(n) = name {
33380 oc_args.push(Expression::Literal(Box::new(Literal::String(n.clone()))));
33381 oc_args.push(val.clone());
33382 } else {
33383 oc_args.push(val.clone());
33384 }
33385 }
33386 Ok(Expression::Function(Box::new(Function::new(
33387 "OBJECT_CONSTRUCT".to_string(),
33388 oc_args,
33389 ))))
33390 }
33391 DialectType::DuckDB => {
33392 // {'name': value, ...}
33393 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
33394 fields,
33395 })))
33396 }
33397 DialectType::Hive => {
33398 // STRUCT(val1, val2, ...) - strip aliases
33399 let hive_fields: Vec<(Option<String>, Expression)> =
33400 fields.into_iter().map(|(_, v)| (None, v)).collect();
33401 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
33402 fields: hive_fields,
33403 })))
33404 }
33405 DialectType::Spark | DialectType::Databricks => {
33406 // Use Expression::Struct to bypass Spark target transform auto-naming
33407 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
33408 fields,
33409 })))
33410 }
33411 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
33412 // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
33413 let all_named =
33414 !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
33415 let all_types_inferable = all_named
33416 && fields
33417 .iter()
33418 .all(|(_, val)| Self::can_infer_presto_type(val));
33419 let row_args: Vec<Expression> =
33420 fields.iter().map(|(_, v)| v.clone()).collect();
33421 let row_expr = Expression::Function(Box::new(Function::new(
33422 "ROW".to_string(),
33423 row_args,
33424 )));
33425 if all_named && all_types_inferable {
33426 // Build ROW type with inferred types
33427 let mut row_type_fields = Vec::new();
33428 for (name, val) in &fields {
33429 if let Some(n) = name {
33430 let type_str = Self::infer_sql_type_for_presto(val);
33431 row_type_fields.push(crate::expressions::StructField::new(
33432 n.clone(),
33433 crate::expressions::DataType::Custom { name: type_str },
33434 ));
33435 }
33436 }
33437 let row_type = crate::expressions::DataType::Struct {
33438 fields: row_type_fields,
33439 nested: true,
33440 };
33441 Ok(Expression::Cast(Box::new(Cast {
33442 this: row_expr,
33443 to: row_type,
33444 trailing_comments: Vec::new(),
33445 double_colon_syntax: false,
33446 format: None,
33447 default: None,
33448 inferred_type: None,
33449 })))
33450 } else {
33451 Ok(row_expr)
33452 }
33453 }
33454 _ => {
33455 // Default: keep as STRUCT function with original args
33456 let mut new_args = Vec::new();
33457 for (name, val) in fields {
33458 if let Some(n) = name {
33459 new_args.push(Expression::Alias(Box::new(
33460 crate::expressions::Alias::new(val, Identifier::new(n)),
33461 )));
33462 } else {
33463 new_args.push(val);
33464 }
33465 }
33466 Ok(Expression::Function(Box::new(Function::new(
33467 "STRUCT".to_string(),
33468 new_args,
33469 ))))
33470 }
33471 }
33472 }
33473
33474 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
33475 "ROUND" if args.len() == 3 => {
33476 let x = args.remove(0);
33477 let n = args.remove(0);
33478 let mode = args.remove(0);
33479 // Check if mode is 'ROUND_HALF_EVEN'
33480 let is_half_even = matches!(&mode, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN")));
33481 if is_half_even && matches!(target, DialectType::DuckDB) {
33482 Ok(Expression::Function(Box::new(Function::new(
33483 "ROUND_EVEN".to_string(),
33484 vec![x, n],
33485 ))))
33486 } else {
33487 // Pass through with all args
33488 Ok(Expression::Function(Box::new(Function::new(
33489 "ROUND".to_string(),
33490 vec![x, n, mode],
33491 ))))
33492 }
33493 }
33494
33495 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
33496 "MAKE_INTERVAL" => {
33497 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
33498 // The positional args are: year, month
33499 // Named args are: day =>, minute =>, etc.
33500 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
33501 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
33502 // For BigQuery->BigQuery: reorder named args (day before minute)
33503 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
33504 let mut parts: Vec<(String, String)> = Vec::new();
33505 let mut pos_idx = 0;
33506 let pos_units = ["year", "month"];
33507 for arg in &args {
33508 if let Expression::NamedArgument(na) = arg {
33509 // Named arg like minute => 5
33510 let unit = na.name.name.clone();
33511 if let Expression::Literal(lit) = &na.value {
33512 if let Literal::Number(n) = lit.as_ref() {
33513 parts.push((unit, n.clone()));
33514 }
33515 }
33516 } else if pos_idx < pos_units.len() {
33517 if let Expression::Literal(lit) = arg {
33518 if let Literal::Number(n) = lit.as_ref() {
33519 parts.push((pos_units[pos_idx].to_string(), n.clone()));
33520 }
33521 }
33522 pos_idx += 1;
33523 }
33524 }
33525 // Don't sort - preserve original argument order
33526 let separator = if matches!(target, DialectType::Snowflake) {
33527 ", "
33528 } else {
33529 " "
33530 };
33531 let interval_str = parts
33532 .iter()
33533 .map(|(u, v)| format!("{} {}", v, u))
33534 .collect::<Vec<_>>()
33535 .join(separator);
33536 Ok(Expression::Interval(Box::new(
33537 crate::expressions::Interval {
33538 this: Some(Expression::Literal(Box::new(Literal::String(interval_str)))),
33539 unit: None,
33540 },
33541 )))
33542 } else if matches!(target, DialectType::BigQuery) {
33543 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
33544 let mut positional = Vec::new();
33545 let mut named: Vec<(
33546 String,
33547 Expression,
33548 crate::expressions::NamedArgSeparator,
33549 )> = Vec::new();
33550 let _pos_units = ["year", "month"];
33551 let mut _pos_idx = 0;
33552 for arg in args {
33553 if let Expression::NamedArgument(na) = arg {
33554 named.push((na.name.name.clone(), na.value, na.separator));
33555 } else {
33556 positional.push(arg);
33557 _pos_idx += 1;
33558 }
33559 }
33560 // Sort named args by: day, hour, minute, second
33561 let unit_order = |u: &str| -> usize {
33562 match u.to_ascii_lowercase().as_str() {
33563 "day" => 0,
33564 "hour" => 1,
33565 "minute" => 2,
33566 "second" => 3,
33567 _ => 4,
33568 }
33569 };
33570 named.sort_by_key(|(u, _, _)| unit_order(u));
33571 let mut result_args = positional;
33572 for (name, value, sep) in named {
33573 result_args.push(Expression::NamedArgument(Box::new(
33574 crate::expressions::NamedArgument {
33575 name: Identifier::new(&name),
33576 value,
33577 separator: sep,
33578 },
33579 )));
33580 }
33581 Ok(Expression::Function(Box::new(Function::new(
33582 "MAKE_INTERVAL".to_string(),
33583 result_args,
33584 ))))
33585 } else {
33586 Ok(Expression::Function(Box::new(Function::new(
33587 "MAKE_INTERVAL".to_string(),
33588 args,
33589 ))))
33590 }
33591 }
33592
33593 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
33594 "ARRAY_TO_STRING" if args.len() == 3 => {
33595 let arr = args.remove(0);
33596 let sep = args.remove(0);
33597 let null_text = args.remove(0);
33598 match target {
33599 DialectType::DuckDB => {
33600 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
33601 let _lambda_param =
33602 Expression::Identifier(crate::expressions::Identifier::new("x"));
33603 let coalesce =
33604 Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
33605 original_name: None,
33606 expressions: vec![
33607 Expression::Identifier(crate::expressions::Identifier::new(
33608 "x",
33609 )),
33610 null_text,
33611 ],
33612 inferred_type: None,
33613 }));
33614 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
33615 parameters: vec![crate::expressions::Identifier::new("x")],
33616 body: coalesce,
33617 colon: false,
33618 parameter_types: vec![],
33619 }));
33620 let list_transform = Expression::Function(Box::new(Function::new(
33621 "LIST_TRANSFORM".to_string(),
33622 vec![arr, lambda],
33623 )));
33624 Ok(Expression::Function(Box::new(Function::new(
33625 "ARRAY_TO_STRING".to_string(),
33626 vec![list_transform, sep],
33627 ))))
33628 }
33629 _ => Ok(Expression::Function(Box::new(Function::new(
33630 "ARRAY_TO_STRING".to_string(),
33631 vec![arr, sep, null_text],
33632 )))),
33633 }
33634 }
33635
33636 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
33637 "LENGTH" if args.len() == 1 => {
33638 let arg = args.remove(0);
33639 match target {
33640 DialectType::DuckDB => {
33641 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
33642 let typeof_func = Expression::Function(Box::new(Function::new(
33643 "TYPEOF".to_string(),
33644 vec![arg.clone()],
33645 )));
33646 let blob_cast = Expression::Cast(Box::new(Cast {
33647 this: arg.clone(),
33648 to: DataType::VarBinary { length: None },
33649 trailing_comments: vec![],
33650 double_colon_syntax: false,
33651 format: None,
33652 default: None,
33653 inferred_type: None,
33654 }));
33655 let octet_length = Expression::Function(Box::new(Function::new(
33656 "OCTET_LENGTH".to_string(),
33657 vec![blob_cast],
33658 )));
33659 let text_cast = Expression::Cast(Box::new(Cast {
33660 this: arg,
33661 to: DataType::Text,
33662 trailing_comments: vec![],
33663 double_colon_syntax: false,
33664 format: None,
33665 default: None,
33666 inferred_type: None,
33667 }));
33668 let length_text = Expression::Function(Box::new(Function::new(
33669 "LENGTH".to_string(),
33670 vec![text_cast],
33671 )));
33672 Ok(Expression::Case(Box::new(crate::expressions::Case {
33673 operand: Some(typeof_func),
33674 whens: vec![(
33675 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
33676 octet_length,
33677 )],
33678 else_: Some(length_text),
33679 comments: Vec::new(),
33680 inferred_type: None,
33681 })))
33682 }
33683 _ => Ok(Expression::Function(Box::new(Function::new(
33684 "LENGTH".to_string(),
33685 vec![arg],
33686 )))),
33687 }
33688 }
33689
33690 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
33691 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
33692 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
33693 // The args should be [x, fraction] with the null handling stripped
33694 // For DuckDB: QUANTILE_CONT(x, fraction)
33695 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
33696 match target {
33697 DialectType::DuckDB => {
33698 // Strip down to just 2 args, rename to QUANTILE_CONT
33699 let x = args[0].clone();
33700 let frac = args[1].clone();
33701 Ok(Expression::Function(Box::new(Function::new(
33702 "QUANTILE_CONT".to_string(),
33703 vec![x, frac],
33704 ))))
33705 }
33706 _ => Ok(Expression::Function(Box::new(Function::new(
33707 "PERCENTILE_CONT".to_string(),
33708 args,
33709 )))),
33710 }
33711 }
33712
33713 // All others: pass through
33714 _ => Ok(Expression::Function(Box::new(Function {
33715 name: f.name,
33716 args,
33717 distinct: f.distinct,
33718 trailing_comments: f.trailing_comments,
33719 use_bracket_syntax: f.use_bracket_syntax,
33720 no_parens: f.no_parens,
33721 quoted: f.quoted,
33722 span: None,
33723 inferred_type: None,
33724 }))),
33725 }
33726 }
33727
33728 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
33729 /// Returns false for column references and other non-literal expressions where the type is unknown.
33730 fn can_infer_presto_type(expr: &Expression) -> bool {
33731 match expr {
33732 Expression::Literal(_) => true,
33733 Expression::Boolean(_) => true,
33734 Expression::Array(_) | Expression::ArrayFunc(_) => true,
33735 Expression::Struct(_) | Expression::StructFunc(_) => true,
33736 Expression::Function(f) => {
33737 f.name.eq_ignore_ascii_case("STRUCT")
33738 || f.name.eq_ignore_ascii_case("ROW")
33739 || f.name.eq_ignore_ascii_case("CURRENT_DATE")
33740 || f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
33741 || f.name.eq_ignore_ascii_case("NOW")
33742 }
33743 Expression::Cast(_) => true,
33744 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
33745 _ => false,
33746 }
33747 }
33748
33749 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
33750 fn infer_sql_type_for_presto(expr: &Expression) -> String {
33751 use crate::expressions::Literal;
33752 match expr {
33753 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => "VARCHAR".to_string(),
33754 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
33755 let Literal::Number(n) = lit.as_ref() else { unreachable!() };
33756 if n.contains('.') {
33757 "DOUBLE".to_string()
33758 } else {
33759 "INTEGER".to_string()
33760 }
33761 }
33762 Expression::Boolean(_) => "BOOLEAN".to_string(),
33763 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => "DATE".to_string(),
33764 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => "TIMESTAMP".to_string(),
33765 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => "TIMESTAMP".to_string(),
33766 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
33767 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
33768 Expression::Function(f) => {
33769 if f.name.eq_ignore_ascii_case("STRUCT") || f.name.eq_ignore_ascii_case("ROW") {
33770 "ROW".to_string()
33771 } else if f.name.eq_ignore_ascii_case("CURRENT_DATE") {
33772 "DATE".to_string()
33773 } else if f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP") || f.name.eq_ignore_ascii_case("NOW") {
33774 "TIMESTAMP".to_string()
33775 } else {
33776 "VARCHAR".to_string()
33777 }
33778 }
33779 Expression::Cast(c) => {
33780 // If already cast, use the target type
33781 Self::data_type_to_presto_string(&c.to)
33782 }
33783 _ => "VARCHAR".to_string(),
33784 }
33785 }
33786
33787 /// Convert a DataType to its Presto/Trino string representation for ROW type
33788 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
33789 use crate::expressions::DataType;
33790 match dt {
33791 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
33792 "VARCHAR".to_string()
33793 }
33794 DataType::Int { .. }
33795 | DataType::BigInt { .. }
33796 | DataType::SmallInt { .. }
33797 | DataType::TinyInt { .. } => "INTEGER".to_string(),
33798 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
33799 DataType::Boolean => "BOOLEAN".to_string(),
33800 DataType::Date => "DATE".to_string(),
33801 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
33802 DataType::Struct { fields, .. } => {
33803 let field_strs: Vec<String> = fields
33804 .iter()
33805 .map(|f| {
33806 format!(
33807 "{} {}",
33808 f.name,
33809 Self::data_type_to_presto_string(&f.data_type)
33810 )
33811 })
33812 .collect();
33813 format!("ROW({})", field_strs.join(", "))
33814 }
33815 DataType::Array { element_type, .. } => {
33816 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
33817 }
33818 DataType::Custom { name } => {
33819 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
33820 name.clone()
33821 }
33822 _ => "VARCHAR".to_string(),
33823 }
33824 }
33825
33826 /// Convert IntervalUnit to string
33827 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> &'static str {
33828 match unit {
33829 crate::expressions::IntervalUnit::Year => "YEAR",
33830 crate::expressions::IntervalUnit::Quarter => "QUARTER",
33831 crate::expressions::IntervalUnit::Month => "MONTH",
33832 crate::expressions::IntervalUnit::Week => "WEEK",
33833 crate::expressions::IntervalUnit::Day => "DAY",
33834 crate::expressions::IntervalUnit::Hour => "HOUR",
33835 crate::expressions::IntervalUnit::Minute => "MINUTE",
33836 crate::expressions::IntervalUnit::Second => "SECOND",
33837 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
33838 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
33839 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND",
33840 }
33841 }
33842
33843 /// Extract unit string from an expression (uppercased)
33844 fn get_unit_str_static(expr: &Expression) -> String {
33845 use crate::expressions::Literal;
33846 match expr {
33847 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
33848 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => { let Literal::String(s) = lit.as_ref() else { unreachable!() }; s.to_ascii_uppercase() },
33849 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
33850 Expression::Function(f) => {
33851 let base = f.name.to_ascii_uppercase();
33852 if !f.args.is_empty() {
33853 let inner = Self::get_unit_str_static(&f.args[0]);
33854 format!("{}({})", base, inner)
33855 } else {
33856 base
33857 }
33858 }
33859 _ => "DAY".to_string(),
33860 }
33861 }
33862
33863 /// Parse unit string to IntervalUnit
33864 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
33865 match s {
33866 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
33867 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
33868 "MONTH" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
33869 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
33870 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
33871 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
33872 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
33873 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
33874 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
33875 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
33876 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
33877 _ => crate::expressions::IntervalUnit::Day,
33878 }
33879 }
33880
33881 /// Convert expression to simple string for interval building
33882 fn expr_to_string_static(expr: &Expression) -> String {
33883 use crate::expressions::Literal;
33884 match expr {
33885 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => { let Literal::Number(s) = lit.as_ref() else { unreachable!() }; s.clone() },
33886 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => { let Literal::String(s) = lit.as_ref() else { unreachable!() }; s.clone() },
33887 Expression::Identifier(id) => id.name.clone(),
33888 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
33889 _ => "1".to_string(),
33890 }
33891 }
33892
33893 /// Extract a simple string representation from a literal expression
33894 fn expr_to_string(expr: &Expression) -> String {
33895 use crate::expressions::Literal;
33896 match expr {
33897 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => { let Literal::Number(s) = lit.as_ref() else { unreachable!() }; s.clone() },
33898 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => { let Literal::String(s) = lit.as_ref() else { unreachable!() }; s.clone() },
33899 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
33900 Expression::Identifier(id) => id.name.clone(),
33901 _ => "1".to_string(),
33902 }
33903 }
33904
    /// Quote an interval value expression as a string literal if it's a number (or negated number).
    ///
    /// Used when building INTERVAL '<n> <unit>' text: numeric literals are
    /// re-wrapped as string literals, string literals pass through, and any
    /// other expression is returned unchanged (cloned).
    fn quote_interval_val(expr: &Expression) -> Expression {
        use crate::expressions::Literal;
        match expr {
            // 5 -> '5': rewrap the numeric text as a string literal.
            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
                let Literal::Number(n) = lit.as_ref() else { unreachable!() };
                Expression::Literal(Box::new(Literal::String(n.clone())))
            }
            // Already a string literal: keep as-is.
            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => expr.clone(),
            // -5 -> '-5': fold the negation into the quoted string.
            Expression::Neg(inner) => {
                if let Expression::Literal(lit) = &inner.this {
                    if let Literal::Number(n) = lit.as_ref() {
                        Expression::Literal(Box::new(Literal::String(format!("-{}", n))))
                    } else { inner.this.clone() }
                    // NOTE(review): the else-branch above returns the inner literal
                    // and drops the Neg wrapper for negated non-number literals —
                    // confirm this is intentional (vs. returning expr.clone()).
                } else {
                    expr.clone()
                }
            }
            // Non-literal expressions are passed through untouched.
            _ => expr.clone(),
        }
    }
33926
33927 /// Check if a timestamp string contains timezone info (offset like +02:00, or named timezone)
33928 fn timestamp_string_has_timezone(ts: &str) -> bool {
33929 let trimmed = ts.trim();
33930 // Check for numeric timezone offsets: +N, -N, +NN:NN, -NN:NN at end
33931 if let Some(last_space) = trimmed.rfind(' ') {
33932 let suffix = &trimmed[last_space + 1..];
33933 if (suffix.starts_with('+') || suffix.starts_with('-')) && suffix.len() > 1 {
33934 let rest = &suffix[1..];
33935 if rest.chars().all(|c| c.is_ascii_digit() || c == ':') {
33936 return true;
33937 }
33938 }
33939 }
33940 // Check for named timezone abbreviations
33941 let ts_lower = trimmed.to_ascii_lowercase();
33942 let tz_abbrevs = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];
33943 for abbrev in &tz_abbrevs {
33944 if ts_lower.ends_with(abbrev) {
33945 return true;
33946 }
33947 }
33948 false
33949 }
33950
33951 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
33952 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
33953 use crate::expressions::{Cast, DataType, Literal};
33954 match expr {
33955 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
33956 let Literal::Timestamp(s) = lit.as_ref() else { unreachable!() };
33957 let tz = func_name.starts_with("TIMESTAMP");
33958 Expression::Cast(Box::new(Cast {
33959 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
33960 to: if tz {
33961 DataType::Timestamp {
33962 timezone: true,
33963 precision: None,
33964 }
33965 } else {
33966 DataType::Timestamp {
33967 timezone: false,
33968 precision: None,
33969 }
33970 },
33971 trailing_comments: vec![],
33972 double_colon_syntax: false,
33973 format: None,
33974 default: None,
33975 inferred_type: None,
33976 }))
33977 }
33978 other => other,
33979 }
33980 }
33981
33982 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
33983 fn maybe_cast_ts(expr: Expression) -> Expression {
33984 use crate::expressions::{Cast, DataType, Literal};
33985 match expr {
33986 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => { let Literal::Timestamp(s) = lit.as_ref() else { unreachable!() }; Expression::Cast(Box::new(Cast {
33987 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
33988 to: DataType::Timestamp {
33989 timezone: false,
33990 precision: None,
33991 },
33992 trailing_comments: vec![],
33993 double_colon_syntax: false,
33994 format: None,
33995 default: None,
33996 inferred_type: None,
33997 })) },
33998 other => other,
33999 }
34000 }
34001
34002 /// Convert DATE 'x' literal to CAST('x' AS DATE)
34003 fn date_literal_to_cast(expr: Expression) -> Expression {
34004 use crate::expressions::{Cast, DataType, Literal};
34005 match expr {
34006 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => { let Literal::Date(s) = lit.as_ref() else { unreachable!() }; Expression::Cast(Box::new(Cast {
34007 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
34008 to: DataType::Date,
34009 trailing_comments: vec![],
34010 double_colon_syntax: false,
34011 format: None,
34012 default: None,
34013 inferred_type: None,
34014 })) },
34015 other => other,
34016 }
34017 }
34018
34019 /// Ensure an expression that should be a date is CAST(... AS DATE).
34020 /// Handles both DATE literals and string literals that look like dates.
34021 fn ensure_cast_date(expr: Expression) -> Expression {
34022 use crate::expressions::{Cast, DataType, Literal};
34023 match expr {
34024 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => { let Literal::Date(s) = lit.as_ref() else { unreachable!() }; Expression::Cast(Box::new(Cast {
34025 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
34026 to: DataType::Date,
34027 trailing_comments: vec![],
34028 double_colon_syntax: false,
34029 format: None,
34030 default: None,
34031 inferred_type: None,
34032 })) },
34033 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
34034 // String literal that should be a date -> CAST('s' AS DATE)
34035 Expression::Cast(Box::new(Cast {
34036 this: expr,
34037 to: DataType::Date,
34038 trailing_comments: vec![],
34039 double_colon_syntax: false,
34040 format: None,
34041 default: None,
34042 inferred_type: None,
34043 }))
34044 }
34045 // Already a CAST or other expression -> leave as-is
34046 other => other,
34047 }
34048 }
34049
34050 /// Force CAST(expr AS DATE) for any expression (not just literals)
34051 /// Skips if the expression is already a CAST to DATE
34052 fn force_cast_date(expr: Expression) -> Expression {
34053 use crate::expressions::{Cast, DataType};
34054 // If it's already a CAST to DATE, don't double-wrap
34055 if let Expression::Cast(ref c) = expr {
34056 if matches!(c.to, DataType::Date) {
34057 return expr;
34058 }
34059 }
34060 Expression::Cast(Box::new(Cast {
34061 this: expr,
34062 to: DataType::Date,
34063 trailing_comments: vec![],
34064 double_colon_syntax: false,
34065 format: None,
34066 default: None,
34067 inferred_type: None,
34068 }))
34069 }
34070
34071 /// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
34072 /// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic.
34073 /// The Snowflake DATEDIFF handler converts these back to TO_DATE.
34074 const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
34075
34076 fn ensure_to_date_preserved(expr: Expression) -> Expression {
34077 use crate::expressions::{Function, Literal};
34078 if matches!(expr, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))) {
34079 Expression::Function(Box::new(Function::new(
34080 Self::PRESERVED_TO_DATE.to_string(),
34081 vec![expr],
34082 )))
34083 } else {
34084 expr
34085 }
34086 }
34087
34088 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
34089 fn try_cast_date(expr: Expression) -> Expression {
34090 use crate::expressions::{Cast, DataType};
34091 Expression::TryCast(Box::new(Cast {
34092 this: expr,
34093 to: DataType::Date,
34094 trailing_comments: vec![],
34095 double_colon_syntax: false,
34096 format: None,
34097 default: None,
34098 inferred_type: None,
34099 }))
34100 }
34101
34102 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
34103 fn double_cast_timestamp_date(expr: Expression) -> Expression {
34104 use crate::expressions::{Cast, DataType};
34105 let inner = Expression::Cast(Box::new(Cast {
34106 this: expr,
34107 to: DataType::Timestamp {
34108 timezone: false,
34109 precision: None,
34110 },
34111 trailing_comments: vec![],
34112 double_colon_syntax: false,
34113 format: None,
34114 default: None,
34115 inferred_type: None,
34116 }));
34117 Expression::Cast(Box::new(Cast {
34118 this: inner,
34119 to: DataType::Date,
34120 trailing_comments: vec![],
34121 double_colon_syntax: false,
34122 format: None,
34123 default: None,
34124 inferred_type: None,
34125 }))
34126 }
34127
34128 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
34129 fn double_cast_datetime_date(expr: Expression) -> Expression {
34130 use crate::expressions::{Cast, DataType};
34131 let inner = Expression::Cast(Box::new(Cast {
34132 this: expr,
34133 to: DataType::Custom {
34134 name: "DATETIME".to_string(),
34135 },
34136 trailing_comments: vec![],
34137 double_colon_syntax: false,
34138 format: None,
34139 default: None,
34140 inferred_type: None,
34141 }));
34142 Expression::Cast(Box::new(Cast {
34143 this: inner,
34144 to: DataType::Date,
34145 trailing_comments: vec![],
34146 double_colon_syntax: false,
34147 format: None,
34148 default: None,
34149 inferred_type: None,
34150 }))
34151 }
34152
34153 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
34154 fn double_cast_datetime2_date(expr: Expression) -> Expression {
34155 use crate::expressions::{Cast, DataType};
34156 let inner = Expression::Cast(Box::new(Cast {
34157 this: expr,
34158 to: DataType::Custom {
34159 name: "DATETIME2".to_string(),
34160 },
34161 trailing_comments: vec![],
34162 double_colon_syntax: false,
34163 format: None,
34164 default: None,
34165 inferred_type: None,
34166 }));
34167 Expression::Cast(Box::new(Cast {
34168 this: inner,
34169 to: DataType::Date,
34170 trailing_comments: vec![],
34171 double_colon_syntax: false,
34172 format: None,
34173 default: None,
34174 inferred_type: None,
34175 }))
34176 }
34177
34178 /// Convert Hive/Java-style date format strings to C-style (strftime) format
34179 /// e.g., "yyyy-MM-dd'T'HH" -> "%Y-%m-%d'T'%H"
34180 fn hive_format_to_c_format(fmt: &str) -> String {
34181 let mut result = String::new();
34182 let chars: Vec<char> = fmt.chars().collect();
34183 let mut i = 0;
34184 while i < chars.len() {
34185 match chars[i] {
34186 'y' => {
34187 let mut count = 0;
34188 while i < chars.len() && chars[i] == 'y' {
34189 count += 1;
34190 i += 1;
34191 }
34192 if count >= 4 {
34193 result.push_str("%Y");
34194 } else if count == 2 {
34195 result.push_str("%y");
34196 } else {
34197 result.push_str("%Y");
34198 }
34199 }
34200 'M' => {
34201 let mut count = 0;
34202 while i < chars.len() && chars[i] == 'M' {
34203 count += 1;
34204 i += 1;
34205 }
34206 if count >= 3 {
34207 result.push_str("%b");
34208 } else if count == 2 {
34209 result.push_str("%m");
34210 } else {
34211 result.push_str("%m");
34212 }
34213 }
34214 'd' => {
34215 let mut _count = 0;
34216 while i < chars.len() && chars[i] == 'd' {
34217 _count += 1;
34218 i += 1;
34219 }
34220 result.push_str("%d");
34221 }
34222 'H' => {
34223 let mut _count = 0;
34224 while i < chars.len() && chars[i] == 'H' {
34225 _count += 1;
34226 i += 1;
34227 }
34228 result.push_str("%H");
34229 }
34230 'h' => {
34231 let mut _count = 0;
34232 while i < chars.len() && chars[i] == 'h' {
34233 _count += 1;
34234 i += 1;
34235 }
34236 result.push_str("%I");
34237 }
34238 'm' => {
34239 let mut _count = 0;
34240 while i < chars.len() && chars[i] == 'm' {
34241 _count += 1;
34242 i += 1;
34243 }
34244 result.push_str("%M");
34245 }
34246 's' => {
34247 let mut _count = 0;
34248 while i < chars.len() && chars[i] == 's' {
34249 _count += 1;
34250 i += 1;
34251 }
34252 result.push_str("%S");
34253 }
34254 'S' => {
34255 // Fractional seconds - skip
34256 while i < chars.len() && chars[i] == 'S' {
34257 i += 1;
34258 }
34259 result.push_str("%f");
34260 }
34261 'a' => {
34262 // AM/PM
34263 while i < chars.len() && chars[i] == 'a' {
34264 i += 1;
34265 }
34266 result.push_str("%p");
34267 }
34268 'E' => {
34269 let mut count = 0;
34270 while i < chars.len() && chars[i] == 'E' {
34271 count += 1;
34272 i += 1;
34273 }
34274 if count >= 4 {
34275 result.push_str("%A");
34276 } else {
34277 result.push_str("%a");
34278 }
34279 }
34280 '\'' => {
34281 // Quoted literal text - pass through the quotes and content
34282 result.push('\'');
34283 i += 1;
34284 while i < chars.len() && chars[i] != '\'' {
34285 result.push(chars[i]);
34286 i += 1;
34287 }
34288 if i < chars.len() {
34289 result.push('\'');
34290 i += 1;
34291 }
34292 }
34293 c => {
34294 result.push(c);
34295 i += 1;
34296 }
34297 }
34298 }
34299 result
34300 }
34301
34302 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
34303 fn hive_format_to_presto_format(fmt: &str) -> String {
34304 let c_fmt = Self::hive_format_to_c_format(fmt);
34305 // Presto uses %T for HH:MM:SS
34306 c_fmt.replace("%H:%M:%S", "%T")
34307 }
34308
34309 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
34310 fn ensure_cast_timestamp(expr: Expression) -> Expression {
34311 use crate::expressions::{Cast, DataType, Literal};
34312 match expr {
34313 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => { let Literal::Timestamp(s) = lit.as_ref() else { unreachable!() }; Expression::Cast(Box::new(Cast {
34314 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
34315 to: DataType::Timestamp {
34316 timezone: false,
34317 precision: None,
34318 },
34319 trailing_comments: vec![],
34320 double_colon_syntax: false,
34321 format: None,
34322 default: None,
34323 inferred_type: None,
34324 })) },
34325 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
34326 this: expr,
34327 to: DataType::Timestamp {
34328 timezone: false,
34329 precision: None,
34330 },
34331 trailing_comments: vec![],
34332 double_colon_syntax: false,
34333 format: None,
34334 default: None,
34335 inferred_type: None,
34336 })),
34337 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => { let Literal::Datetime(s) = lit.as_ref() else { unreachable!() }; Expression::Cast(Box::new(Cast {
34338 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
34339 to: DataType::Timestamp {
34340 timezone: false,
34341 precision: None,
34342 },
34343 trailing_comments: vec![],
34344 double_colon_syntax: false,
34345 format: None,
34346 default: None,
34347 inferred_type: None,
34348 })) },
34349 other => other,
34350 }
34351 }
34352
34353 /// Force CAST to TIMESTAMP for any expression (not just literals)
34354 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
34355 fn force_cast_timestamp(expr: Expression) -> Expression {
34356 use crate::expressions::{Cast, DataType};
34357 // Don't double-wrap if already a CAST to TIMESTAMP
34358 if let Expression::Cast(ref c) = expr {
34359 if matches!(c.to, DataType::Timestamp { .. }) {
34360 return expr;
34361 }
34362 }
34363 Expression::Cast(Box::new(Cast {
34364 this: expr,
34365 to: DataType::Timestamp {
34366 timezone: false,
34367 precision: None,
34368 },
34369 trailing_comments: vec![],
34370 double_colon_syntax: false,
34371 format: None,
34372 default: None,
34373 inferred_type: None,
34374 }))
34375 }
34376
34377 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
34378 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
34379 use crate::expressions::{Cast, DataType, Literal};
34380 match expr {
34381 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => { let Literal::Timestamp(s) = lit.as_ref() else { unreachable!() }; Expression::Cast(Box::new(Cast {
34382 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
34383 to: DataType::Timestamp {
34384 timezone: true,
34385 precision: None,
34386 },
34387 trailing_comments: vec![],
34388 double_colon_syntax: false,
34389 format: None,
34390 default: None,
34391 inferred_type: None,
34392 })) },
34393 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
34394 this: expr,
34395 to: DataType::Timestamp {
34396 timezone: true,
34397 precision: None,
34398 },
34399 trailing_comments: vec![],
34400 double_colon_syntax: false,
34401 format: None,
34402 default: None,
34403 inferred_type: None,
34404 })),
34405 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => { let Literal::Datetime(s) = lit.as_ref() else { unreachable!() }; Expression::Cast(Box::new(Cast {
34406 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
34407 to: DataType::Timestamp {
34408 timezone: true,
34409 precision: None,
34410 },
34411 trailing_comments: vec![],
34412 double_colon_syntax: false,
34413 format: None,
34414 default: None,
34415 inferred_type: None,
34416 })) },
34417 other => other,
34418 }
34419 }
34420
34421 /// Ensure expression is CAST to DATETIME (for BigQuery)
34422 fn ensure_cast_datetime(expr: Expression) -> Expression {
34423 use crate::expressions::{Cast, DataType, Literal};
34424 match expr {
34425 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
34426 this: expr,
34427 to: DataType::Custom {
34428 name: "DATETIME".to_string(),
34429 },
34430 trailing_comments: vec![],
34431 double_colon_syntax: false,
34432 format: None,
34433 default: None,
34434 inferred_type: None,
34435 })),
34436 other => other,
34437 }
34438 }
34439
34440 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
34441 fn force_cast_datetime(expr: Expression) -> Expression {
34442 use crate::expressions::{Cast, DataType};
34443 if let Expression::Cast(ref c) = expr {
34444 if let DataType::Custom { ref name } = c.to {
34445 if name.eq_ignore_ascii_case("DATETIME") {
34446 return expr;
34447 }
34448 }
34449 }
34450 Expression::Cast(Box::new(Cast {
34451 this: expr,
34452 to: DataType::Custom {
34453 name: "DATETIME".to_string(),
34454 },
34455 trailing_comments: vec![],
34456 double_colon_syntax: false,
34457 format: None,
34458 default: None,
34459 inferred_type: None,
34460 }))
34461 }
34462
34463 /// Ensure expression is CAST to DATETIME2 (for TSQL)
34464 fn ensure_cast_datetime2(expr: Expression) -> Expression {
34465 use crate::expressions::{Cast, DataType, Literal};
34466 match expr {
34467 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => Expression::Cast(Box::new(Cast {
34468 this: expr,
34469 to: DataType::Custom {
34470 name: "DATETIME2".to_string(),
34471 },
34472 trailing_comments: vec![],
34473 double_colon_syntax: false,
34474 format: None,
34475 default: None,
34476 inferred_type: None,
34477 })),
34478 other => other,
34479 }
34480 }
34481
34482 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
34483 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
34484 use crate::expressions::{Cast, DataType, Literal};
34485 match expr {
34486 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => { let Literal::Timestamp(s) = lit.as_ref() else { unreachable!() }; Expression::Cast(Box::new(Cast {
34487 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
34488 to: DataType::Timestamp {
34489 timezone: true,
34490 precision: None,
34491 },
34492 trailing_comments: vec![],
34493 double_colon_syntax: false,
34494 format: None,
34495 default: None,
34496 inferred_type: None,
34497 })) },
34498 other => other,
34499 }
34500 }
34501
34502 /// Convert BigQuery format string to Snowflake format string
34503 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
34504 use crate::expressions::Literal;
34505 if let Expression::Literal(lit) = format_expr {
34506 if let Literal::String(s) = lit.as_ref() {
34507 let sf = s
34508 .replace("%Y", "yyyy")
34509 .replace("%m", "mm")
34510 .replace("%d", "DD")
34511 .replace("%H", "HH24")
34512 .replace("%M", "MI")
34513 .replace("%S", "SS")
34514 .replace("%b", "mon")
34515 .replace("%B", "Month")
34516 .replace("%e", "FMDD");
34517 Expression::Literal(Box::new(Literal::String(sf)))
34518 } else { format_expr.clone() }
34519 } else {
34520 format_expr.clone()
34521 }
34522 }
34523
34524 /// Convert BigQuery format string to DuckDB format string
34525 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
34526 use crate::expressions::Literal;
34527 if let Expression::Literal(lit) = format_expr {
34528 if let Literal::String(s) = lit.as_ref() {
34529 let duck = s
34530 .replace("%T", "%H:%M:%S")
34531 .replace("%F", "%Y-%m-%d")
34532 .replace("%D", "%m/%d/%y")
34533 .replace("%x", "%m/%d/%y")
34534 .replace("%c", "%a %b %-d %H:%M:%S %Y")
34535 .replace("%e", "%-d")
34536 .replace("%E6S", "%S.%f");
34537 Expression::Literal(Box::new(Literal::String(duck)))
34538 } else { format_expr.clone() }
34539 } else {
34540 format_expr.clone()
34541 }
34542 }
34543
34544 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
34545 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
34546 use crate::expressions::Literal;
34547 if let Expression::Literal(lit) = format_expr {
34548 if let Literal::String(s) = lit.as_ref() {
34549 // Replace format elements from longest to shortest to avoid partial matches
34550 let result = s
34551 .replace("YYYYMMDD", "%Y%m%d")
34552 .replace("YYYY", "%Y")
34553 .replace("YY", "%y")
34554 .replace("MONTH", "%B")
34555 .replace("MON", "%b")
34556 .replace("MM", "%m")
34557 .replace("DD", "%d")
34558 .replace("HH24", "%H")
34559 .replace("HH12", "%I")
34560 .replace("HH", "%I")
34561 .replace("MI", "%M")
34562 .replace("SSTZH", "%S%z")
34563 .replace("SS", "%S")
34564 .replace("TZH", "%z");
34565 Expression::Literal(Box::new(Literal::String(result)))
34566 } else { format_expr.clone() }
34567 } else {
34568 format_expr.clone()
34569 }
34570 }
34571
34572 /// Normalize BigQuery format strings for BQ->BQ output
34573 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
34574 use crate::expressions::Literal;
34575 if let Expression::Literal(lit) = format_expr {
34576 if let Literal::String(s) = lit.as_ref() {
34577 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
34578 Expression::Literal(Box::new(Literal::String(norm)))
34579 } else { format_expr.clone() }
34580 } else {
34581 format_expr.clone()
34582 }
34583 }
34584}
34585
34586#[cfg(test)]
34587mod tests {
34588 use super::*;
34589
34590 #[test]
34591 fn test_dialect_type_from_str() {
34592 assert_eq!(
34593 "postgres".parse::<DialectType>().unwrap(),
34594 DialectType::PostgreSQL
34595 );
34596 assert_eq!(
34597 "postgresql".parse::<DialectType>().unwrap(),
34598 DialectType::PostgreSQL
34599 );
34600 assert_eq!("mysql".parse::<DialectType>().unwrap(), DialectType::MySQL);
34601 assert_eq!(
34602 "bigquery".parse::<DialectType>().unwrap(),
34603 DialectType::BigQuery
34604 );
34605 }
34606
34607 #[test]
34608 fn test_basic_transpile() {
34609 let dialect = Dialect::get(DialectType::Generic);
34610 let result = dialect
34611 .transpile_to("SELECT 1", DialectType::PostgreSQL)
34612 .unwrap();
34613 assert_eq!(result.len(), 1);
34614 assert_eq!(result[0], "SELECT 1");
34615 }
34616
34617 #[test]
34618 fn test_function_transformation_mysql() {
34619 // NVL should be transformed to IFNULL in MySQL
34620 let dialect = Dialect::get(DialectType::Generic);
34621 let result = dialect
34622 .transpile_to("SELECT NVL(a, b)", DialectType::MySQL)
34623 .unwrap();
34624 assert_eq!(result[0], "SELECT IFNULL(a, b)");
34625 }
34626
34627 #[test]
34628 fn test_get_path_duckdb() {
34629 // Test: step by step
34630 let snowflake = Dialect::get(DialectType::Snowflake);
34631
34632 // Step 1: Parse and check what Snowflake produces as intermediate
34633 let result_sf_sf = snowflake
34634 .transpile_to(
34635 "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
34636 DialectType::Snowflake,
34637 )
34638 .unwrap();
34639 eprintln!("Snowflake->Snowflake colon: {}", result_sf_sf[0]);
34640
34641 // Step 2: DuckDB target
34642 let result_sf_dk = snowflake
34643 .transpile_to(
34644 "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
34645 DialectType::DuckDB,
34646 )
34647 .unwrap();
34648 eprintln!("Snowflake->DuckDB colon: {}", result_sf_dk[0]);
34649
34650 // Step 3: GET_PATH directly
34651 let result_gp = snowflake
34652 .transpile_to(
34653 "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')",
34654 DialectType::DuckDB,
34655 )
34656 .unwrap();
34657 eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", result_gp[0]);
34658 }
34659
34660 #[test]
34661 fn test_function_transformation_postgres() {
34662 // IFNULL should be transformed to COALESCE in PostgreSQL
34663 let dialect = Dialect::get(DialectType::Generic);
34664 let result = dialect
34665 .transpile_to("SELECT IFNULL(a, b)", DialectType::PostgreSQL)
34666 .unwrap();
34667 assert_eq!(result[0], "SELECT COALESCE(a, b)");
34668
34669 // NVL should also be transformed to COALESCE
34670 let result = dialect
34671 .transpile_to("SELECT NVL(a, b)", DialectType::PostgreSQL)
34672 .unwrap();
34673 assert_eq!(result[0], "SELECT COALESCE(a, b)");
34674 }
34675
34676 #[test]
34677 fn test_hive_cast_to_trycast() {
34678 // Hive CAST should become TRY_CAST for targets that support it
34679 let hive = Dialect::get(DialectType::Hive);
34680 let result = hive
34681 .transpile_to("CAST(1 AS INT)", DialectType::DuckDB)
34682 .unwrap();
34683 assert_eq!(result[0], "TRY_CAST(1 AS INT)");
34684
34685 let result = hive
34686 .transpile_to("CAST(1 AS INT)", DialectType::Presto)
34687 .unwrap();
34688 assert_eq!(result[0], "TRY_CAST(1 AS INTEGER)");
34689 }
34690
34691 #[test]
34692 fn test_hive_array_identity() {
34693 // Hive ARRAY<DATE> should preserve angle bracket syntax
34694 let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
34695 let hive = Dialect::get(DialectType::Hive);
34696
34697 // Test via transpile_to (this works)
34698 let result = hive.transpile_to(sql, DialectType::Hive).unwrap();
34699 eprintln!("Hive ARRAY via transpile_to: {}", result[0]);
34700 assert!(
34701 result[0].contains("ARRAY<DATE>"),
34702 "transpile_to: Expected ARRAY<DATE>, got: {}",
34703 result[0]
34704 );
34705
34706 // Test via parse -> transform -> generate (identity test path)
34707 let ast = hive.parse(sql).unwrap();
34708 let transformed = hive.transform(ast[0].clone()).unwrap();
34709 let output = hive.generate(&transformed).unwrap();
34710 eprintln!("Hive ARRAY via identity path: {}", output);
34711 assert!(
34712 output.contains("ARRAY<DATE>"),
34713 "identity path: Expected ARRAY<DATE>, got: {}",
34714 output
34715 );
34716 }
34717
34718 #[test]
34719 fn test_starrocks_delete_between_expansion() {
34720 // StarRocks doesn't support BETWEEN in DELETE statements
34721 let dialect = Dialect::get(DialectType::Generic);
34722
34723 // BETWEEN should be expanded to >= AND <= in DELETE
34724 let result = dialect
34725 .transpile_to(
34726 "DELETE FROM t WHERE a BETWEEN b AND c",
34727 DialectType::StarRocks,
34728 )
34729 .unwrap();
34730 assert_eq!(result[0], "DELETE FROM t WHERE a >= b AND a <= c");
34731
34732 // NOT BETWEEN should be expanded to < OR > in DELETE
34733 let result = dialect
34734 .transpile_to(
34735 "DELETE FROM t WHERE a NOT BETWEEN b AND c",
34736 DialectType::StarRocks,
34737 )
34738 .unwrap();
34739 assert_eq!(result[0], "DELETE FROM t WHERE a < b OR a > c");
34740
34741 // BETWEEN in SELECT should NOT be expanded (StarRocks supports it there)
34742 let result = dialect
34743 .transpile_to(
34744 "SELECT * FROM t WHERE a BETWEEN b AND c",
34745 DialectType::StarRocks,
34746 )
34747 .unwrap();
34748 assert!(
34749 result[0].contains("BETWEEN"),
34750 "BETWEEN should be preserved in SELECT"
34751 );
34752 }
34753
34754 #[test]
34755 fn test_snowflake_ltrim_rtrim_parse() {
34756 let sf = Dialect::get(DialectType::Snowflake);
34757 let sql = "SELECT LTRIM(RTRIM(col)) FROM t1";
34758 let result = sf.transpile_to(sql, DialectType::DuckDB);
34759 match &result {
34760 Ok(r) => eprintln!("LTRIM/RTRIM result: {}", r[0]),
34761 Err(e) => eprintln!("LTRIM/RTRIM error: {}", e),
34762 }
34763 assert!(
34764 result.is_ok(),
34765 "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
34766 result.err()
34767 );
34768 }
34769
34770 #[test]
34771 fn test_duckdb_count_if_parse() {
34772 let duck = Dialect::get(DialectType::DuckDB);
34773 let sql = "COUNT_IF(x)";
34774 let result = duck.transpile_to(sql, DialectType::DuckDB);
34775 match &result {
34776 Ok(r) => eprintln!("COUNT_IF result: {}", r[0]),
34777 Err(e) => eprintln!("COUNT_IF error: {}", e),
34778 }
34779 assert!(
34780 result.is_ok(),
34781 "Expected successful parse of COUNT_IF(x), got error: {:?}",
34782 result.err()
34783 );
34784 }
34785
34786 #[test]
34787 fn test_tsql_cast_tinyint_parse() {
34788 let tsql = Dialect::get(DialectType::TSQL);
34789 let sql = "CAST(X AS TINYINT)";
34790 let result = tsql.transpile_to(sql, DialectType::DuckDB);
34791 match &result {
34792 Ok(r) => eprintln!("TSQL CAST TINYINT result: {}", r[0]),
34793 Err(e) => eprintln!("TSQL CAST TINYINT error: {}", e),
34794 }
34795 assert!(
34796 result.is_ok(),
34797 "Expected successful transpile, got error: {:?}",
34798 result.err()
34799 );
34800 }
34801
34802 #[test]
34803 fn test_pg_hash_bitwise_xor() {
34804 let dialect = Dialect::get(DialectType::PostgreSQL);
34805 let result = dialect
34806 .transpile_to("x # y", DialectType::PostgreSQL)
34807 .unwrap();
34808 assert_eq!(result[0], "x # y");
34809 }
34810
34811 #[test]
34812 fn test_pg_array_to_duckdb() {
34813 let dialect = Dialect::get(DialectType::PostgreSQL);
34814 let result = dialect
34815 .transpile_to("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]", DialectType::DuckDB)
34816 .unwrap();
34817 assert_eq!(result[0], "SELECT [1, 2, 3] @> [1, 2]");
34818 }
34819
34820 #[test]
34821 fn test_array_remove_bigquery() {
34822 let dialect = Dialect::get(DialectType::Generic);
34823 let result = dialect
34824 .transpile_to("ARRAY_REMOVE(the_array, target)", DialectType::BigQuery)
34825 .unwrap();
34826 assert_eq!(
34827 result[0],
34828 "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)"
34829 );
34830 }
34831
34832 #[test]
34833 fn test_map_clickhouse_case() {
34834 let dialect = Dialect::get(DialectType::Generic);
34835 let parsed = dialect
34836 .parse("CAST(MAP('a', '1') AS MAP(TEXT, TEXT))")
34837 .unwrap();
34838 eprintln!("MAP parsed: {:?}", parsed);
34839 let result = dialect
34840 .transpile_to(
34841 "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))",
34842 DialectType::ClickHouse,
34843 )
34844 .unwrap();
34845 eprintln!("MAP result: {}", result[0]);
34846 }
34847
34848 #[test]
34849 fn test_generate_date_array_presto() {
34850 let dialect = Dialect::get(DialectType::Generic);
34851 let result = dialect.transpile_to(
34852 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
34853 DialectType::Presto,
34854 ).unwrap();
34855 eprintln!("GDA -> Presto: {}", result[0]);
34856 assert_eq!(result[0], "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))");
34857 }
34858
34859 #[test]
34860 fn test_generate_date_array_postgres() {
34861 let dialect = Dialect::get(DialectType::Generic);
34862 let result = dialect.transpile_to(
34863 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
34864 DialectType::PostgreSQL,
34865 ).unwrap();
34866 eprintln!("GDA -> PostgreSQL: {}", result[0]);
34867 }
34868
34869 #[test]
34870 fn test_generate_date_array_snowflake() {
34871 std::thread::Builder::new()
34872 .stack_size(16 * 1024 * 1024)
34873 .spawn(|| {
34874 let dialect = Dialect::get(DialectType::Generic);
34875 let result = dialect.transpile_to(
34876 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
34877 DialectType::Snowflake,
34878 ).unwrap();
34879 eprintln!("GDA -> Snowflake: {}", result[0]);
34880 })
34881 .unwrap()
34882 .join()
34883 .unwrap();
34884 }
34885
34886 #[test]
34887 fn test_array_length_generate_date_array_snowflake() {
34888 let dialect = Dialect::get(DialectType::Generic);
34889 let result = dialect.transpile_to(
34890 "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
34891 DialectType::Snowflake,
34892 ).unwrap();
34893 eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", result[0]);
34894 }
34895
34896 #[test]
34897 fn test_generate_date_array_mysql() {
34898 let dialect = Dialect::get(DialectType::Generic);
34899 let result = dialect.transpile_to(
34900 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
34901 DialectType::MySQL,
34902 ).unwrap();
34903 eprintln!("GDA -> MySQL: {}", result[0]);
34904 }
34905
34906 #[test]
34907 fn test_generate_date_array_redshift() {
34908 let dialect = Dialect::get(DialectType::Generic);
34909 let result = dialect.transpile_to(
34910 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
34911 DialectType::Redshift,
34912 ).unwrap();
34913 eprintln!("GDA -> Redshift: {}", result[0]);
34914 }
34915
34916 #[test]
34917 fn test_generate_date_array_tsql() {
34918 let dialect = Dialect::get(DialectType::Generic);
34919 let result = dialect.transpile_to(
34920 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
34921 DialectType::TSQL,
34922 ).unwrap();
34923 eprintln!("GDA -> TSQL: {}", result[0]);
34924 }
34925
34926 #[test]
34927 fn test_struct_colon_syntax() {
34928 let dialect = Dialect::get(DialectType::Generic);
34929 // Test without colon first
34930 let result = dialect.transpile_to(
34931 "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)",
34932 DialectType::ClickHouse,
34933 );
34934 match result {
34935 Ok(r) => eprintln!("STRUCT no colon -> ClickHouse: {}", r[0]),
34936 Err(e) => eprintln!("STRUCT no colon error: {}", e),
34937 }
34938 // Now test with colon
34939 let result = dialect.transpile_to(
34940 "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
34941 DialectType::ClickHouse,
34942 );
34943 match result {
34944 Ok(r) => eprintln!("STRUCT colon -> ClickHouse: {}", r[0]),
34945 Err(e) => eprintln!("STRUCT colon error: {}", e),
34946 }
34947 }
34948
34949 #[test]
34950 fn test_generate_date_array_cte_wrapped_mysql() {
34951 let dialect = Dialect::get(DialectType::Generic);
34952 let result = dialect.transpile_to(
34953 "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
34954 DialectType::MySQL,
34955 ).unwrap();
34956 eprintln!("GDA CTE -> MySQL: {}", result[0]);
34957 }
34958
34959 #[test]
34960 fn test_generate_date_array_cte_wrapped_tsql() {
34961 let dialect = Dialect::get(DialectType::Generic);
34962 let result = dialect.transpile_to(
34963 "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
34964 DialectType::TSQL,
34965 ).unwrap();
34966 eprintln!("GDA CTE -> TSQL: {}", result[0]);
34967 }
34968
34969 #[test]
34970 fn test_decode_literal_no_null_check() {
34971 // Oracle DECODE with all literals should produce simple equality, no IS NULL
34972 let dialect = Dialect::get(DialectType::Oracle);
34973 let result = dialect
34974 .transpile_to("SELECT decode(1,2,3,4)", DialectType::DuckDB)
34975 .unwrap();
34976 assert_eq!(
34977 result[0], "SELECT CASE WHEN 1 = 2 THEN 3 ELSE 4 END",
34978 "Literal DECODE should not have IS NULL checks"
34979 );
34980 }
34981
34982 #[test]
34983 fn test_decode_column_vs_literal_no_null_check() {
34984 // Oracle DECODE with column vs literal should use simple equality (like sqlglot)
34985 let dialect = Dialect::get(DialectType::Oracle);
34986 let result = dialect
34987 .transpile_to("SELECT decode(col, 2, 3, 4) FROM t", DialectType::DuckDB)
34988 .unwrap();
34989 assert_eq!(
34990 result[0], "SELECT CASE WHEN col = 2 THEN 3 ELSE 4 END FROM t",
34991 "Column vs literal DECODE should not have IS NULL checks"
34992 );
34993 }
34994
34995 #[test]
34996 fn test_decode_column_vs_column_keeps_null_check() {
34997 // Oracle DECODE with column vs column should keep null-safe comparison
34998 let dialect = Dialect::get(DialectType::Oracle);
34999 let result = dialect
35000 .transpile_to("SELECT decode(col, col2, 3, 4) FROM t", DialectType::DuckDB)
35001 .unwrap();
35002 assert!(
35003 result[0].contains("IS NULL"),
35004 "Column vs column DECODE should have IS NULL checks, got: {}",
35005 result[0]
35006 );
35007 }
35008
35009 #[test]
35010 fn test_decode_null_search() {
35011 // Oracle DECODE with NULL search should use IS NULL
35012 let dialect = Dialect::get(DialectType::Oracle);
35013 let result = dialect
35014 .transpile_to("SELECT decode(col, NULL, 3, 4) FROM t", DialectType::DuckDB)
35015 .unwrap();
35016 assert_eq!(
35017 result[0],
35018 "SELECT CASE WHEN col IS NULL THEN 3 ELSE 4 END FROM t",
35019 );
35020 }
35021
35022 // =========================================================================
35023 // REGEXP function transpilation tests
35024 // =========================================================================
35025
35026 #[test]
35027 fn test_regexp_substr_snowflake_to_duckdb_2arg() {
35028 let dialect = Dialect::get(DialectType::Snowflake);
35029 let result = dialect
35030 .transpile_to("SELECT REGEXP_SUBSTR(s, 'pattern')", DialectType::DuckDB)
35031 .unwrap();
35032 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
35033 }
35034
35035 #[test]
35036 fn test_regexp_substr_snowflake_to_duckdb_3arg_pos1() {
35037 let dialect = Dialect::get(DialectType::Snowflake);
35038 let result = dialect
35039 .transpile_to("SELECT REGEXP_SUBSTR(s, 'pattern', 1)", DialectType::DuckDB)
35040 .unwrap();
35041 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
35042 }
35043
35044 #[test]
35045 fn test_regexp_substr_snowflake_to_duckdb_3arg_pos_gt1() {
35046 let dialect = Dialect::get(DialectType::Snowflake);
35047 let result = dialect
35048 .transpile_to("SELECT REGEXP_SUBSTR(s, 'pattern', 3)", DialectType::DuckDB)
35049 .unwrap();
35050 assert_eq!(
35051 result[0],
35052 "SELECT REGEXP_EXTRACT(NULLIF(SUBSTRING(s, 3), ''), 'pattern')"
35053 );
35054 }
35055
35056 #[test]
35057 fn test_regexp_substr_snowflake_to_duckdb_4arg_occ_gt1() {
35058 let dialect = Dialect::get(DialectType::Snowflake);
35059 let result = dialect
35060 .transpile_to("SELECT REGEXP_SUBSTR(s, 'pattern', 1, 3)", DialectType::DuckDB)
35061 .unwrap();
35062 assert_eq!(
35063 result[0],
35064 "SELECT ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, 'pattern'), 3)"
35065 );
35066 }
35067
35068 #[test]
35069 fn test_regexp_substr_snowflake_to_duckdb_5arg_e_flag() {
35070 let dialect = Dialect::get(DialectType::Snowflake);
35071 let result = dialect
35072 .transpile_to(
35073 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')",
35074 DialectType::DuckDB,
35075 )
35076 .unwrap();
35077 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
35078 }
35079
35080 #[test]
35081 fn test_regexp_substr_snowflake_to_duckdb_6arg_group0() {
35082 let dialect = Dialect::get(DialectType::Snowflake);
35083 let result = dialect
35084 .transpile_to(
35085 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)",
35086 DialectType::DuckDB,
35087 )
35088 .unwrap();
35089 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
35090 }
35091
35092 #[test]
35093 fn test_regexp_substr_snowflake_identity_strip_group0() {
35094 let dialect = Dialect::get(DialectType::Snowflake);
35095 let result = dialect
35096 .transpile_to(
35097 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)",
35098 DialectType::Snowflake,
35099 )
35100 .unwrap();
35101 assert_eq!(
35102 result[0],
35103 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')"
35104 );
35105 }
35106
35107 #[test]
35108 fn test_regexp_substr_all_snowflake_to_duckdb_2arg() {
35109 let dialect = Dialect::get(DialectType::Snowflake);
35110 let result = dialect
35111 .transpile_to("SELECT REGEXP_SUBSTR_ALL(s, 'pattern')", DialectType::DuckDB)
35112 .unwrap();
35113 assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
35114 }
35115
35116 #[test]
35117 fn test_regexp_substr_all_snowflake_to_duckdb_3arg_pos_gt1() {
35118 let dialect = Dialect::get(DialectType::Snowflake);
35119 let result = dialect
35120 .transpile_to(
35121 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 3)",
35122 DialectType::DuckDB,
35123 )
35124 .unwrap();
35125 assert_eq!(
35126 result[0],
35127 "SELECT REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')"
35128 );
35129 }
35130
35131 #[test]
35132 fn test_regexp_substr_all_snowflake_to_duckdb_5arg_e_flag() {
35133 let dialect = Dialect::get(DialectType::Snowflake);
35134 let result = dialect
35135 .transpile_to(
35136 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')",
35137 DialectType::DuckDB,
35138 )
35139 .unwrap();
35140 assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
35141 }
35142
35143 #[test]
35144 fn test_regexp_substr_all_snowflake_to_duckdb_6arg_group0() {
35145 let dialect = Dialect::get(DialectType::Snowflake);
35146 let result = dialect
35147 .transpile_to(
35148 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)",
35149 DialectType::DuckDB,
35150 )
35151 .unwrap();
35152 assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
35153 }
35154
35155 #[test]
35156 fn test_regexp_substr_all_snowflake_identity_strip_group0() {
35157 let dialect = Dialect::get(DialectType::Snowflake);
35158 let result = dialect
35159 .transpile_to(
35160 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)",
35161 DialectType::Snowflake,
35162 )
35163 .unwrap();
35164 assert_eq!(
35165 result[0],
35166 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')"
35167 );
35168 }
35169
35170 #[test]
35171 fn test_regexp_count_snowflake_to_duckdb_2arg() {
35172 let dialect = Dialect::get(DialectType::Snowflake);
35173 let result = dialect
35174 .transpile_to("SELECT REGEXP_COUNT(s, 'pattern')", DialectType::DuckDB)
35175 .unwrap();
35176 assert_eq!(
35177 result[0],
35178 "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, 'pattern')) END"
35179 );
35180 }
35181
35182 #[test]
35183 fn test_regexp_count_snowflake_to_duckdb_3arg() {
35184 let dialect = Dialect::get(DialectType::Snowflake);
35185 let result = dialect
35186 .transpile_to("SELECT REGEXP_COUNT(s, 'pattern', 3)", DialectType::DuckDB)
35187 .unwrap();
35188 assert_eq!(
35189 result[0],
35190 "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')) END"
35191 );
35192 }
35193
35194 #[test]
35195 fn test_regexp_count_snowflake_to_duckdb_4arg_flags() {
35196 let dialect = Dialect::get(DialectType::Snowflake);
35197 let result = dialect
35198 .transpile_to(
35199 "SELECT REGEXP_COUNT(s, 'pattern', 1, 'i')",
35200 DialectType::DuckDB,
35201 )
35202 .unwrap();
35203 assert_eq!(
35204 result[0],
35205 "SELECT CASE WHEN '(?i)' || 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 1), '(?i)' || 'pattern')) END"
35206 );
35207 }
35208
35209 #[test]
35210 fn test_regexp_count_snowflake_to_duckdb_4arg_flags_literal_string() {
35211 let dialect = Dialect::get(DialectType::Snowflake);
35212 let result = dialect
35213 .transpile_to(
35214 "SELECT REGEXP_COUNT('Hello World', 'L', 1, 'im')",
35215 DialectType::DuckDB,
35216 )
35217 .unwrap();
35218 assert_eq!(
35219 result[0],
35220 "SELECT CASE WHEN '(?im)' || 'L' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING('Hello World', 1), '(?im)' || 'L')) END"
35221 );
35222 }
35223
35224 #[test]
35225 fn test_regexp_replace_snowflake_to_duckdb_5arg_pos1_occ1() {
35226 let dialect = Dialect::get(DialectType::Snowflake);
35227 let result = dialect
35228 .transpile_to(
35229 "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 1, 1)",
35230 DialectType::DuckDB,
35231 )
35232 .unwrap();
35233 assert_eq!(result[0], "SELECT REGEXP_REPLACE(s, 'pattern', 'repl')");
35234 }
35235
35236 #[test]
35237 fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ0() {
35238 let dialect = Dialect::get(DialectType::Snowflake);
35239 let result = dialect
35240 .transpile_to(
35241 "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 0)",
35242 DialectType::DuckDB,
35243 )
35244 .unwrap();
35245 assert_eq!(
35246 result[0],
35247 "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl', 'g')"
35248 );
35249 }
35250
35251 #[test]
35252 fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ1() {
35253 let dialect = Dialect::get(DialectType::Snowflake);
35254 let result = dialect
35255 .transpile_to(
35256 "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 1)",
35257 DialectType::DuckDB,
35258 )
35259 .unwrap();
35260 assert_eq!(
35261 result[0],
35262 "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl')"
35263 );
35264 }
35265
35266 #[test]
35267 fn test_rlike_snowflake_to_duckdb_2arg() {
35268 let dialect = Dialect::get(DialectType::Snowflake);
35269 let result = dialect
35270 .transpile_to("SELECT RLIKE(a, b)", DialectType::DuckDB)
35271 .unwrap();
35272 assert_eq!(
35273 result[0],
35274 "SELECT REGEXP_MATCHES(a, '^(' || (b) || ')$')"
35275 );
35276 }
35277
35278 #[test]
35279 fn test_rlike_snowflake_to_duckdb_3arg_flags() {
35280 let dialect = Dialect::get(DialectType::Snowflake);
35281 let result = dialect
35282 .transpile_to("SELECT RLIKE(a, b, 'i')", DialectType::DuckDB)
35283 .unwrap();
35284 assert_eq!(
35285 result[0],
35286 "SELECT REGEXP_MATCHES(a, '^(' || (b) || ')$', 'i')"
35287 );
35288 }
35289
35290 #[test]
35291 fn test_regexp_extract_all_bigquery_to_snowflake_no_capture() {
35292 let dialect = Dialect::get(DialectType::BigQuery);
35293 let result = dialect
35294 .transpile_to(
35295 "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')",
35296 DialectType::Snowflake,
35297 )
35298 .unwrap();
35299 assert_eq!(result[0], "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')");
35300 }
35301
35302 #[test]
35303 fn test_regexp_extract_all_bigquery_to_snowflake_with_capture() {
35304 let dialect = Dialect::get(DialectType::BigQuery);
35305 let result = dialect
35306 .transpile_to(
35307 "SELECT REGEXP_EXTRACT_ALL(s, '(a)[0-9]')",
35308 DialectType::Snowflake,
35309 )
35310 .unwrap();
35311 assert_eq!(
35312 result[0],
35313 "SELECT REGEXP_SUBSTR_ALL(s, '(a)[0-9]', 1, 1, 'c', 1)"
35314 );
35315 }
35316
35317 #[test]
35318 fn test_regexp_instr_snowflake_to_duckdb_2arg() {
35319 let handle = std::thread::Builder::new()
35320 .stack_size(16 * 1024 * 1024)
35321 .spawn(|| {
35322 let dialect = Dialect::get(DialectType::Snowflake);
35323 let result = dialect
35324 .transpile_to("SELECT REGEXP_INSTR(s, 'pattern')", DialectType::DuckDB)
35325 .unwrap();
35326 // Should produce a CASE WHEN expression
35327 assert!(result[0].contains("CASE WHEN"), "Expected CASE WHEN in result: {}", result[0]);
35328 assert!(result[0].contains("LIST_SUM"), "Expected LIST_SUM in result: {}", result[0]);
35329 })
35330 .unwrap();
35331 handle.join().unwrap();
35332 }
35333
35334 #[test]
35335 fn test_array_except_generic_to_duckdb() {
35336 // Use larger stack to avoid overflow from deeply nested expression Drop
35337 let handle = std::thread::Builder::new()
35338 .stack_size(16 * 1024 * 1024)
35339 .spawn(|| {
35340 let dialect = Dialect::get(DialectType::Generic);
35341 let result = dialect
35342 .transpile_to("SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))", DialectType::DuckDB)
35343 .unwrap();
35344 eprintln!("ARRAY_EXCEPT Generic->DuckDB: {}", result[0]);
35345 assert!(result[0].contains("CASE WHEN"), "Expected CASE WHEN: {}", result[0]);
35346 assert!(result[0].contains("LIST_FILTER"), "Expected LIST_FILTER: {}", result[0]);
35347 assert!(result[0].contains("LIST_DISTINCT"), "Expected LIST_DISTINCT: {}", result[0]);
35348 assert!(result[0].contains("IS NOT DISTINCT FROM"), "Expected IS NOT DISTINCT FROM: {}", result[0]);
35349 assert!(result[0].contains("= 0"), "Expected = 0 filter: {}", result[0]);
35350 })
35351 .unwrap();
35352 handle.join().unwrap();
35353 }
35354
35355 #[test]
35356 fn test_array_except_generic_to_snowflake() {
35357 let dialect = Dialect::get(DialectType::Generic);
35358 let result = dialect
35359 .transpile_to("SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))", DialectType::Snowflake)
35360 .unwrap();
35361 eprintln!("ARRAY_EXCEPT Generic->Snowflake: {}", result[0]);
35362 assert_eq!(result[0], "SELECT ARRAY_EXCEPT([1, 2, 3], [2])");
35363 }
35364
35365 #[test]
35366 fn test_array_except_generic_to_presto() {
35367 let dialect = Dialect::get(DialectType::Generic);
35368 let result = dialect
35369 .transpile_to("SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))", DialectType::Presto)
35370 .unwrap();
35371 eprintln!("ARRAY_EXCEPT Generic->Presto: {}", result[0]);
35372 assert_eq!(result[0], "SELECT ARRAY_EXCEPT(ARRAY[1, 2, 3], ARRAY[2])");
35373 }
35374
35375 #[test]
35376 fn test_array_except_snowflake_to_duckdb() {
35377 let handle = std::thread::Builder::new()
35378 .stack_size(16 * 1024 * 1024)
35379 .spawn(|| {
35380 let dialect = Dialect::get(DialectType::Snowflake);
35381 let result = dialect
35382 .transpile_to("SELECT ARRAY_EXCEPT([1, 2, 3], [2])", DialectType::DuckDB)
35383 .unwrap();
35384 eprintln!("ARRAY_EXCEPT Snowflake->DuckDB: {}", result[0]);
35385 assert!(result[0].contains("CASE WHEN"), "Expected CASE WHEN: {}", result[0]);
35386 assert!(result[0].contains("LIST_TRANSFORM"), "Expected LIST_TRANSFORM: {}", result[0]);
35387 })
35388 .unwrap();
35389 handle.join().unwrap();
35390 }
35391
35392 #[test]
35393 fn test_array_contains_snowflake_to_snowflake() {
35394 let dialect = Dialect::get(DialectType::Snowflake);
35395 let result = dialect
35396 .transpile_to("SELECT ARRAY_CONTAINS(x, [1, NULL, 3])", DialectType::Snowflake)
35397 .unwrap();
35398 eprintln!("ARRAY_CONTAINS Snowflake->Snowflake: {}", result[0]);
35399 assert_eq!(result[0], "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])");
35400 }
35401
35402 #[test]
35403 fn test_array_contains_snowflake_to_duckdb() {
35404 let dialect = Dialect::get(DialectType::Snowflake);
35405 let result = dialect
35406 .transpile_to("SELECT ARRAY_CONTAINS(x, [1, NULL, 3])", DialectType::DuckDB)
35407 .unwrap();
35408 eprintln!("ARRAY_CONTAINS Snowflake->DuckDB: {}", result[0]);
35409 assert!(result[0].contains("CASE WHEN"), "Expected CASE WHEN: {}", result[0]);
35410 assert!(result[0].contains("NULLIF"), "Expected NULLIF: {}", result[0]);
35411 assert!(result[0].contains("ARRAY_CONTAINS"), "Expected ARRAY_CONTAINS: {}", result[0]);
35412 }
35413
35414 #[test]
35415 fn test_array_distinct_snowflake_to_duckdb() {
35416 let dialect = Dialect::get(DialectType::Snowflake);
35417 let result = dialect
35418 .transpile_to("SELECT ARRAY_DISTINCT([1, 2, 2, 3, 1])", DialectType::DuckDB)
35419 .unwrap();
35420 eprintln!("ARRAY_DISTINCT Snowflake->DuckDB: {}", result[0]);
35421 assert!(result[0].contains("CASE WHEN"), "Expected CASE WHEN: {}", result[0]);
35422 assert!(result[0].contains("LIST_DISTINCT"), "Expected LIST_DISTINCT: {}", result[0]);
35423 assert!(result[0].contains("LIST_APPEND"), "Expected LIST_APPEND: {}", result[0]);
35424 assert!(result[0].contains("LIST_FILTER"), "Expected LIST_FILTER: {}", result[0]);
35425 }
35426}