polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
6//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery uses backtick quoting,
7//! MySQL uses backtick for identifiers, TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile`](Dialect::transpile) to another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic; // Always compiled
23
24#[cfg(feature = "dialect-athena")]
25mod athena;
26#[cfg(feature = "dialect-bigquery")]
27mod bigquery;
28#[cfg(feature = "dialect-clickhouse")]
29mod clickhouse;
30#[cfg(feature = "dialect-cockroachdb")]
31mod cockroachdb;
32#[cfg(feature = "dialect-databricks")]
33mod databricks;
34#[cfg(feature = "dialect-datafusion")]
35mod datafusion;
36#[cfg(feature = "dialect-doris")]
37mod doris;
38#[cfg(feature = "dialect-dremio")]
39mod dremio;
40#[cfg(feature = "dialect-drill")]
41mod drill;
42#[cfg(feature = "dialect-druid")]
43mod druid;
44#[cfg(feature = "dialect-duckdb")]
45mod duckdb;
46#[cfg(feature = "dialect-dune")]
47mod dune;
48#[cfg(feature = "dialect-exasol")]
49mod exasol;
50#[cfg(feature = "dialect-fabric")]
51mod fabric;
52#[cfg(feature = "dialect-hive")]
53mod hive;
54#[cfg(feature = "dialect-materialize")]
55mod materialize;
56#[cfg(feature = "dialect-mysql")]
57mod mysql;
58#[cfg(feature = "dialect-oracle")]
59mod oracle;
60#[cfg(feature = "dialect-postgresql")]
61mod postgres;
62#[cfg(feature = "dialect-presto")]
63mod presto;
64#[cfg(feature = "dialect-redshift")]
65mod redshift;
66#[cfg(feature = "dialect-risingwave")]
67mod risingwave;
68#[cfg(feature = "dialect-singlestore")]
69mod singlestore;
70#[cfg(feature = "dialect-snowflake")]
71mod snowflake;
72#[cfg(feature = "dialect-solr")]
73mod solr;
74#[cfg(feature = "dialect-spark")]
75mod spark;
76#[cfg(feature = "dialect-sqlite")]
77mod sqlite;
78#[cfg(feature = "dialect-starrocks")]
79mod starrocks;
80#[cfg(feature = "dialect-tableau")]
81mod tableau;
82#[cfg(feature = "dialect-teradata")]
83mod teradata;
84#[cfg(feature = "dialect-tidb")]
85mod tidb;
86#[cfg(feature = "dialect-trino")]
87mod trino;
88#[cfg(feature = "dialect-tsql")]
89mod tsql;
90
91pub use generic::GenericDialect; // Always available
92
93#[cfg(feature = "dialect-athena")]
94pub use athena::AthenaDialect;
95#[cfg(feature = "dialect-bigquery")]
96pub use bigquery::BigQueryDialect;
97#[cfg(feature = "dialect-clickhouse")]
98pub use clickhouse::ClickHouseDialect;
99#[cfg(feature = "dialect-cockroachdb")]
100pub use cockroachdb::CockroachDBDialect;
101#[cfg(feature = "dialect-databricks")]
102pub use databricks::DatabricksDialect;
103#[cfg(feature = "dialect-datafusion")]
104pub use datafusion::DataFusionDialect;
105#[cfg(feature = "dialect-doris")]
106pub use doris::DorisDialect;
107#[cfg(feature = "dialect-dremio")]
108pub use dremio::DremioDialect;
109#[cfg(feature = "dialect-drill")]
110pub use drill::DrillDialect;
111#[cfg(feature = "dialect-druid")]
112pub use druid::DruidDialect;
113#[cfg(feature = "dialect-duckdb")]
114pub use duckdb::DuckDBDialect;
115#[cfg(feature = "dialect-dune")]
116pub use dune::DuneDialect;
117#[cfg(feature = "dialect-exasol")]
118pub use exasol::ExasolDialect;
119#[cfg(feature = "dialect-fabric")]
120pub use fabric::FabricDialect;
121#[cfg(feature = "dialect-hive")]
122pub use hive::HiveDialect;
123#[cfg(feature = "dialect-materialize")]
124pub use materialize::MaterializeDialect;
125#[cfg(feature = "dialect-mysql")]
126pub use mysql::MySQLDialect;
127#[cfg(feature = "dialect-oracle")]
128pub use oracle::OracleDialect;
129#[cfg(feature = "dialect-postgresql")]
130pub use postgres::PostgresDialect;
131#[cfg(feature = "dialect-presto")]
132pub use presto::PrestoDialect;
133#[cfg(feature = "dialect-redshift")]
134pub use redshift::RedshiftDialect;
135#[cfg(feature = "dialect-risingwave")]
136pub use risingwave::RisingWaveDialect;
137#[cfg(feature = "dialect-singlestore")]
138pub use singlestore::SingleStoreDialect;
139#[cfg(feature = "dialect-snowflake")]
140pub use snowflake::SnowflakeDialect;
141#[cfg(feature = "dialect-solr")]
142pub use solr::SolrDialect;
143#[cfg(feature = "dialect-spark")]
144pub use spark::SparkDialect;
145#[cfg(feature = "dialect-sqlite")]
146pub use sqlite::SQLiteDialect;
147#[cfg(feature = "dialect-starrocks")]
148pub use starrocks::StarRocksDialect;
149#[cfg(feature = "dialect-tableau")]
150pub use tableau::TableauDialect;
151#[cfg(feature = "dialect-teradata")]
152pub use teradata::TeradataDialect;
153#[cfg(feature = "dialect-tidb")]
154pub use tidb::TiDBDialect;
155#[cfg(feature = "dialect-trino")]
156pub use trino::TrinoDialect;
157#[cfg(feature = "dialect-tsql")]
158pub use tsql::TSQLDialect;
159
160use crate::error::Result;
161use crate::expressions::{
162 Expression, From, Function, FunctionBody, Identifier, Join, Null, OrderBy, OutputClause,
163 TableRef, With,
164};
165use crate::generator::{Generator, GeneratorConfig};
166use crate::parser::Parser;
167use crate::tokens::{Token, TokenType, Tokenizer, TokenizerConfig};
168use serde::{Deserialize, Serialize};
169use std::collections::HashMap;
170use std::sync::{Arc, LazyLock, RwLock};
171
172/// Enumeration of all supported SQL dialects.
173///
174/// Each variant corresponds to a specific SQL database engine or query language.
175/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
176/// and is used as the default when no dialect is specified.
177///
178/// Dialect names are case-insensitive when parsed from strings via [`FromStr`].
179/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)).
180#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
181#[serde(rename_all = "lowercase")]
182pub enum DialectType {
183 /// Standard SQL with no dialect-specific behavior (default).
184 Generic,
185 /// PostgreSQL -- advanced open-source relational database.
186 PostgreSQL,
187 /// MySQL -- widely-used open-source relational database (also accepts "mysql").
188 MySQL,
189 /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
190 BigQuery,
191 /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
192 Snowflake,
193 /// DuckDB -- in-process analytical database with modern SQL extensions.
194 DuckDB,
195 /// SQLite -- lightweight embedded relational database.
196 SQLite,
197 /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
198 Hive,
199 /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
200 Spark,
201 /// Trino -- distributed SQL query engine (formerly PrestoSQL).
202 Trino,
203 /// PrestoDB -- distributed SQL query engine for big data.
204 Presto,
205 /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
206 Redshift,
207 /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
208 TSQL,
209 /// Oracle Database -- commercial relational database with PL/SQL extensions.
210 Oracle,
211 /// ClickHouse -- column-oriented OLAP database for real-time analytics.
212 ClickHouse,
213 /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
214 Databricks,
215 /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
216 Athena,
217 /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
218 Teradata,
219 /// Apache Doris -- real-time analytical database (MySQL-compatible).
220 Doris,
221 /// StarRocks -- sub-second OLAP database (MySQL-compatible).
222 StarRocks,
223 /// Materialize -- streaming SQL database built on differential dataflow.
224 Materialize,
225 /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
226 RisingWave,
227 /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
228 SingleStore,
229 /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
230 CockroachDB,
231 /// TiDB -- distributed HTAP database with MySQL compatibility.
232 TiDB,
233 /// Apache Druid -- real-time analytics database.
234 Druid,
235 /// Apache Solr -- search platform with SQL interface.
236 Solr,
237 /// Tableau -- data visualization platform with its own SQL dialect.
238 Tableau,
239 /// Dune Analytics -- blockchain analytics SQL engine.
240 Dune,
241 /// Microsoft Fabric -- unified analytics platform (T-SQL based).
242 Fabric,
243 /// Apache Drill -- schema-free SQL query engine for big data.
244 Drill,
245 /// Dremio -- data lakehouse platform with Arrow-based query engine.
246 Dremio,
247 /// Exasol -- in-memory analytic database.
248 Exasol,
249 /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions.
250 DataFusion,
251}
252
253impl Default for DialectType {
254 fn default() -> Self {
255 DialectType::Generic
256 }
257}
258
259impl std::fmt::Display for DialectType {
260 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
261 match self {
262 DialectType::Generic => write!(f, "generic"),
263 DialectType::PostgreSQL => write!(f, "postgresql"),
264 DialectType::MySQL => write!(f, "mysql"),
265 DialectType::BigQuery => write!(f, "bigquery"),
266 DialectType::Snowflake => write!(f, "snowflake"),
267 DialectType::DuckDB => write!(f, "duckdb"),
268 DialectType::SQLite => write!(f, "sqlite"),
269 DialectType::Hive => write!(f, "hive"),
270 DialectType::Spark => write!(f, "spark"),
271 DialectType::Trino => write!(f, "trino"),
272 DialectType::Presto => write!(f, "presto"),
273 DialectType::Redshift => write!(f, "redshift"),
274 DialectType::TSQL => write!(f, "tsql"),
275 DialectType::Oracle => write!(f, "oracle"),
276 DialectType::ClickHouse => write!(f, "clickhouse"),
277 DialectType::Databricks => write!(f, "databricks"),
278 DialectType::Athena => write!(f, "athena"),
279 DialectType::Teradata => write!(f, "teradata"),
280 DialectType::Doris => write!(f, "doris"),
281 DialectType::StarRocks => write!(f, "starrocks"),
282 DialectType::Materialize => write!(f, "materialize"),
283 DialectType::RisingWave => write!(f, "risingwave"),
284 DialectType::SingleStore => write!(f, "singlestore"),
285 DialectType::CockroachDB => write!(f, "cockroachdb"),
286 DialectType::TiDB => write!(f, "tidb"),
287 DialectType::Druid => write!(f, "druid"),
288 DialectType::Solr => write!(f, "solr"),
289 DialectType::Tableau => write!(f, "tableau"),
290 DialectType::Dune => write!(f, "dune"),
291 DialectType::Fabric => write!(f, "fabric"),
292 DialectType::Drill => write!(f, "drill"),
293 DialectType::Dremio => write!(f, "dremio"),
294 DialectType::Exasol => write!(f, "exasol"),
295 DialectType::DataFusion => write!(f, "datafusion"),
296 }
297 }
298}
299
300impl std::str::FromStr for DialectType {
301 type Err = crate::error::Error;
302
303 fn from_str(s: &str) -> Result<Self> {
304 match s.to_ascii_lowercase().as_str() {
305 "generic" | "" => Ok(DialectType::Generic),
306 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
307 "mysql" => Ok(DialectType::MySQL),
308 "bigquery" => Ok(DialectType::BigQuery),
309 "snowflake" => Ok(DialectType::Snowflake),
310 "duckdb" => Ok(DialectType::DuckDB),
311 "sqlite" => Ok(DialectType::SQLite),
312 "hive" => Ok(DialectType::Hive),
313 "spark" | "spark2" => Ok(DialectType::Spark),
314 "trino" => Ok(DialectType::Trino),
315 "presto" => Ok(DialectType::Presto),
316 "redshift" => Ok(DialectType::Redshift),
317 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
318 "oracle" => Ok(DialectType::Oracle),
319 "clickhouse" => Ok(DialectType::ClickHouse),
320 "databricks" => Ok(DialectType::Databricks),
321 "athena" => Ok(DialectType::Athena),
322 "teradata" => Ok(DialectType::Teradata),
323 "doris" => Ok(DialectType::Doris),
324 "starrocks" => Ok(DialectType::StarRocks),
325 "materialize" => Ok(DialectType::Materialize),
326 "risingwave" => Ok(DialectType::RisingWave),
327 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
328 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
329 "tidb" => Ok(DialectType::TiDB),
330 "druid" => Ok(DialectType::Druid),
331 "solr" => Ok(DialectType::Solr),
332 "tableau" => Ok(DialectType::Tableau),
333 "dune" => Ok(DialectType::Dune),
334 "fabric" => Ok(DialectType::Fabric),
335 "drill" => Ok(DialectType::Drill),
336 "dremio" => Ok(DialectType::Dremio),
337 "exasol" => Ok(DialectType::Exasol),
338 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
339 _ => Err(crate::error::Error::parse(
340 format!("Unknown dialect: {}", s),
341 0,
342 0,
343 0,
344 0,
345 )),
346 }
347 }
348}
349
350/// Trait that each concrete SQL dialect must implement.
351///
352/// `DialectImpl` provides the configuration hooks and per-expression transform logic
353/// that distinguish one dialect from another. Implementors supply:
354///
355/// - A [`DialectType`] identifier.
356/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
357/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
358/// that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
359/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
360/// rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
361///
362/// The default implementations are no-ops, so a minimal dialect only needs to provide
363/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
364/// standard SQL.
365pub trait DialectImpl {
366 /// Returns the [`DialectType`] that identifies this dialect.
367 fn dialect_type(&self) -> DialectType;
368
369 /// Returns the tokenizer configuration for this dialect.
370 ///
371 /// Override to customize identifier quoting characters, string escape rules,
372 /// comment styles, and other lexing behavior.
373 fn tokenizer_config(&self) -> TokenizerConfig {
374 TokenizerConfig::default()
375 }
376
377 /// Returns the generator configuration for this dialect.
378 ///
379 /// Override to customize identifier quoting style, function name casing,
380 /// keyword casing, and other SQL generation behavior.
381 fn generator_config(&self) -> GeneratorConfig {
382 GeneratorConfig::default()
383 }
384
385 /// Returns a generator configuration tailored to a specific expression.
386 ///
387 /// Override this for hybrid dialects like Athena that route to different SQL engines
388 /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
389 /// The default delegates to [`generator_config`](DialectImpl::generator_config).
390 fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
391 self.generator_config()
392 }
393
394 /// Transforms a single expression node for this dialect, without recursing into children.
395 ///
396 /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
397 /// expression unchanged if no dialect-specific rewrite is needed. Transformations
398 /// typically include function renaming, operator substitution, and type mapping.
399 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
400 Ok(expr)
401 }
402
403 /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
404 ///
405 /// Override this to apply structural rewrites that must see the entire tree at once,
406 /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
407 /// `explode_projection_to_unnest`. The default is a no-op pass-through.
408 fn preprocess(&self, expr: Expression) -> Result<Expression> {
409 Ok(expr)
410 }
411}
412
413/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
414/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
415///
416/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
417/// and then nested element/field types are recursed into. This ensures that dialect-level
418/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
419fn transform_data_type_recursive<F>(
420 dt: crate::expressions::DataType,
421 transform_fn: &F,
422) -> Result<crate::expressions::DataType>
423where
424 F: Fn(Expression) -> Result<Expression>,
425{
426 use crate::expressions::DataType;
427 // First, transform the outermost type through the expression system
428 let dt_expr = transform_fn(Expression::DataType(dt))?;
429 let dt = match dt_expr {
430 Expression::DataType(d) => d,
431 _ => {
432 return Ok(match dt_expr {
433 _ => DataType::Custom {
434 name: "UNKNOWN".to_string(),
435 },
436 })
437 }
438 };
439 // Then recurse into nested types
440 match dt {
441 DataType::Array {
442 element_type,
443 dimension,
444 } => {
445 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
446 Ok(DataType::Array {
447 element_type: Box::new(inner),
448 dimension,
449 })
450 }
451 DataType::List { element_type } => {
452 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
453 Ok(DataType::List {
454 element_type: Box::new(inner),
455 })
456 }
457 DataType::Struct { fields, nested } => {
458 let mut new_fields = Vec::new();
459 for mut field in fields {
460 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
461 new_fields.push(field);
462 }
463 Ok(DataType::Struct {
464 fields: new_fields,
465 nested,
466 })
467 }
468 DataType::Map {
469 key_type,
470 value_type,
471 } => {
472 let k = transform_data_type_recursive(*key_type, transform_fn)?;
473 let v = transform_data_type_recursive(*value_type, transform_fn)?;
474 Ok(DataType::Map {
475 key_type: Box::new(k),
476 value_type: Box::new(v),
477 })
478 }
479 other => Ok(other),
480 }
481}
482
483/// Convert DuckDB C-style format strings to Presto C-style format strings.
484/// DuckDB and Presto both use C-style % directives but with different specifiers for some cases.
485#[cfg(feature = "transpile")]
fn duckdb_to_presto_format(fmt: &str) -> String {
    // Directive rewrites, applied in one left-to-right scan. No entry is a prefix of
    // another, so at most one entry can match at any `%`.
    //
    // `%%` (an escaped percent sign) maps to itself so that the character after it is
    // never mistaken for a directive: `%%M` stays `%%M` instead of becoming `%%i`.
    // `%Y-%m-%d` needs no entry because none of its directives are rewritten.
    const REWRITES: [(&str, &str); 8] = [
        ("%%", "%%"),
        ("%H:%M:%S", "%T"),
        ("%-m", "%c"),
        ("%-d", "%e"),
        ("%-I", "%l"),
        ("%-H", "%k"),
        ("%M", "%i"),
        ("%S", "%s"),
    ];

    let mut out = String::with_capacity(fmt.len());
    let mut rest = fmt;
    while let Some(pos) = rest.find('%') {
        // Copy the literal text before the directive unchanged.
        out.push_str(&rest[..pos]);
        rest = &rest[pos..];
        match REWRITES.iter().find(|rule| rest.starts_with(rule.0)) {
            Some(&(from, to)) => {
                out.push_str(to);
                rest = &rest[from.len()..];
            }
            None => {
                // Unknown directive or a trailing lone `%`: pass the `%` through.
                // `%` is a single ASCII byte, so slicing one byte is boundary-safe.
                out.push('%');
                rest = &rest[1..];
            }
        }
    }
    out.push_str(rest);
    out
}
508
509/// Convert DuckDB C-style format strings to BigQuery format strings.
510/// BigQuery uses a mix of strftime-like directives.
511#[cfg(feature = "transpile")]
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    // Directive rewrites, applied in one left-to-right scan. No entry is a prefix of
    // another, so at most one entry can match at any `%`.
    //
    // `%%` (an escaped percent sign) maps to itself so that the character after it is
    // never mistaken for a directive: `%%-d` stays `%%-d` instead of becoming `%%e`.
    // The combined `%Y-%m-%d %H:%M:%S` -> `%F %T` case falls out of the two
    // shorter entries, so it needs no entry of its own.
    const REWRITES: [(&str, &str); 4] = [
        ("%%", "%%"),
        ("%Y-%m-%d", "%F"),
        ("%H:%M:%S", "%T"),
        ("%-d", "%e"),
    ];

    let mut out = String::with_capacity(fmt.len());
    let mut rest = fmt;
    while let Some(pos) = rest.find('%') {
        // Copy the literal text before the directive unchanged.
        out.push_str(&rest[..pos]);
        rest = &rest[pos..];
        match REWRITES.iter().find(|rule| rest.starts_with(rule.0)) {
            Some(&(from, to)) => {
                out.push_str(to);
                rest = &rest[from.len()..];
            }
            None => {
                // Unknown directive or a trailing lone `%`: pass the `%` through.
                // `%` is a single ASCII byte, so slicing one byte is boundary-safe.
                out.push('%');
                rest = &rest[1..];
            }
        }
    }
    out.push_str(rest);
    out
}
521
522#[derive(Debug)]
523enum TransformTask {
524 Visit(Expression),
525 Finish(FinishTask),
526}
527
528#[derive(Debug)]
529enum FinishTask {
530 Unary(Expression),
531 Binary(Expression),
532 CastLike(Expression),
533 List(Expression, usize),
534 From(crate::expressions::From, usize),
535 Select(SelectFrame),
536 SetOp(Expression),
537}
538
539#[derive(Debug)]
540struct SelectFrame {
541 select: Box<crate::expressions::Select>,
542 expr_count: usize,
543 from_present: bool,
544 where_present: bool,
545 group_by_count: usize,
546 having_present: bool,
547 qualify_present: bool,
548}
549
550fn transform_pop_result(results: &mut Vec<Expression>) -> Result<Expression> {
551 results
552 .pop()
553 .ok_or_else(|| crate::error::Error::Internal("transform stack underflow".to_string()))
554}
555
556fn transform_pop_results(results: &mut Vec<Expression>, count: usize) -> Result<Vec<Expression>> {
557 if results.len() < count {
558 return Err(crate::error::Error::Internal(
559 "transform result stack underflow".to_string(),
560 ));
561 }
562 Ok(results.split_off(results.len() - count))
563}
564
565/// Applies a transform function bottom-up through an entire expression tree.
566///
567/// The public entrypoint uses an explicit task stack for the recursion-heavy shapes
568/// that dominate deeply nested SQL (nested SELECT/FROM/SUBQUERY chains, set-operation
569/// trees, and common binary/unary expression chains). Less common shapes currently
570/// reuse the reference recursive implementation so semantics stay identical while
571/// the hot path avoids stack growth.
572pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
573where
574 F: Fn(Expression) -> Result<Expression>,
575{
576 #[cfg(feature = "stacker")]
577 {
578 let red_zone = if cfg!(debug_assertions) {
579 4 * 1024 * 1024
580 } else {
581 1024 * 1024
582 };
583 stacker::maybe_grow(red_zone, 8 * 1024 * 1024, move || {
584 transform_recursive_inner(expr, transform_fn)
585 })
586 }
587 #[cfg(not(feature = "stacker"))]
588 {
589 transform_recursive_inner(expr, transform_fn)
590 }
591}
592
593fn transform_recursive_inner<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
594where
595 F: Fn(Expression) -> Result<Expression>,
596{
597 let mut tasks = vec![TransformTask::Visit(expr)];
598 let mut results = Vec::new();
599
600 while let Some(task) = tasks.pop() {
601 match task {
602 TransformTask::Visit(expr) => {
603 if matches!(
604 &expr,
605 Expression::Literal(_)
606 | Expression::Boolean(_)
607 | Expression::Null(_)
608 | Expression::Identifier(_)
609 | Expression::Star(_)
610 | Expression::Parameter(_)
611 | Expression::Placeholder(_)
612 | Expression::SessionParameter(_)
613 ) {
614 results.push(transform_fn(expr)?);
615 continue;
616 }
617
618 match expr {
619 Expression::Alias(mut alias) => {
620 let child = std::mem::replace(&mut alias.this, Expression::Null(Null));
621 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Alias(
622 alias,
623 ))));
624 tasks.push(TransformTask::Visit(child));
625 }
626 Expression::Paren(mut paren) => {
627 let child = std::mem::replace(&mut paren.this, Expression::Null(Null));
628 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Paren(
629 paren,
630 ))));
631 tasks.push(TransformTask::Visit(child));
632 }
633 Expression::Not(mut not) => {
634 let child = std::mem::replace(&mut not.this, Expression::Null(Null));
635 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Not(
636 not,
637 ))));
638 tasks.push(TransformTask::Visit(child));
639 }
640 Expression::Neg(mut neg) => {
641 let child = std::mem::replace(&mut neg.this, Expression::Null(Null));
642 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Neg(
643 neg,
644 ))));
645 tasks.push(TransformTask::Visit(child));
646 }
647 Expression::IsNull(mut expr) => {
648 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
649 tasks.push(TransformTask::Finish(FinishTask::Unary(
650 Expression::IsNull(expr),
651 )));
652 tasks.push(TransformTask::Visit(child));
653 }
654 Expression::IsTrue(mut expr) => {
655 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
656 tasks.push(TransformTask::Finish(FinishTask::Unary(
657 Expression::IsTrue(expr),
658 )));
659 tasks.push(TransformTask::Visit(child));
660 }
661 Expression::IsFalse(mut expr) => {
662 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
663 tasks.push(TransformTask::Finish(FinishTask::Unary(
664 Expression::IsFalse(expr),
665 )));
666 tasks.push(TransformTask::Visit(child));
667 }
668 Expression::Subquery(mut subquery) => {
669 let child = std::mem::replace(&mut subquery.this, Expression::Null(Null));
670 tasks.push(TransformTask::Finish(FinishTask::Unary(
671 Expression::Subquery(subquery),
672 )));
673 tasks.push(TransformTask::Visit(child));
674 }
675 Expression::Exists(mut exists) => {
676 let child = std::mem::replace(&mut exists.this, Expression::Null(Null));
677 tasks.push(TransformTask::Finish(FinishTask::Unary(
678 Expression::Exists(exists),
679 )));
680 tasks.push(TransformTask::Visit(child));
681 }
682 Expression::TableArgument(mut arg) => {
683 let child = std::mem::replace(&mut arg.this, Expression::Null(Null));
684 tasks.push(TransformTask::Finish(FinishTask::Unary(
685 Expression::TableArgument(arg),
686 )));
687 tasks.push(TransformTask::Visit(child));
688 }
689 Expression::And(mut op) => {
690 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
691 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
692 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::And(
693 op,
694 ))));
695 tasks.push(TransformTask::Visit(right));
696 tasks.push(TransformTask::Visit(left));
697 }
698 Expression::Or(mut op) => {
699 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
700 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
701 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Or(
702 op,
703 ))));
704 tasks.push(TransformTask::Visit(right));
705 tasks.push(TransformTask::Visit(left));
706 }
707 Expression::Add(mut op) => {
708 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
709 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
710 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Add(
711 op,
712 ))));
713 tasks.push(TransformTask::Visit(right));
714 tasks.push(TransformTask::Visit(left));
715 }
716 Expression::Sub(mut op) => {
717 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
718 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
719 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Sub(
720 op,
721 ))));
722 tasks.push(TransformTask::Visit(right));
723 tasks.push(TransformTask::Visit(left));
724 }
725 Expression::Mul(mut op) => {
726 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
727 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
728 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Mul(
729 op,
730 ))));
731 tasks.push(TransformTask::Visit(right));
732 tasks.push(TransformTask::Visit(left));
733 }
734 Expression::Div(mut op) => {
735 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
736 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
737 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Div(
738 op,
739 ))));
740 tasks.push(TransformTask::Visit(right));
741 tasks.push(TransformTask::Visit(left));
742 }
743 Expression::Eq(mut op) => {
744 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
745 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
746 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Eq(
747 op,
748 ))));
749 tasks.push(TransformTask::Visit(right));
750 tasks.push(TransformTask::Visit(left));
751 }
752 Expression::Lt(mut op) => {
753 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
754 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
755 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Lt(
756 op,
757 ))));
758 tasks.push(TransformTask::Visit(right));
759 tasks.push(TransformTask::Visit(left));
760 }
761 Expression::Gt(mut op) => {
762 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
763 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
764 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Gt(
765 op,
766 ))));
767 tasks.push(TransformTask::Visit(right));
768 tasks.push(TransformTask::Visit(left));
769 }
770 Expression::Neq(mut op) => {
771 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
772 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
773 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Neq(
774 op,
775 ))));
776 tasks.push(TransformTask::Visit(right));
777 tasks.push(TransformTask::Visit(left));
778 }
779 Expression::Lte(mut op) => {
780 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
781 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
782 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Lte(
783 op,
784 ))));
785 tasks.push(TransformTask::Visit(right));
786 tasks.push(TransformTask::Visit(left));
787 }
788 Expression::Gte(mut op) => {
789 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
790 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
791 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Gte(
792 op,
793 ))));
794 tasks.push(TransformTask::Visit(right));
795 tasks.push(TransformTask::Visit(left));
796 }
797 Expression::Mod(mut op) => {
798 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
799 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
800 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Mod(
801 op,
802 ))));
803 tasks.push(TransformTask::Visit(right));
804 tasks.push(TransformTask::Visit(left));
805 }
806 Expression::Concat(mut op) => {
807 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
808 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
809 tasks.push(TransformTask::Finish(FinishTask::Binary(
810 Expression::Concat(op),
811 )));
812 tasks.push(TransformTask::Visit(right));
813 tasks.push(TransformTask::Visit(left));
814 }
815 Expression::BitwiseAnd(mut op) => {
816 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
817 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
818 tasks.push(TransformTask::Finish(FinishTask::Binary(
819 Expression::BitwiseAnd(op),
820 )));
821 tasks.push(TransformTask::Visit(right));
822 tasks.push(TransformTask::Visit(left));
823 }
824 Expression::BitwiseOr(mut op) => {
825 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
826 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
827 tasks.push(TransformTask::Finish(FinishTask::Binary(
828 Expression::BitwiseOr(op),
829 )));
830 tasks.push(TransformTask::Visit(right));
831 tasks.push(TransformTask::Visit(left));
832 }
833 Expression::BitwiseXor(mut op) => {
834 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
835 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
836 tasks.push(TransformTask::Finish(FinishTask::Binary(
837 Expression::BitwiseXor(op),
838 )));
839 tasks.push(TransformTask::Visit(right));
840 tasks.push(TransformTask::Visit(left));
841 }
842 Expression::Is(mut op) => {
843 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
844 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
845 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Is(
846 op,
847 ))));
848 tasks.push(TransformTask::Visit(right));
849 tasks.push(TransformTask::Visit(left));
850 }
851 Expression::MemberOf(mut op) => {
852 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
853 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
854 tasks.push(TransformTask::Finish(FinishTask::Binary(
855 Expression::MemberOf(op),
856 )));
857 tasks.push(TransformTask::Visit(right));
858 tasks.push(TransformTask::Visit(left));
859 }
860 Expression::ArrayContainsAll(mut op) => {
861 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
862 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
863 tasks.push(TransformTask::Finish(FinishTask::Binary(
864 Expression::ArrayContainsAll(op),
865 )));
866 tasks.push(TransformTask::Visit(right));
867 tasks.push(TransformTask::Visit(left));
868 }
869 Expression::ArrayContainedBy(mut op) => {
870 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
871 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
872 tasks.push(TransformTask::Finish(FinishTask::Binary(
873 Expression::ArrayContainedBy(op),
874 )));
875 tasks.push(TransformTask::Visit(right));
876 tasks.push(TransformTask::Visit(left));
877 }
878 Expression::ArrayOverlaps(mut op) => {
879 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
880 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
881 tasks.push(TransformTask::Finish(FinishTask::Binary(
882 Expression::ArrayOverlaps(op),
883 )));
884 tasks.push(TransformTask::Visit(right));
885 tasks.push(TransformTask::Visit(left));
886 }
887 Expression::TsMatch(mut op) => {
888 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
889 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
890 tasks.push(TransformTask::Finish(FinishTask::Binary(
891 Expression::TsMatch(op),
892 )));
893 tasks.push(TransformTask::Visit(right));
894 tasks.push(TransformTask::Visit(left));
895 }
896 Expression::Adjacent(mut op) => {
897 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
898 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
899 tasks.push(TransformTask::Finish(FinishTask::Binary(
900 Expression::Adjacent(op),
901 )));
902 tasks.push(TransformTask::Visit(right));
903 tasks.push(TransformTask::Visit(left));
904 }
905 Expression::Like(mut like) => {
906 let right = std::mem::replace(&mut like.right, Expression::Null(Null));
907 let left = std::mem::replace(&mut like.left, Expression::Null(Null));
908 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Like(
909 like,
910 ))));
911 tasks.push(TransformTask::Visit(right));
912 tasks.push(TransformTask::Visit(left));
913 }
914 Expression::ILike(mut like) => {
915 let right = std::mem::replace(&mut like.right, Expression::Null(Null));
916 let left = std::mem::replace(&mut like.left, Expression::Null(Null));
917 tasks.push(TransformTask::Finish(FinishTask::Binary(
918 Expression::ILike(like),
919 )));
920 tasks.push(TransformTask::Visit(right));
921 tasks.push(TransformTask::Visit(left));
922 }
923 Expression::Cast(mut cast) => {
924 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
925 tasks.push(TransformTask::Finish(FinishTask::CastLike(
926 Expression::Cast(cast),
927 )));
928 tasks.push(TransformTask::Visit(child));
929 }
930 Expression::TryCast(mut cast) => {
931 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
932 tasks.push(TransformTask::Finish(FinishTask::CastLike(
933 Expression::TryCast(cast),
934 )));
935 tasks.push(TransformTask::Visit(child));
936 }
937 Expression::SafeCast(mut cast) => {
938 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
939 tasks.push(TransformTask::Finish(FinishTask::CastLike(
940 Expression::SafeCast(cast),
941 )));
942 tasks.push(TransformTask::Visit(child));
943 }
944 Expression::Function(mut function) => {
945 let args = std::mem::take(&mut function.args);
946 let count = args.len();
947 tasks.push(TransformTask::Finish(FinishTask::List(
948 Expression::Function(function),
949 count,
950 )));
951 for child in args.into_iter().rev() {
952 tasks.push(TransformTask::Visit(child));
953 }
954 }
955 Expression::Array(mut array) => {
956 let expressions = std::mem::take(&mut array.expressions);
957 let count = expressions.len();
958 tasks.push(TransformTask::Finish(FinishTask::List(
959 Expression::Array(array),
960 count,
961 )));
962 for child in expressions.into_iter().rev() {
963 tasks.push(TransformTask::Visit(child));
964 }
965 }
966 Expression::Tuple(mut tuple) => {
967 let expressions = std::mem::take(&mut tuple.expressions);
968 let count = expressions.len();
969 tasks.push(TransformTask::Finish(FinishTask::List(
970 Expression::Tuple(tuple),
971 count,
972 )));
973 for child in expressions.into_iter().rev() {
974 tasks.push(TransformTask::Visit(child));
975 }
976 }
977 Expression::ArrayFunc(mut array) => {
978 let expressions = std::mem::take(&mut array.expressions);
979 let count = expressions.len();
980 tasks.push(TransformTask::Finish(FinishTask::List(
981 Expression::ArrayFunc(array),
982 count,
983 )));
984 for child in expressions.into_iter().rev() {
985 tasks.push(TransformTask::Visit(child));
986 }
987 }
988 Expression::Coalesce(mut func) => {
989 let expressions = std::mem::take(&mut func.expressions);
990 let count = expressions.len();
991 tasks.push(TransformTask::Finish(FinishTask::List(
992 Expression::Coalesce(func),
993 count,
994 )));
995 for child in expressions.into_iter().rev() {
996 tasks.push(TransformTask::Visit(child));
997 }
998 }
999 Expression::Greatest(mut func) => {
1000 let expressions = std::mem::take(&mut func.expressions);
1001 let count = expressions.len();
1002 tasks.push(TransformTask::Finish(FinishTask::List(
1003 Expression::Greatest(func),
1004 count,
1005 )));
1006 for child in expressions.into_iter().rev() {
1007 tasks.push(TransformTask::Visit(child));
1008 }
1009 }
1010 Expression::Least(mut func) => {
1011 let expressions = std::mem::take(&mut func.expressions);
1012 let count = expressions.len();
1013 tasks.push(TransformTask::Finish(FinishTask::List(
1014 Expression::Least(func),
1015 count,
1016 )));
1017 for child in expressions.into_iter().rev() {
1018 tasks.push(TransformTask::Visit(child));
1019 }
1020 }
1021 Expression::ArrayConcat(mut func) => {
1022 let expressions = std::mem::take(&mut func.expressions);
1023 let count = expressions.len();
1024 tasks.push(TransformTask::Finish(FinishTask::List(
1025 Expression::ArrayConcat(func),
1026 count,
1027 )));
1028 for child in expressions.into_iter().rev() {
1029 tasks.push(TransformTask::Visit(child));
1030 }
1031 }
1032 Expression::ArrayIntersect(mut func) => {
1033 let expressions = std::mem::take(&mut func.expressions);
1034 let count = expressions.len();
1035 tasks.push(TransformTask::Finish(FinishTask::List(
1036 Expression::ArrayIntersect(func),
1037 count,
1038 )));
1039 for child in expressions.into_iter().rev() {
1040 tasks.push(TransformTask::Visit(child));
1041 }
1042 }
1043 Expression::ArrayZip(mut func) => {
1044 let expressions = std::mem::take(&mut func.expressions);
1045 let count = expressions.len();
1046 tasks.push(TransformTask::Finish(FinishTask::List(
1047 Expression::ArrayZip(func),
1048 count,
1049 )));
1050 for child in expressions.into_iter().rev() {
1051 tasks.push(TransformTask::Visit(child));
1052 }
1053 }
1054 Expression::MapConcat(mut func) => {
1055 let expressions = std::mem::take(&mut func.expressions);
1056 let count = expressions.len();
1057 tasks.push(TransformTask::Finish(FinishTask::List(
1058 Expression::MapConcat(func),
1059 count,
1060 )));
1061 for child in expressions.into_iter().rev() {
1062 tasks.push(TransformTask::Visit(child));
1063 }
1064 }
1065 Expression::JsonArray(mut func) => {
1066 let expressions = std::mem::take(&mut func.expressions);
1067 let count = expressions.len();
1068 tasks.push(TransformTask::Finish(FinishTask::List(
1069 Expression::JsonArray(func),
1070 count,
1071 )));
1072 for child in expressions.into_iter().rev() {
1073 tasks.push(TransformTask::Visit(child));
1074 }
1075 }
1076 Expression::From(mut from) => {
1077 let expressions = std::mem::take(&mut from.expressions);
1078 let count = expressions.len();
1079 tasks.push(TransformTask::Finish(FinishTask::From(*from, count)));
1080 for child in expressions.into_iter().rev() {
1081 tasks.push(TransformTask::Visit(child));
1082 }
1083 }
1084 Expression::Select(mut select) => {
1085 let expressions = std::mem::take(&mut select.expressions);
1086 let expr_count = expressions.len();
1087
1088 let from_info = select.from.take().map(|mut from| {
1089 let children = std::mem::take(&mut from.expressions);
1090 (from, children)
1091 });
1092 let from_present = from_info.is_some();
1093
1094 let where_child = select.where_clause.as_mut().map(|where_clause| {
1095 std::mem::replace(&mut where_clause.this, Expression::Null(Null))
1096 });
1097 let where_present = where_child.is_some();
1098
1099 let group_expressions = select
1100 .group_by
1101 .as_mut()
1102 .map(|group_by| std::mem::take(&mut group_by.expressions))
1103 .unwrap_or_default();
1104 let group_by_count = group_expressions.len();
1105
1106 let having_child = select.having.as_mut().map(|having| {
1107 std::mem::replace(&mut having.this, Expression::Null(Null))
1108 });
1109 let having_present = having_child.is_some();
1110
1111 let qualify_child = select.qualify.as_mut().map(|qualify| {
1112 std::mem::replace(&mut qualify.this, Expression::Null(Null))
1113 });
1114 let qualify_present = qualify_child.is_some();
1115
1116 tasks.push(TransformTask::Finish(FinishTask::Select(SelectFrame {
1117 select,
1118 expr_count,
1119 from_present,
1120 where_present,
1121 group_by_count,
1122 having_present,
1123 qualify_present,
1124 })));
1125
1126 if let Some(child) = qualify_child {
1127 tasks.push(TransformTask::Visit(child));
1128 }
1129 if let Some(child) = having_child {
1130 tasks.push(TransformTask::Visit(child));
1131 }
1132 for child in group_expressions.into_iter().rev() {
1133 tasks.push(TransformTask::Visit(child));
1134 }
1135 if let Some(child) = where_child {
1136 tasks.push(TransformTask::Visit(child));
1137 }
1138 if let Some((from, children)) = from_info {
1139 tasks.push(TransformTask::Finish(FinishTask::From(
1140 from,
1141 children.len(),
1142 )));
1143 for child in children.into_iter().rev() {
1144 tasks.push(TransformTask::Visit(child));
1145 }
1146 }
1147 for child in expressions.into_iter().rev() {
1148 tasks.push(TransformTask::Visit(child));
1149 }
1150 }
1151 Expression::Union(mut union) => {
1152 let right = std::mem::replace(&mut union.right, Expression::Null(Null));
1153 let left = std::mem::replace(&mut union.left, Expression::Null(Null));
1154 tasks.push(TransformTask::Finish(FinishTask::SetOp(Expression::Union(
1155 union,
1156 ))));
1157 tasks.push(TransformTask::Visit(right));
1158 tasks.push(TransformTask::Visit(left));
1159 }
1160 Expression::Intersect(mut intersect) => {
1161 let right = std::mem::replace(&mut intersect.right, Expression::Null(Null));
1162 let left = std::mem::replace(&mut intersect.left, Expression::Null(Null));
1163 tasks.push(TransformTask::Finish(FinishTask::SetOp(
1164 Expression::Intersect(intersect),
1165 )));
1166 tasks.push(TransformTask::Visit(right));
1167 tasks.push(TransformTask::Visit(left));
1168 }
1169 Expression::Except(mut except) => {
1170 let right = std::mem::replace(&mut except.right, Expression::Null(Null));
1171 let left = std::mem::replace(&mut except.left, Expression::Null(Null));
1172 tasks.push(TransformTask::Finish(FinishTask::SetOp(
1173 Expression::Except(except),
1174 )));
1175 tasks.push(TransformTask::Visit(right));
1176 tasks.push(TransformTask::Visit(left));
1177 }
1178 other => {
1179 results.push(transform_recursive_reference(other, transform_fn)?);
1180 }
1181 }
1182 }
1183 TransformTask::Finish(finish) => match finish {
1184 FinishTask::Unary(expr) => {
1185 let child = transform_pop_result(&mut results)?;
1186 let rebuilt = match expr {
1187 Expression::Alias(mut alias) => {
1188 alias.this = child;
1189 Expression::Alias(alias)
1190 }
1191 Expression::Paren(mut paren) => {
1192 paren.this = child;
1193 Expression::Paren(paren)
1194 }
1195 Expression::Not(mut not) => {
1196 not.this = child;
1197 Expression::Not(not)
1198 }
1199 Expression::Neg(mut neg) => {
1200 neg.this = child;
1201 Expression::Neg(neg)
1202 }
1203 Expression::IsNull(mut expr) => {
1204 expr.this = child;
1205 Expression::IsNull(expr)
1206 }
1207 Expression::IsTrue(mut expr) => {
1208 expr.this = child;
1209 Expression::IsTrue(expr)
1210 }
1211 Expression::IsFalse(mut expr) => {
1212 expr.this = child;
1213 Expression::IsFalse(expr)
1214 }
1215 Expression::Subquery(mut subquery) => {
1216 subquery.this = child;
1217 Expression::Subquery(subquery)
1218 }
1219 Expression::Exists(mut exists) => {
1220 exists.this = child;
1221 Expression::Exists(exists)
1222 }
1223 Expression::TableArgument(mut arg) => {
1224 arg.this = child;
1225 Expression::TableArgument(arg)
1226 }
1227 _ => {
1228 return Err(crate::error::Error::Internal(
1229 "unexpected unary transform task".to_string(),
1230 ));
1231 }
1232 };
1233 results.push(transform_fn(rebuilt)?);
1234 }
1235 FinishTask::Binary(expr) => {
1236 let mut children = transform_pop_results(&mut results, 2)?.into_iter();
1237 let left = children.next().expect("left child");
1238 let right = children.next().expect("right child");
1239 let rebuilt = match expr {
1240 Expression::And(mut op) => {
1241 op.left = left;
1242 op.right = right;
1243 Expression::And(op)
1244 }
1245 Expression::Or(mut op) => {
1246 op.left = left;
1247 op.right = right;
1248 Expression::Or(op)
1249 }
1250 Expression::Add(mut op) => {
1251 op.left = left;
1252 op.right = right;
1253 Expression::Add(op)
1254 }
1255 Expression::Sub(mut op) => {
1256 op.left = left;
1257 op.right = right;
1258 Expression::Sub(op)
1259 }
1260 Expression::Mul(mut op) => {
1261 op.left = left;
1262 op.right = right;
1263 Expression::Mul(op)
1264 }
1265 Expression::Div(mut op) => {
1266 op.left = left;
1267 op.right = right;
1268 Expression::Div(op)
1269 }
1270 Expression::Eq(mut op) => {
1271 op.left = left;
1272 op.right = right;
1273 Expression::Eq(op)
1274 }
1275 Expression::Lt(mut op) => {
1276 op.left = left;
1277 op.right = right;
1278 Expression::Lt(op)
1279 }
1280 Expression::Gt(mut op) => {
1281 op.left = left;
1282 op.right = right;
1283 Expression::Gt(op)
1284 }
1285 Expression::Neq(mut op) => {
1286 op.left = left;
1287 op.right = right;
1288 Expression::Neq(op)
1289 }
1290 Expression::Lte(mut op) => {
1291 op.left = left;
1292 op.right = right;
1293 Expression::Lte(op)
1294 }
1295 Expression::Gte(mut op) => {
1296 op.left = left;
1297 op.right = right;
1298 Expression::Gte(op)
1299 }
1300 Expression::Mod(mut op) => {
1301 op.left = left;
1302 op.right = right;
1303 Expression::Mod(op)
1304 }
1305 Expression::Concat(mut op) => {
1306 op.left = left;
1307 op.right = right;
1308 Expression::Concat(op)
1309 }
1310 Expression::BitwiseAnd(mut op) => {
1311 op.left = left;
1312 op.right = right;
1313 Expression::BitwiseAnd(op)
1314 }
1315 Expression::BitwiseOr(mut op) => {
1316 op.left = left;
1317 op.right = right;
1318 Expression::BitwiseOr(op)
1319 }
1320 Expression::BitwiseXor(mut op) => {
1321 op.left = left;
1322 op.right = right;
1323 Expression::BitwiseXor(op)
1324 }
1325 Expression::Is(mut op) => {
1326 op.left = left;
1327 op.right = right;
1328 Expression::Is(op)
1329 }
1330 Expression::MemberOf(mut op) => {
1331 op.left = left;
1332 op.right = right;
1333 Expression::MemberOf(op)
1334 }
1335 Expression::ArrayContainsAll(mut op) => {
1336 op.left = left;
1337 op.right = right;
1338 Expression::ArrayContainsAll(op)
1339 }
1340 Expression::ArrayContainedBy(mut op) => {
1341 op.left = left;
1342 op.right = right;
1343 Expression::ArrayContainedBy(op)
1344 }
1345 Expression::ArrayOverlaps(mut op) => {
1346 op.left = left;
1347 op.right = right;
1348 Expression::ArrayOverlaps(op)
1349 }
1350 Expression::TsMatch(mut op) => {
1351 op.left = left;
1352 op.right = right;
1353 Expression::TsMatch(op)
1354 }
1355 Expression::Adjacent(mut op) => {
1356 op.left = left;
1357 op.right = right;
1358 Expression::Adjacent(op)
1359 }
1360 Expression::Like(mut like) => {
1361 like.left = left;
1362 like.right = right;
1363 Expression::Like(like)
1364 }
1365 Expression::ILike(mut like) => {
1366 like.left = left;
1367 like.right = right;
1368 Expression::ILike(like)
1369 }
1370 _ => {
1371 return Err(crate::error::Error::Internal(
1372 "unexpected binary transform task".to_string(),
1373 ));
1374 }
1375 };
1376 results.push(transform_fn(rebuilt)?);
1377 }
1378 FinishTask::CastLike(expr) => {
1379 let child = transform_pop_result(&mut results)?;
1380 let rebuilt = match expr {
1381 Expression::Cast(mut cast) => {
1382 cast.this = child;
1383 cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
1384 Expression::Cast(cast)
1385 }
1386 Expression::TryCast(mut cast) => {
1387 cast.this = child;
1388 cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
1389 Expression::TryCast(cast)
1390 }
1391 Expression::SafeCast(mut cast) => {
1392 cast.this = child;
1393 cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
1394 Expression::SafeCast(cast)
1395 }
1396 _ => {
1397 return Err(crate::error::Error::Internal(
1398 "unexpected cast transform task".to_string(),
1399 ));
1400 }
1401 };
1402 results.push(transform_fn(rebuilt)?);
1403 }
1404 FinishTask::List(expr, count) => {
1405 let children = transform_pop_results(&mut results, count)?;
1406 let rebuilt = match expr {
1407 Expression::Function(mut function) => {
1408 function.args = children;
1409 Expression::Function(function)
1410 }
1411 Expression::Array(mut array) => {
1412 array.expressions = children;
1413 Expression::Array(array)
1414 }
1415 Expression::Tuple(mut tuple) => {
1416 tuple.expressions = children;
1417 Expression::Tuple(tuple)
1418 }
1419 Expression::ArrayFunc(mut array) => {
1420 array.expressions = children;
1421 Expression::ArrayFunc(array)
1422 }
1423 Expression::Coalesce(mut func) => {
1424 func.expressions = children;
1425 Expression::Coalesce(func)
1426 }
1427 Expression::Greatest(mut func) => {
1428 func.expressions = children;
1429 Expression::Greatest(func)
1430 }
1431 Expression::Least(mut func) => {
1432 func.expressions = children;
1433 Expression::Least(func)
1434 }
1435 Expression::ArrayConcat(mut func) => {
1436 func.expressions = children;
1437 Expression::ArrayConcat(func)
1438 }
1439 Expression::ArrayIntersect(mut func) => {
1440 func.expressions = children;
1441 Expression::ArrayIntersect(func)
1442 }
1443 Expression::ArrayZip(mut func) => {
1444 func.expressions = children;
1445 Expression::ArrayZip(func)
1446 }
1447 Expression::MapConcat(mut func) => {
1448 func.expressions = children;
1449 Expression::MapConcat(func)
1450 }
1451 Expression::JsonArray(mut func) => {
1452 func.expressions = children;
1453 Expression::JsonArray(func)
1454 }
1455 _ => {
1456 return Err(crate::error::Error::Internal(
1457 "unexpected list transform task".to_string(),
1458 ));
1459 }
1460 };
1461 results.push(transform_fn(rebuilt)?);
1462 }
1463 FinishTask::From(mut from, count) => {
1464 from.expressions = transform_pop_results(&mut results, count)?;
1465 results.push(transform_fn(Expression::From(Box::new(from)))?);
1466 }
1467 FinishTask::Select(frame) => {
1468 let mut select = *frame.select;
1469
1470 if frame.qualify_present {
1471 if let Some(ref mut qualify) = select.qualify {
1472 qualify.this = transform_pop_result(&mut results)?;
1473 }
1474 }
1475 if frame.having_present {
1476 if let Some(ref mut having) = select.having {
1477 having.this = transform_pop_result(&mut results)?;
1478 }
1479 }
1480 if frame.group_by_count > 0 {
1481 if let Some(ref mut group_by) = select.group_by {
1482 group_by.expressions =
1483 transform_pop_results(&mut results, frame.group_by_count)?;
1484 }
1485 }
1486 if frame.where_present {
1487 if let Some(ref mut where_clause) = select.where_clause {
1488 where_clause.this = transform_pop_result(&mut results)?;
1489 }
1490 }
1491 if frame.from_present {
1492 match transform_pop_result(&mut results)? {
1493 Expression::From(from) => {
1494 select.from = Some(*from);
1495 }
1496 _ => {
1497 return Err(crate::error::Error::Internal(
1498 "expected FROM expression result".to_string(),
1499 ));
1500 }
1501 }
1502 }
1503 select.expressions = transform_pop_results(&mut results, frame.expr_count)?;
1504
1505 select.joins = select
1506 .joins
1507 .into_iter()
1508 .map(|mut join| {
1509 join.this = transform_recursive(join.this, transform_fn)?;
1510 if let Some(on) = join.on.take() {
1511 join.on = Some(transform_recursive(on, transform_fn)?);
1512 }
1513 match transform_fn(Expression::Join(Box::new(join)))? {
1514 Expression::Join(j) => Ok(*j),
1515 _ => Err(crate::error::Error::parse(
1516 "Join transformation returned non-join expression",
1517 0,
1518 0,
1519 0,
1520 0,
1521 )),
1522 }
1523 })
1524 .collect::<Result<Vec<_>>>()?;
1525
1526 select.lateral_views = select
1527 .lateral_views
1528 .into_iter()
1529 .map(|mut lv| {
1530 lv.this = transform_recursive(lv.this, transform_fn)?;
1531 Ok(lv)
1532 })
1533 .collect::<Result<Vec<_>>>()?;
1534
1535 if let Some(mut with) = select.with.take() {
1536 with.ctes = with
1537 .ctes
1538 .into_iter()
1539 .map(|mut cte| {
1540 let original = cte.this.clone();
1541 cte.this =
1542 transform_recursive(cte.this, transform_fn).unwrap_or(original);
1543 cte
1544 })
1545 .collect();
1546 select.with = Some(with);
1547 }
1548
1549 if let Some(mut order) = select.order_by.take() {
1550 order.expressions = order
1551 .expressions
1552 .into_iter()
1553 .map(|o| {
1554 let mut o = o;
1555 let original = o.this.clone();
1556 o.this =
1557 transform_recursive(o.this, transform_fn).unwrap_or(original);
1558 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1559 Ok(Expression::Ordered(transformed)) => *transformed,
1560 Ok(_) | Err(_) => o,
1561 }
1562 })
1563 .collect();
1564 select.order_by = Some(order);
1565 }
1566
1567 if let Some(ref mut windows) = select.windows {
1568 for nw in windows.iter_mut() {
1569 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
1570 .into_iter()
1571 .map(|o| {
1572 let mut o = o;
1573 let original = o.this.clone();
1574 o.this = transform_recursive(o.this, transform_fn)
1575 .unwrap_or(original);
1576 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1577 Ok(Expression::Ordered(transformed)) => *transformed,
1578 Ok(_) | Err(_) => o,
1579 }
1580 })
1581 .collect();
1582 }
1583 }
1584
1585 results.push(transform_fn(Expression::Select(Box::new(select)))?);
1586 }
1587 FinishTask::SetOp(expr) => {
1588 let mut children = transform_pop_results(&mut results, 2)?.into_iter();
1589 let left = children.next().expect("left child");
1590 let right = children.next().expect("right child");
1591
1592 let rebuilt = match expr {
1593 Expression::Union(mut union) => {
1594 union.left = left;
1595 union.right = right;
1596 if let Some(mut with) = union.with.take() {
1597 with.ctes = with
1598 .ctes
1599 .into_iter()
1600 .map(|mut cte| {
1601 let original = cte.this.clone();
1602 cte.this = transform_recursive(cte.this, transform_fn)
1603 .unwrap_or(original);
1604 cte
1605 })
1606 .collect();
1607 union.with = Some(with);
1608 }
1609 Expression::Union(union)
1610 }
1611 Expression::Intersect(mut intersect) => {
1612 intersect.left = left;
1613 intersect.right = right;
1614 if let Some(mut with) = intersect.with.take() {
1615 with.ctes = with
1616 .ctes
1617 .into_iter()
1618 .map(|mut cte| {
1619 let original = cte.this.clone();
1620 cte.this = transform_recursive(cte.this, transform_fn)
1621 .unwrap_or(original);
1622 cte
1623 })
1624 .collect();
1625 intersect.with = Some(with);
1626 }
1627 Expression::Intersect(intersect)
1628 }
1629 Expression::Except(mut except) => {
1630 except.left = left;
1631 except.right = right;
1632 if let Some(mut with) = except.with.take() {
1633 with.ctes = with
1634 .ctes
1635 .into_iter()
1636 .map(|mut cte| {
1637 let original = cte.this.clone();
1638 cte.this = transform_recursive(cte.this, transform_fn)
1639 .unwrap_or(original);
1640 cte
1641 })
1642 .collect();
1643 except.with = Some(with);
1644 }
1645 Expression::Except(except)
1646 }
1647 _ => {
1648 return Err(crate::error::Error::Internal(
1649 "unexpected set-op transform task".to_string(),
1650 ));
1651 }
1652 };
1653 results.push(transform_fn(rebuilt)?);
1654 }
1655 },
1656 }
1657 }
1658
1659 match results.len() {
1660 1 => Ok(results.pop().expect("single transform result")),
1661 _ => Err(crate::error::Error::Internal(
1662 "unexpected transform result stack size".to_string(),
1663 )),
1664 }
1665}
1666
1667fn transform_table_ref_recursive<F>(table: TableRef, transform_fn: &F) -> Result<TableRef>
1668where
1669 F: Fn(Expression) -> Result<Expression>,
1670{
1671 match transform_recursive(Expression::Table(Box::new(table)), transform_fn)? {
1672 Expression::Table(table) => Ok(*table),
1673 _ => Err(crate::error::Error::parse(
1674 "TableRef transformation returned non-table expression",
1675 0,
1676 0,
1677 0,
1678 0,
1679 )),
1680 }
1681}
1682
1683fn transform_from_recursive<F>(from: From, transform_fn: &F) -> Result<From>
1684where
1685 F: Fn(Expression) -> Result<Expression>,
1686{
1687 match transform_recursive(Expression::From(Box::new(from)), transform_fn)? {
1688 Expression::From(from) => Ok(*from),
1689 _ => Err(crate::error::Error::parse(
1690 "FROM transformation returned non-FROM expression",
1691 0,
1692 0,
1693 0,
1694 0,
1695 )),
1696 }
1697}
1698
1699fn transform_join_recursive<F>(mut join: Join, transform_fn: &F) -> Result<Join>
1700where
1701 F: Fn(Expression) -> Result<Expression>,
1702{
1703 join.this = transform_recursive(join.this, transform_fn)?;
1704 if let Some(on) = join.on.take() {
1705 join.on = Some(transform_recursive(on, transform_fn)?);
1706 }
1707 if let Some(match_condition) = join.match_condition.take() {
1708 join.match_condition = Some(transform_recursive(match_condition, transform_fn)?);
1709 }
1710 join.pivots = join
1711 .pivots
1712 .into_iter()
1713 .map(|pivot| transform_recursive(pivot, transform_fn))
1714 .collect::<Result<Vec<_>>>()?;
1715
1716 match transform_fn(Expression::Join(Box::new(join)))? {
1717 Expression::Join(join) => Ok(*join),
1718 _ => Err(crate::error::Error::parse(
1719 "Join transformation returned non-join expression",
1720 0,
1721 0,
1722 0,
1723 0,
1724 )),
1725 }
1726}
1727
1728fn transform_output_clause_recursive<F>(
1729 mut output: OutputClause,
1730 transform_fn: &F,
1731) -> Result<OutputClause>
1732where
1733 F: Fn(Expression) -> Result<Expression>,
1734{
1735 output.columns = output
1736 .columns
1737 .into_iter()
1738 .map(|column| transform_recursive(column, transform_fn))
1739 .collect::<Result<Vec<_>>>()?;
1740 if let Some(into_table) = output.into_table.take() {
1741 output.into_table = Some(transform_recursive(into_table, transform_fn)?);
1742 }
1743 Ok(output)
1744}
1745
1746fn transform_with_recursive<F>(mut with: With, transform_fn: &F) -> Result<With>
1747where
1748 F: Fn(Expression) -> Result<Expression>,
1749{
1750 with.ctes = with
1751 .ctes
1752 .into_iter()
1753 .map(|mut cte| {
1754 cte.this = transform_recursive(cte.this, transform_fn)?;
1755 Ok(cte)
1756 })
1757 .collect::<Result<Vec<_>>>()?;
1758 if let Some(search) = with.search.take() {
1759 with.search = Some(Box::new(transform_recursive(*search, transform_fn)?));
1760 }
1761 Ok(with)
1762}
1763
1764fn transform_order_by_recursive<F>(mut order: OrderBy, transform_fn: &F) -> Result<OrderBy>
1765where
1766 F: Fn(Expression) -> Result<Expression>,
1767{
1768 order.expressions = order
1769 .expressions
1770 .into_iter()
1771 .map(|mut ordered| {
1772 let original = ordered.this.clone();
1773 ordered.this = transform_recursive(ordered.this, transform_fn).unwrap_or(original);
1774 match transform_fn(Expression::Ordered(Box::new(ordered.clone()))) {
1775 Ok(Expression::Ordered(transformed)) => Ok(*transformed),
1776 Ok(_) | Err(_) => Ok(ordered),
1777 }
1778 })
1779 .collect::<Result<Vec<_>>>()?;
1780 Ok(order)
1781}
1782
1783fn transform_recursive_reference<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
1784where
1785 F: Fn(Expression) -> Result<Expression>,
1786{
1787 use crate::expressions::BinaryOp;
1788
1789 // Helper macro to recurse into AggFunc-based expressions (this, filter, order_by, having_max, limit).
1790 macro_rules! recurse_agg {
1791 ($variant:ident, $f:expr) => {{
1792 let mut f = $f;
1793 f.this = transform_recursive(f.this, transform_fn)?;
1794 if let Some(filter) = f.filter.take() {
1795 f.filter = Some(transform_recursive(filter, transform_fn)?);
1796 }
1797 for ord in &mut f.order_by {
1798 ord.this = transform_recursive(
1799 std::mem::replace(&mut ord.this, Expression::Null(crate::expressions::Null)),
1800 transform_fn,
1801 )?;
1802 }
1803 if let Some((ref mut expr, _)) = f.having_max {
1804 *expr = Box::new(transform_recursive(
1805 std::mem::replace(expr.as_mut(), Expression::Null(crate::expressions::Null)),
1806 transform_fn,
1807 )?);
1808 }
1809 if let Some(limit) = f.limit.take() {
1810 f.limit = Some(Box::new(transform_recursive(*limit, transform_fn)?));
1811 }
1812 Expression::$variant(f)
1813 }};
1814 }
1815
1816 // Helper macro to transform binary ops with Box<BinaryOp>
1817 macro_rules! transform_binary {
1818 ($variant:ident, $op:expr) => {{
1819 let left = transform_recursive($op.left, transform_fn)?;
1820 let right = transform_recursive($op.right, transform_fn)?;
1821 Expression::$variant(Box::new(BinaryOp {
1822 left,
1823 right,
1824 left_comments: $op.left_comments,
1825 operator_comments: $op.operator_comments,
1826 trailing_comments: $op.trailing_comments,
1827 inferred_type: $op.inferred_type,
1828 }))
1829 }};
1830 }
1831
1832 // Fast path: leaf nodes never need child traversal, apply transform directly
1833 if matches!(
1834 &expr,
1835 Expression::Literal(_)
1836 | Expression::Boolean(_)
1837 | Expression::Null(_)
1838 | Expression::Identifier(_)
1839 | Expression::Star(_)
1840 | Expression::Parameter(_)
1841 | Expression::Placeholder(_)
1842 | Expression::SessionParameter(_)
1843 ) {
1844 return transform_fn(expr);
1845 }
1846
1847 // First recursively transform children, then apply the transform function
1848 let expr = match expr {
1849 Expression::Select(mut select) => {
1850 select.expressions = select
1851 .expressions
1852 .into_iter()
1853 .map(|e| transform_recursive(e, transform_fn))
1854 .collect::<Result<Vec<_>>>()?;
1855
1856 // Transform FROM clause
1857 if let Some(mut from) = select.from.take() {
1858 from.expressions = from
1859 .expressions
1860 .into_iter()
1861 .map(|e| transform_recursive(e, transform_fn))
1862 .collect::<Result<Vec<_>>>()?;
1863 select.from = Some(from);
1864 }
1865
1866 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
1867 select.joins = select
1868 .joins
1869 .into_iter()
1870 .map(|mut join| {
1871 join.this = transform_recursive(join.this, transform_fn)?;
1872 if let Some(on) = join.on.take() {
1873 join.on = Some(transform_recursive(on, transform_fn)?);
1874 }
1875 // Wrap join in Expression::Join to allow transform_fn to transform it
1876 match transform_fn(Expression::Join(Box::new(join)))? {
1877 Expression::Join(j) => Ok(*j),
1878 _ => Err(crate::error::Error::parse(
1879 "Join transformation returned non-join expression",
1880 0,
1881 0,
1882 0,
1883 0,
1884 )),
1885 }
1886 })
1887 .collect::<Result<Vec<_>>>()?;
1888
1889 // Transform LATERAL VIEW expressions (Hive/Spark)
1890 select.lateral_views = select
1891 .lateral_views
1892 .into_iter()
1893 .map(|mut lv| {
1894 lv.this = transform_recursive(lv.this, transform_fn)?;
1895 Ok(lv)
1896 })
1897 .collect::<Result<Vec<_>>>()?;
1898
1899 // Transform WHERE clause
1900 if let Some(mut where_clause) = select.where_clause.take() {
1901 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1902 select.where_clause = Some(where_clause);
1903 }
1904
1905 // Transform GROUP BY
1906 if let Some(mut group_by) = select.group_by.take() {
1907 group_by.expressions = group_by
1908 .expressions
1909 .into_iter()
1910 .map(|e| transform_recursive(e, transform_fn))
1911 .collect::<Result<Vec<_>>>()?;
1912 select.group_by = Some(group_by);
1913 }
1914
1915 // Transform HAVING
1916 if let Some(mut having) = select.having.take() {
1917 having.this = transform_recursive(having.this, transform_fn)?;
1918 select.having = Some(having);
1919 }
1920
1921 // Transform WITH (CTEs)
1922 if let Some(mut with) = select.with.take() {
1923 with.ctes = with
1924 .ctes
1925 .into_iter()
1926 .map(|mut cte| {
1927 let original = cte.this.clone();
1928 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1929 cte
1930 })
1931 .collect();
1932 select.with = Some(with);
1933 }
1934
1935 // Transform ORDER BY
1936 if let Some(mut order) = select.order_by.take() {
1937 order.expressions = order
1938 .expressions
1939 .into_iter()
1940 .map(|o| {
1941 let mut o = o;
1942 let original = o.this.clone();
1943 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1944 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
1945 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1946 Ok(Expression::Ordered(transformed)) => *transformed,
1947 Ok(_) | Err(_) => o,
1948 }
1949 })
1950 .collect();
1951 select.order_by = Some(order);
1952 }
1953
1954 // Transform WINDOW clause order_by
1955 if let Some(ref mut windows) = select.windows {
1956 for nw in windows.iter_mut() {
1957 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
1958 .into_iter()
1959 .map(|o| {
1960 let mut o = o;
1961 let original = o.this.clone();
1962 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1963 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1964 Ok(Expression::Ordered(transformed)) => *transformed,
1965 Ok(_) | Err(_) => o,
1966 }
1967 })
1968 .collect();
1969 }
1970 }
1971
1972 // Transform QUALIFY
1973 if let Some(mut qual) = select.qualify.take() {
1974 qual.this = transform_recursive(qual.this, transform_fn)?;
1975 select.qualify = Some(qual);
1976 }
1977
1978 Expression::Select(select)
1979 }
1980 Expression::Function(mut f) => {
1981 f.args = f
1982 .args
1983 .into_iter()
1984 .map(|e| transform_recursive(e, transform_fn))
1985 .collect::<Result<Vec<_>>>()?;
1986 Expression::Function(f)
1987 }
1988 Expression::AggregateFunction(mut f) => {
1989 f.args = f
1990 .args
1991 .into_iter()
1992 .map(|e| transform_recursive(e, transform_fn))
1993 .collect::<Result<Vec<_>>>()?;
1994 if let Some(filter) = f.filter {
1995 f.filter = Some(transform_recursive(filter, transform_fn)?);
1996 }
1997 Expression::AggregateFunction(f)
1998 }
1999 Expression::WindowFunction(mut wf) => {
2000 wf.this = transform_recursive(wf.this, transform_fn)?;
2001 wf.over.partition_by = wf
2002 .over
2003 .partition_by
2004 .into_iter()
2005 .map(|e| transform_recursive(e, transform_fn))
2006 .collect::<Result<Vec<_>>>()?;
2007 // Transform order_by items through Expression::Ordered wrapper
2008 wf.over.order_by = wf
2009 .over
2010 .order_by
2011 .into_iter()
2012 .map(|o| {
2013 let mut o = o;
2014 o.this = transform_recursive(o.this, transform_fn)?;
2015 match transform_fn(Expression::Ordered(Box::new(o)))? {
2016 Expression::Ordered(transformed) => Ok(*transformed),
2017 _ => Err(crate::error::Error::parse(
2018 "Ordered transformation returned non-Ordered expression",
2019 0,
2020 0,
2021 0,
2022 0,
2023 )),
2024 }
2025 })
2026 .collect::<Result<Vec<_>>>()?;
2027 Expression::WindowFunction(wf)
2028 }
2029 Expression::Alias(mut a) => {
2030 a.this = transform_recursive(a.this, transform_fn)?;
2031 Expression::Alias(a)
2032 }
2033 Expression::Cast(mut c) => {
2034 c.this = transform_recursive(c.this, transform_fn)?;
2035 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
2036 c.to = transform_data_type_recursive(c.to, transform_fn)?;
2037 Expression::Cast(c)
2038 }
2039 Expression::And(op) => transform_binary!(And, *op),
2040 Expression::Or(op) => transform_binary!(Or, *op),
2041 Expression::Add(op) => transform_binary!(Add, *op),
2042 Expression::Sub(op) => transform_binary!(Sub, *op),
2043 Expression::Mul(op) => transform_binary!(Mul, *op),
2044 Expression::Div(op) => transform_binary!(Div, *op),
2045 Expression::Eq(op) => transform_binary!(Eq, *op),
2046 Expression::Lt(op) => transform_binary!(Lt, *op),
2047 Expression::Gt(op) => transform_binary!(Gt, *op),
2048 Expression::Paren(mut p) => {
2049 p.this = transform_recursive(p.this, transform_fn)?;
2050 Expression::Paren(p)
2051 }
2052 Expression::Coalesce(mut f) => {
2053 f.expressions = f
2054 .expressions
2055 .into_iter()
2056 .map(|e| transform_recursive(e, transform_fn))
2057 .collect::<Result<Vec<_>>>()?;
2058 Expression::Coalesce(f)
2059 }
2060 Expression::IfNull(mut f) => {
2061 f.this = transform_recursive(f.this, transform_fn)?;
2062 f.expression = transform_recursive(f.expression, transform_fn)?;
2063 Expression::IfNull(f)
2064 }
2065 Expression::Nvl(mut f) => {
2066 f.this = transform_recursive(f.this, transform_fn)?;
2067 f.expression = transform_recursive(f.expression, transform_fn)?;
2068 Expression::Nvl(f)
2069 }
2070 Expression::In(mut i) => {
2071 i.this = transform_recursive(i.this, transform_fn)?;
2072 i.expressions = i
2073 .expressions
2074 .into_iter()
2075 .map(|e| transform_recursive(e, transform_fn))
2076 .collect::<Result<Vec<_>>>()?;
2077 if let Some(query) = i.query {
2078 i.query = Some(transform_recursive(query, transform_fn)?);
2079 }
2080 Expression::In(i)
2081 }
2082 Expression::Not(mut n) => {
2083 n.this = transform_recursive(n.this, transform_fn)?;
2084 Expression::Not(n)
2085 }
2086 Expression::ArraySlice(mut s) => {
2087 s.this = transform_recursive(s.this, transform_fn)?;
2088 if let Some(start) = s.start {
2089 s.start = Some(transform_recursive(start, transform_fn)?);
2090 }
2091 if let Some(end) = s.end {
2092 s.end = Some(transform_recursive(end, transform_fn)?);
2093 }
2094 Expression::ArraySlice(s)
2095 }
2096 Expression::Subscript(mut s) => {
2097 s.this = transform_recursive(s.this, transform_fn)?;
2098 s.index = transform_recursive(s.index, transform_fn)?;
2099 Expression::Subscript(s)
2100 }
2101 Expression::Array(mut a) => {
2102 a.expressions = a
2103 .expressions
2104 .into_iter()
2105 .map(|e| transform_recursive(e, transform_fn))
2106 .collect::<Result<Vec<_>>>()?;
2107 Expression::Array(a)
2108 }
2109 Expression::Struct(mut s) => {
2110 let mut new_fields = Vec::new();
2111 for (name, expr) in s.fields {
2112 let transformed = transform_recursive(expr, transform_fn)?;
2113 new_fields.push((name, transformed));
2114 }
2115 s.fields = new_fields;
2116 Expression::Struct(s)
2117 }
2118 Expression::NamedArgument(mut na) => {
2119 na.value = transform_recursive(na.value, transform_fn)?;
2120 Expression::NamedArgument(na)
2121 }
2122 Expression::MapFunc(mut m) => {
2123 m.keys = m
2124 .keys
2125 .into_iter()
2126 .map(|e| transform_recursive(e, transform_fn))
2127 .collect::<Result<Vec<_>>>()?;
2128 m.values = m
2129 .values
2130 .into_iter()
2131 .map(|e| transform_recursive(e, transform_fn))
2132 .collect::<Result<Vec<_>>>()?;
2133 Expression::MapFunc(m)
2134 }
2135 Expression::ArrayFunc(mut a) => {
2136 a.expressions = a
2137 .expressions
2138 .into_iter()
2139 .map(|e| transform_recursive(e, transform_fn))
2140 .collect::<Result<Vec<_>>>()?;
2141 Expression::ArrayFunc(a)
2142 }
2143 Expression::Lambda(mut l) => {
2144 l.body = transform_recursive(l.body, transform_fn)?;
2145 Expression::Lambda(l)
2146 }
2147 Expression::JsonExtract(mut f) => {
2148 f.this = transform_recursive(f.this, transform_fn)?;
2149 f.path = transform_recursive(f.path, transform_fn)?;
2150 Expression::JsonExtract(f)
2151 }
2152 Expression::JsonExtractScalar(mut f) => {
2153 f.this = transform_recursive(f.this, transform_fn)?;
2154 f.path = transform_recursive(f.path, transform_fn)?;
2155 Expression::JsonExtractScalar(f)
2156 }
2157
2158 // ===== UnaryFunc-based expressions =====
2159 // These all have a single `this: Expression` child
2160 Expression::Length(mut f) => {
2161 f.this = transform_recursive(f.this, transform_fn)?;
2162 Expression::Length(f)
2163 }
2164 Expression::Upper(mut f) => {
2165 f.this = transform_recursive(f.this, transform_fn)?;
2166 Expression::Upper(f)
2167 }
2168 Expression::Lower(mut f) => {
2169 f.this = transform_recursive(f.this, transform_fn)?;
2170 Expression::Lower(f)
2171 }
2172 Expression::LTrim(mut f) => {
2173 f.this = transform_recursive(f.this, transform_fn)?;
2174 Expression::LTrim(f)
2175 }
2176 Expression::RTrim(mut f) => {
2177 f.this = transform_recursive(f.this, transform_fn)?;
2178 Expression::RTrim(f)
2179 }
2180 Expression::Reverse(mut f) => {
2181 f.this = transform_recursive(f.this, transform_fn)?;
2182 Expression::Reverse(f)
2183 }
2184 Expression::Abs(mut f) => {
2185 f.this = transform_recursive(f.this, transform_fn)?;
2186 Expression::Abs(f)
2187 }
2188 Expression::Ceil(mut f) => {
2189 f.this = transform_recursive(f.this, transform_fn)?;
2190 Expression::Ceil(f)
2191 }
2192 Expression::Floor(mut f) => {
2193 f.this = transform_recursive(f.this, transform_fn)?;
2194 Expression::Floor(f)
2195 }
2196 Expression::Sign(mut f) => {
2197 f.this = transform_recursive(f.this, transform_fn)?;
2198 Expression::Sign(f)
2199 }
2200 Expression::Sqrt(mut f) => {
2201 f.this = transform_recursive(f.this, transform_fn)?;
2202 Expression::Sqrt(f)
2203 }
2204 Expression::Cbrt(mut f) => {
2205 f.this = transform_recursive(f.this, transform_fn)?;
2206 Expression::Cbrt(f)
2207 }
2208 Expression::Ln(mut f) => {
2209 f.this = transform_recursive(f.this, transform_fn)?;
2210 Expression::Ln(f)
2211 }
2212 Expression::Log(mut f) => {
2213 f.this = transform_recursive(f.this, transform_fn)?;
2214 if let Some(base) = f.base {
2215 f.base = Some(transform_recursive(base, transform_fn)?);
2216 }
2217 Expression::Log(f)
2218 }
2219 Expression::Exp(mut f) => {
2220 f.this = transform_recursive(f.this, transform_fn)?;
2221 Expression::Exp(f)
2222 }
2223 Expression::Date(mut f) => {
2224 f.this = transform_recursive(f.this, transform_fn)?;
2225 Expression::Date(f)
2226 }
2227 Expression::Stddev(f) => recurse_agg!(Stddev, f),
2228 Expression::StddevSamp(f) => recurse_agg!(StddevSamp, f),
2229 Expression::Variance(f) => recurse_agg!(Variance, f),
2230
2231 // ===== BinaryFunc-based expressions =====
2232 Expression::ModFunc(mut f) => {
2233 f.this = transform_recursive(f.this, transform_fn)?;
2234 f.expression = transform_recursive(f.expression, transform_fn)?;
2235 Expression::ModFunc(f)
2236 }
2237 Expression::Power(mut f) => {
2238 f.this = transform_recursive(f.this, transform_fn)?;
2239 f.expression = transform_recursive(f.expression, transform_fn)?;
2240 Expression::Power(f)
2241 }
2242 Expression::MapFromArrays(mut f) => {
2243 f.this = transform_recursive(f.this, transform_fn)?;
2244 f.expression = transform_recursive(f.expression, transform_fn)?;
2245 Expression::MapFromArrays(f)
2246 }
2247 Expression::ElementAt(mut f) => {
2248 f.this = transform_recursive(f.this, transform_fn)?;
2249 f.expression = transform_recursive(f.expression, transform_fn)?;
2250 Expression::ElementAt(f)
2251 }
2252 Expression::MapContainsKey(mut f) => {
2253 f.this = transform_recursive(f.this, transform_fn)?;
2254 f.expression = transform_recursive(f.expression, transform_fn)?;
2255 Expression::MapContainsKey(f)
2256 }
2257 Expression::Left(mut f) => {
2258 f.this = transform_recursive(f.this, transform_fn)?;
2259 f.length = transform_recursive(f.length, transform_fn)?;
2260 Expression::Left(f)
2261 }
2262 Expression::Right(mut f) => {
2263 f.this = transform_recursive(f.this, transform_fn)?;
2264 f.length = transform_recursive(f.length, transform_fn)?;
2265 Expression::Right(f)
2266 }
2267 Expression::Repeat(mut f) => {
2268 f.this = transform_recursive(f.this, transform_fn)?;
2269 f.times = transform_recursive(f.times, transform_fn)?;
2270 Expression::Repeat(f)
2271 }
2272
2273 // ===== Complex function expressions =====
2274 Expression::Substring(mut f) => {
2275 f.this = transform_recursive(f.this, transform_fn)?;
2276 f.start = transform_recursive(f.start, transform_fn)?;
2277 if let Some(len) = f.length {
2278 f.length = Some(transform_recursive(len, transform_fn)?);
2279 }
2280 Expression::Substring(f)
2281 }
2282 Expression::Replace(mut f) => {
2283 f.this = transform_recursive(f.this, transform_fn)?;
2284 f.old = transform_recursive(f.old, transform_fn)?;
2285 f.new = transform_recursive(f.new, transform_fn)?;
2286 Expression::Replace(f)
2287 }
2288 Expression::ConcatWs(mut f) => {
2289 f.separator = transform_recursive(f.separator, transform_fn)?;
2290 f.expressions = f
2291 .expressions
2292 .into_iter()
2293 .map(|e| transform_recursive(e, transform_fn))
2294 .collect::<Result<Vec<_>>>()?;
2295 Expression::ConcatWs(f)
2296 }
2297 Expression::Trim(mut f) => {
2298 f.this = transform_recursive(f.this, transform_fn)?;
2299 if let Some(chars) = f.characters {
2300 f.characters = Some(transform_recursive(chars, transform_fn)?);
2301 }
2302 Expression::Trim(f)
2303 }
2304 Expression::Split(mut f) => {
2305 f.this = transform_recursive(f.this, transform_fn)?;
2306 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
2307 Expression::Split(f)
2308 }
2309 Expression::Lpad(mut f) => {
2310 f.this = transform_recursive(f.this, transform_fn)?;
2311 f.length = transform_recursive(f.length, transform_fn)?;
2312 if let Some(fill) = f.fill {
2313 f.fill = Some(transform_recursive(fill, transform_fn)?);
2314 }
2315 Expression::Lpad(f)
2316 }
2317 Expression::Rpad(mut f) => {
2318 f.this = transform_recursive(f.this, transform_fn)?;
2319 f.length = transform_recursive(f.length, transform_fn)?;
2320 if let Some(fill) = f.fill {
2321 f.fill = Some(transform_recursive(fill, transform_fn)?);
2322 }
2323 Expression::Rpad(f)
2324 }
2325
2326 // ===== Conditional expressions =====
2327 Expression::Case(mut c) => {
2328 if let Some(operand) = c.operand {
2329 c.operand = Some(transform_recursive(operand, transform_fn)?);
2330 }
2331 c.whens = c
2332 .whens
2333 .into_iter()
2334 .map(|(cond, then)| {
2335 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
2336 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
2337 (new_cond, new_then)
2338 })
2339 .collect();
2340 if let Some(else_expr) = c.else_ {
2341 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
2342 }
2343 Expression::Case(c)
2344 }
2345 Expression::IfFunc(mut f) => {
2346 f.condition = transform_recursive(f.condition, transform_fn)?;
2347 f.true_value = transform_recursive(f.true_value, transform_fn)?;
2348 if let Some(false_val) = f.false_value {
2349 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
2350 }
2351 Expression::IfFunc(f)
2352 }
2353
2354 // ===== Date/Time expressions =====
2355 Expression::DateAdd(mut f) => {
2356 f.this = transform_recursive(f.this, transform_fn)?;
2357 f.interval = transform_recursive(f.interval, transform_fn)?;
2358 Expression::DateAdd(f)
2359 }
2360 Expression::DateSub(mut f) => {
2361 f.this = transform_recursive(f.this, transform_fn)?;
2362 f.interval = transform_recursive(f.interval, transform_fn)?;
2363 Expression::DateSub(f)
2364 }
2365 Expression::DateDiff(mut f) => {
2366 f.this = transform_recursive(f.this, transform_fn)?;
2367 f.expression = transform_recursive(f.expression, transform_fn)?;
2368 Expression::DateDiff(f)
2369 }
2370 Expression::DateTrunc(mut f) => {
2371 f.this = transform_recursive(f.this, transform_fn)?;
2372 Expression::DateTrunc(f)
2373 }
2374 Expression::Extract(mut f) => {
2375 f.this = transform_recursive(f.this, transform_fn)?;
2376 Expression::Extract(f)
2377 }
2378
2379 // ===== JSON expressions =====
2380 Expression::JsonObject(mut f) => {
2381 f.pairs = f
2382 .pairs
2383 .into_iter()
2384 .map(|(k, v)| {
2385 let new_k = transform_recursive(k, transform_fn)?;
2386 let new_v = transform_recursive(v, transform_fn)?;
2387 Ok((new_k, new_v))
2388 })
2389 .collect::<Result<Vec<_>>>()?;
2390 Expression::JsonObject(f)
2391 }
2392
2393 // ===== Subquery expressions =====
2394 Expression::Subquery(mut s) => {
2395 s.this = transform_recursive(s.this, transform_fn)?;
2396 Expression::Subquery(s)
2397 }
2398 Expression::Exists(mut e) => {
2399 e.this = transform_recursive(e.this, transform_fn)?;
2400 Expression::Exists(e)
2401 }
2402 Expression::Describe(mut d) => {
2403 d.target = transform_recursive(d.target, transform_fn)?;
2404 Expression::Describe(d)
2405 }
2406
2407 // ===== Set operations =====
2408 Expression::Union(mut u) => {
2409 let left = std::mem::replace(&mut u.left, Expression::Null(Null));
2410 u.left = transform_recursive(left, transform_fn)?;
2411 let right = std::mem::replace(&mut u.right, Expression::Null(Null));
2412 u.right = transform_recursive(right, transform_fn)?;
2413 if let Some(mut with) = u.with.take() {
2414 with.ctes = with
2415 .ctes
2416 .into_iter()
2417 .map(|mut cte| {
2418 let original = cte.this.clone();
2419 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2420 cte
2421 })
2422 .collect();
2423 u.with = Some(with);
2424 }
2425 Expression::Union(u)
2426 }
2427 Expression::Intersect(mut i) => {
2428 let left = std::mem::replace(&mut i.left, Expression::Null(Null));
2429 i.left = transform_recursive(left, transform_fn)?;
2430 let right = std::mem::replace(&mut i.right, Expression::Null(Null));
2431 i.right = transform_recursive(right, transform_fn)?;
2432 if let Some(mut with) = i.with.take() {
2433 with.ctes = with
2434 .ctes
2435 .into_iter()
2436 .map(|mut cte| {
2437 let original = cte.this.clone();
2438 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2439 cte
2440 })
2441 .collect();
2442 i.with = Some(with);
2443 }
2444 Expression::Intersect(i)
2445 }
2446 Expression::Except(mut e) => {
2447 let left = std::mem::replace(&mut e.left, Expression::Null(Null));
2448 e.left = transform_recursive(left, transform_fn)?;
2449 let right = std::mem::replace(&mut e.right, Expression::Null(Null));
2450 e.right = transform_recursive(right, transform_fn)?;
2451 if let Some(mut with) = e.with.take() {
2452 with.ctes = with
2453 .ctes
2454 .into_iter()
2455 .map(|mut cte| {
2456 let original = cte.this.clone();
2457 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2458 cte
2459 })
2460 .collect();
2461 e.with = Some(with);
2462 }
2463 Expression::Except(e)
2464 }
2465
2466 // ===== DML expressions =====
2467 Expression::Insert(mut ins) => {
2468 // Transform VALUES clause expressions
2469 let mut new_values = Vec::new();
2470 for row in ins.values {
2471 let mut new_row = Vec::new();
2472 for e in row {
2473 new_row.push(transform_recursive(e, transform_fn)?);
2474 }
2475 new_values.push(new_row);
2476 }
2477 ins.values = new_values;
2478
2479 // Transform query (for INSERT ... SELECT)
2480 if let Some(query) = ins.query {
2481 ins.query = Some(transform_recursive(query, transform_fn)?);
2482 }
2483
2484 // Transform RETURNING clause
2485 let mut new_returning = Vec::new();
2486 for e in ins.returning {
2487 new_returning.push(transform_recursive(e, transform_fn)?);
2488 }
2489 ins.returning = new_returning;
2490
2491 // Transform ON CONFLICT clause
2492 if let Some(on_conflict) = ins.on_conflict {
2493 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
2494 }
2495
2496 Expression::Insert(ins)
2497 }
2498 Expression::Update(mut upd) => {
2499 upd.table = transform_table_ref_recursive(upd.table, transform_fn)?;
2500 upd.extra_tables = upd
2501 .extra_tables
2502 .into_iter()
2503 .map(|table| transform_table_ref_recursive(table, transform_fn))
2504 .collect::<Result<Vec<_>>>()?;
2505 upd.table_joins = upd
2506 .table_joins
2507 .into_iter()
2508 .map(|join| transform_join_recursive(join, transform_fn))
2509 .collect::<Result<Vec<_>>>()?;
2510 upd.set = upd
2511 .set
2512 .into_iter()
2513 .map(|(id, val)| {
2514 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
2515 (id, new_val)
2516 })
2517 .collect();
2518 if let Some(from_clause) = upd.from_clause.take() {
2519 upd.from_clause = Some(transform_from_recursive(from_clause, transform_fn)?);
2520 }
2521 upd.from_joins = upd
2522 .from_joins
2523 .into_iter()
2524 .map(|join| transform_join_recursive(join, transform_fn))
2525 .collect::<Result<Vec<_>>>()?;
2526 if let Some(mut where_clause) = upd.where_clause.take() {
2527 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
2528 upd.where_clause = Some(where_clause);
2529 }
2530 upd.returning = upd
2531 .returning
2532 .into_iter()
2533 .map(|expr| transform_recursive(expr, transform_fn))
2534 .collect::<Result<Vec<_>>>()?;
2535 if let Some(output) = upd.output.take() {
2536 upd.output = Some(transform_output_clause_recursive(output, transform_fn)?);
2537 }
2538 if let Some(with) = upd.with.take() {
2539 upd.with = Some(transform_with_recursive(with, transform_fn)?);
2540 }
2541 if let Some(limit) = upd.limit.take() {
2542 upd.limit = Some(transform_recursive(limit, transform_fn)?);
2543 }
2544 if let Some(order_by) = upd.order_by.take() {
2545 upd.order_by = Some(transform_order_by_recursive(order_by, transform_fn)?);
2546 }
2547 Expression::Update(upd)
2548 }
2549 Expression::Delete(mut del) => {
2550 del.table = transform_table_ref_recursive(del.table, transform_fn)?;
2551 del.using = del
2552 .using
2553 .into_iter()
2554 .map(|table| transform_table_ref_recursive(table, transform_fn))
2555 .collect::<Result<Vec<_>>>()?;
2556 if let Some(mut where_clause) = del.where_clause.take() {
2557 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
2558 del.where_clause = Some(where_clause);
2559 }
2560 if let Some(output) = del.output.take() {
2561 del.output = Some(transform_output_clause_recursive(output, transform_fn)?);
2562 }
2563 if let Some(with) = del.with.take() {
2564 del.with = Some(transform_with_recursive(with, transform_fn)?);
2565 }
2566 if let Some(limit) = del.limit.take() {
2567 del.limit = Some(transform_recursive(limit, transform_fn)?);
2568 }
2569 if let Some(order_by) = del.order_by.take() {
2570 del.order_by = Some(transform_order_by_recursive(order_by, transform_fn)?);
2571 }
2572 del.returning = del
2573 .returning
2574 .into_iter()
2575 .map(|expr| transform_recursive(expr, transform_fn))
2576 .collect::<Result<Vec<_>>>()?;
2577 del.tables = del
2578 .tables
2579 .into_iter()
2580 .map(|table| transform_table_ref_recursive(table, transform_fn))
2581 .collect::<Result<Vec<_>>>()?;
2582 del.joins = del
2583 .joins
2584 .into_iter()
2585 .map(|join| transform_join_recursive(join, transform_fn))
2586 .collect::<Result<Vec<_>>>()?;
2587 Expression::Delete(del)
2588 }
2589
2590 // ===== CTE expressions =====
2591 Expression::With(mut w) => {
2592 w.ctes = w
2593 .ctes
2594 .into_iter()
2595 .map(|mut cte| {
2596 let original = cte.this.clone();
2597 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2598 cte
2599 })
2600 .collect();
2601 Expression::With(w)
2602 }
2603 Expression::Cte(mut c) => {
2604 c.this = transform_recursive(c.this, transform_fn)?;
2605 Expression::Cte(c)
2606 }
2607
2608 // ===== Order expressions =====
2609 Expression::Ordered(mut o) => {
2610 o.this = transform_recursive(o.this, transform_fn)?;
2611 Expression::Ordered(o)
2612 }
2613
2614 // ===== Negation =====
2615 Expression::Neg(mut n) => {
2616 n.this = transform_recursive(n.this, transform_fn)?;
2617 Expression::Neg(n)
2618 }
2619
2620 // ===== Between =====
2621 Expression::Between(mut b) => {
2622 b.this = transform_recursive(b.this, transform_fn)?;
2623 b.low = transform_recursive(b.low, transform_fn)?;
2624 b.high = transform_recursive(b.high, transform_fn)?;
2625 Expression::Between(b)
2626 }
2627 Expression::IsNull(mut i) => {
2628 i.this = transform_recursive(i.this, transform_fn)?;
2629 Expression::IsNull(i)
2630 }
2631 Expression::IsTrue(mut i) => {
2632 i.this = transform_recursive(i.this, transform_fn)?;
2633 Expression::IsTrue(i)
2634 }
2635 Expression::IsFalse(mut i) => {
2636 i.this = transform_recursive(i.this, transform_fn)?;
2637 Expression::IsFalse(i)
2638 }
2639
2640 // ===== Like expressions =====
2641 Expression::Like(mut l) => {
2642 l.left = transform_recursive(l.left, transform_fn)?;
2643 l.right = transform_recursive(l.right, transform_fn)?;
2644 Expression::Like(l)
2645 }
2646 Expression::ILike(mut l) => {
2647 l.left = transform_recursive(l.left, transform_fn)?;
2648 l.right = transform_recursive(l.right, transform_fn)?;
2649 Expression::ILike(l)
2650 }
2651
// ===== Additional binary ops (also rebuilt via `transform_binary!`) =====
2653 Expression::Neq(op) => transform_binary!(Neq, *op),
2654 Expression::Lte(op) => transform_binary!(Lte, *op),
2655 Expression::Gte(op) => transform_binary!(Gte, *op),
2656 Expression::Mod(op) => transform_binary!(Mod, *op),
2657 Expression::Concat(op) => transform_binary!(Concat, *op),
2658 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
2659 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
2660 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
2661 Expression::Is(op) => transform_binary!(Is, *op),
2662
2663 // ===== TryCast / SafeCast =====
2664 Expression::TryCast(mut c) => {
2665 c.this = transform_recursive(c.this, transform_fn)?;
2666 c.to = transform_data_type_recursive(c.to, transform_fn)?;
2667 Expression::TryCast(c)
2668 }
2669 Expression::SafeCast(mut c) => {
2670 c.this = transform_recursive(c.this, transform_fn)?;
2671 c.to = transform_data_type_recursive(c.to, transform_fn)?;
2672 Expression::SafeCast(c)
2673 }
2674
2675 // ===== Misc =====
2676 Expression::Unnest(mut f) => {
2677 f.this = transform_recursive(f.this, transform_fn)?;
2678 f.expressions = f
2679 .expressions
2680 .into_iter()
2681 .map(|e| transform_recursive(e, transform_fn))
2682 .collect::<Result<Vec<_>>>()?;
2683 Expression::Unnest(f)
2684 }
2685 Expression::Explode(mut f) => {
2686 f.this = transform_recursive(f.this, transform_fn)?;
2687 Expression::Explode(f)
2688 }
2689 Expression::GroupConcat(mut f) => {
2690 f.this = transform_recursive(f.this, transform_fn)?;
2691 Expression::GroupConcat(f)
2692 }
2693 Expression::StringAgg(mut f) => {
2694 f.this = transform_recursive(f.this, transform_fn)?;
2695 Expression::StringAgg(f)
2696 }
2697 Expression::ListAgg(mut f) => {
2698 f.this = transform_recursive(f.this, transform_fn)?;
2699 Expression::ListAgg(f)
2700 }
2701 Expression::ArrayAgg(mut f) => {
2702 f.this = transform_recursive(f.this, transform_fn)?;
2703 Expression::ArrayAgg(f)
2704 }
2705 Expression::ParseJson(mut f) => {
2706 f.this = transform_recursive(f.this, transform_fn)?;
2707 Expression::ParseJson(f)
2708 }
2709 Expression::ToJson(mut f) => {
2710 f.this = transform_recursive(f.this, transform_fn)?;
2711 Expression::ToJson(f)
2712 }
2713 Expression::JSONExtract(mut e) => {
2714 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2715 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
2716 Expression::JSONExtract(e)
2717 }
2718 Expression::JSONExtractScalar(mut e) => {
2719 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2720 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
2721 Expression::JSONExtractScalar(e)
2722 }
2723
2724 // StrToTime: recurse into this
2725 Expression::StrToTime(mut e) => {
2726 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2727 Expression::StrToTime(e)
2728 }
2729
2730 // UnixToTime: recurse into this
2731 Expression::UnixToTime(mut e) => {
2732 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2733 Expression::UnixToTime(e)
2734 }
2735
2736 // CreateTable: recurse into column defaults, on_update expressions, and data types
2737 Expression::CreateTable(mut ct) => {
2738 for col in &mut ct.columns {
2739 if let Some(default_expr) = col.default.take() {
2740 col.default = Some(transform_recursive(default_expr, transform_fn)?);
2741 }
2742 if let Some(on_update_expr) = col.on_update.take() {
2743 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
2744 }
2745 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
2746 // are NOT applied here because per-dialect transforms are designed for CAST/expression
2747 // contexts and may not produce correct results for DDL column definitions.
2748 // The DDL type mappings would need dedicated handling per source/target pair.
2749 }
2750 if let Some(as_select) = ct.as_select.take() {
2751 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
2752 }
2753 Expression::CreateTable(ct)
2754 }
2755
2756 // CreateView: recurse into the view body query
2757 Expression::CreateView(mut cv) => {
2758 cv.query = transform_recursive(cv.query, transform_fn)?;
2759 Expression::CreateView(cv)
2760 }
2761
2762 // CreateTask: recurse into the task body
2763 Expression::CreateTask(mut ct) => {
2764 ct.body = transform_recursive(ct.body, transform_fn)?;
2765 Expression::CreateTask(ct)
2766 }
2767
2768 // CreateProcedure: recurse into body expressions
2769 Expression::CreateProcedure(mut cp) => {
2770 if let Some(body) = cp.body.take() {
2771 cp.body = Some(match body {
2772 FunctionBody::Expression(expr) => {
2773 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
2774 }
2775 FunctionBody::Return(expr) => {
2776 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
2777 }
2778 FunctionBody::Statements(stmts) => {
2779 let transformed_stmts = stmts
2780 .into_iter()
2781 .map(|s| transform_recursive(s, transform_fn))
2782 .collect::<Result<Vec<_>>>()?;
2783 FunctionBody::Statements(transformed_stmts)
2784 }
2785 other => other,
2786 });
2787 }
2788 Expression::CreateProcedure(cp)
2789 }
2790
2791 // CreateFunction: recurse into body expressions
2792 Expression::CreateFunction(mut cf) => {
2793 if let Some(body) = cf.body.take() {
2794 cf.body = Some(match body {
2795 FunctionBody::Expression(expr) => {
2796 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
2797 }
2798 FunctionBody::Return(expr) => {
2799 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
2800 }
2801 FunctionBody::Statements(stmts) => {
2802 let transformed_stmts = stmts
2803 .into_iter()
2804 .map(|s| transform_recursive(s, transform_fn))
2805 .collect::<Result<Vec<_>>>()?;
2806 FunctionBody::Statements(transformed_stmts)
2807 }
2808 other => other,
2809 });
2810 }
2811 Expression::CreateFunction(cf)
2812 }
2813
2814 // MemberOf: recurse into left and right operands
2815 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
2816 // ArrayContainsAll (@>): recurse into left and right operands
2817 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
2818 // ArrayContainedBy (<@): recurse into left and right operands
2819 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
2820 // ArrayOverlaps (&&): recurse into left and right operands
2821 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
2822 // TsMatch (@@): recurse into left and right operands
2823 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
2824 // Adjacent (-|-): recurse into left and right operands
2825 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
2826
2827 // Table: recurse into when (HistoricalData) and changes fields
2828 Expression::Table(mut t) => {
2829 if let Some(when) = t.when.take() {
2830 let transformed =
2831 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
2832 if let Expression::HistoricalData(hd) = transformed {
2833 t.when = Some(hd);
2834 }
2835 }
2836 if let Some(changes) = t.changes.take() {
2837 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
2838 if let Expression::Changes(c) = transformed {
2839 t.changes = Some(c);
2840 }
2841 }
2842 Expression::Table(t)
2843 }
2844
2845 // HistoricalData (Snowflake time travel): recurse into expression
2846 Expression::HistoricalData(mut hd) => {
2847 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
2848 Expression::HistoricalData(hd)
2849 }
2850
2851 // Changes (Snowflake CHANGES clause): recurse into at_before and end
2852 Expression::Changes(mut c) => {
2853 if let Some(at_before) = c.at_before.take() {
2854 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
2855 }
2856 if let Some(end) = c.end.take() {
2857 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
2858 }
2859 Expression::Changes(c)
2860 }
2861
2862 // TableArgument: TABLE(expr) or MODEL(expr)
2863 Expression::TableArgument(mut ta) => {
2864 ta.this = transform_recursive(ta.this, transform_fn)?;
2865 Expression::TableArgument(ta)
2866 }
2867
2868 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
2869 Expression::JoinedTable(mut jt) => {
2870 jt.left = transform_recursive(jt.left, transform_fn)?;
2871 for join in &mut jt.joins {
2872 join.this = transform_recursive(
2873 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
2874 transform_fn,
2875 )?;
2876 if let Some(on) = join.on.take() {
2877 join.on = Some(transform_recursive(on, transform_fn)?);
2878 }
2879 }
2880 jt.lateral_views = jt
2881 .lateral_views
2882 .into_iter()
2883 .map(|mut lv| {
2884 lv.this = transform_recursive(lv.this, transform_fn)?;
2885 Ok(lv)
2886 })
2887 .collect::<Result<Vec<_>>>()?;
2888 Expression::JoinedTable(jt)
2889 }
2890
2891 // Lateral: LATERAL func() - recurse into the function expression
2892 Expression::Lateral(mut lat) => {
2893 *lat.this = transform_recursive(*lat.this, transform_fn)?;
2894 Expression::Lateral(lat)
2895 }
2896
2897 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
2898 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
2899 // as a unit together with the WithinGroup wrapper
2900 Expression::WithinGroup(mut wg) => {
2901 wg.order_by = wg
2902 .order_by
2903 .into_iter()
2904 .map(|mut o| {
2905 let original = o.this.clone();
2906 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
2907 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
2908 Ok(Expression::Ordered(transformed)) => *transformed,
2909 Ok(_) | Err(_) => o,
2910 }
2911 })
2912 .collect();
2913 Expression::WithinGroup(wg)
2914 }
2915
2916 // Filter: recurse into both the aggregate and the filter condition
2917 Expression::Filter(mut f) => {
2918 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
2919 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
2920 Expression::Filter(f)
2921 }
2922
2923 // Aggregate functions (AggFunc-based): recurse into the aggregate argument,
2924 // filter, order_by, having_max, and limit.
2925 // Stddev, StddevSamp, Variance, and ArrayAgg are handled earlier in this match.
2926 Expression::Sum(f) => recurse_agg!(Sum, f),
2927 Expression::Avg(f) => recurse_agg!(Avg, f),
2928 Expression::Min(f) => recurse_agg!(Min, f),
2929 Expression::Max(f) => recurse_agg!(Max, f),
2930 Expression::CountIf(f) => recurse_agg!(CountIf, f),
2931 Expression::StddevPop(f) => recurse_agg!(StddevPop, f),
2932 Expression::VarPop(f) => recurse_agg!(VarPop, f),
2933 Expression::VarSamp(f) => recurse_agg!(VarSamp, f),
2934 Expression::Median(f) => recurse_agg!(Median, f),
2935 Expression::Mode(f) => recurse_agg!(Mode, f),
2936 Expression::First(f) => recurse_agg!(First, f),
2937 Expression::Last(f) => recurse_agg!(Last, f),
2938 Expression::AnyValue(f) => recurse_agg!(AnyValue, f),
2939 Expression::ApproxDistinct(f) => recurse_agg!(ApproxDistinct, f),
2940 Expression::ApproxCountDistinct(f) => recurse_agg!(ApproxCountDistinct, f),
2941 Expression::LogicalAnd(f) => recurse_agg!(LogicalAnd, f),
2942 Expression::LogicalOr(f) => recurse_agg!(LogicalOr, f),
2943 Expression::Skewness(f) => recurse_agg!(Skewness, f),
2944 Expression::ArrayConcatAgg(f) => recurse_agg!(ArrayConcatAgg, f),
2945 Expression::ArrayUniqueAgg(f) => recurse_agg!(ArrayUniqueAgg, f),
2946 Expression::BoolXorAgg(f) => recurse_agg!(BoolXorAgg, f),
2947 Expression::BitwiseOrAgg(f) => recurse_agg!(BitwiseOrAgg, f),
2948 Expression::BitwiseAndAgg(f) => recurse_agg!(BitwiseAndAgg, f),
2949 Expression::BitwiseXorAgg(f) => recurse_agg!(BitwiseXorAgg, f),
2950
2951 // Count has its own struct with an Option<Expression> `this` field
2952 Expression::Count(mut c) => {
2953 if let Some(this) = c.this.take() {
2954 c.this = Some(transform_recursive(this, transform_fn)?);
2955 }
2956 if let Some(filter) = c.filter.take() {
2957 c.filter = Some(transform_recursive(filter, transform_fn)?);
2958 }
2959 Expression::Count(c)
2960 }
2961
2962 Expression::PipeOperator(mut pipe) => {
2963 pipe.this = transform_recursive(pipe.this, transform_fn)?;
2964 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
2965 Expression::PipeOperator(pipe)
2966 }
2967
2968 // ArrayExcept/ArrayContains/ArrayDistinct: recurse into children
2969 Expression::ArrayExcept(mut f) => {
2970 f.this = transform_recursive(f.this, transform_fn)?;
2971 f.expression = transform_recursive(f.expression, transform_fn)?;
2972 Expression::ArrayExcept(f)
2973 }
2974 Expression::ArrayContains(mut f) => {
2975 f.this = transform_recursive(f.this, transform_fn)?;
2976 f.expression = transform_recursive(f.expression, transform_fn)?;
2977 Expression::ArrayContains(f)
2978 }
2979 Expression::ArrayDistinct(mut f) => {
2980 f.this = transform_recursive(f.this, transform_fn)?;
2981 Expression::ArrayDistinct(f)
2982 }
2983 Expression::ArrayPosition(mut f) => {
2984 f.this = transform_recursive(f.this, transform_fn)?;
2985 f.expression = transform_recursive(f.expression, transform_fn)?;
2986 Expression::ArrayPosition(f)
2987 }
2988
2989 // Pass through leaf nodes unchanged
2990 other => other,
2991 };
2992
2993 // Then apply the transform function
2994 transform_fn(expr)
2995}
2996
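// NOTE: the description below belongs to `configs_for_dialect_type`, which is defined
// after the cached dialect configurations. It is kept as a plain comment so that it
// does not attach to `CachedDialectConfig` as rustdoc.
//
// Returns the tokenizer config, generator config, and expression transform closure
// for a built-in dialect type. This is the shared implementation used by both
// `Dialect::get()` and custom dialect construction.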
3000// ---------------------------------------------------------------------------
3001// Cached dialect configurations
3002// ---------------------------------------------------------------------------
3003
3004/// Pre-computed tokenizer + generator configs for a dialect, cached via `LazyLock`.
3005/// Transform closures are cheap (unit-struct method calls) and created fresh each time.
3006struct CachedDialectConfig {
3007 tokenizer_config: TokenizerConfig,
3008 generator_config: Arc<GeneratorConfig>,
3009}
3010
/// Defines a feature-gated `LazyLock<CachedDialectConfig>` static for one dialect.
///
/// Arguments, in order: the identifier of the static to declare, an expression that
/// evaluates to the dialect value, and the Cargo feature literal gating the static.
macro_rules! cached_dialect {
    ($cache:ident, $dialect:expr, $feature:literal) => {
        #[cfg(feature = $feature)]
        static $cache: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
            let dialect = $dialect;
            let tokenizer_config = dialect.tokenizer_config();
            let generator_config = Arc::new(dialect.generator_config());
            CachedDialectConfig {
                tokenizer_config,
                generator_config,
            }
        });
    };
}
3024
3025static CACHED_GENERIC: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
3026 let d = GenericDialect;
3027 CachedDialectConfig {
3028 tokenizer_config: d.tokenizer_config(),
3029 generator_config: Arc::new(d.generator_config()),
3030 }
3031});
3032
3033cached_dialect!(CACHED_POSTGRESQL, PostgresDialect, "dialect-postgresql");
3034cached_dialect!(CACHED_MYSQL, MySQLDialect, "dialect-mysql");
3035cached_dialect!(CACHED_BIGQUERY, BigQueryDialect, "dialect-bigquery");
3036cached_dialect!(CACHED_SNOWFLAKE, SnowflakeDialect, "dialect-snowflake");
3037cached_dialect!(CACHED_DUCKDB, DuckDBDialect, "dialect-duckdb");
3038cached_dialect!(CACHED_TSQL, TSQLDialect, "dialect-tsql");
3039cached_dialect!(CACHED_ORACLE, OracleDialect, "dialect-oracle");
3040cached_dialect!(CACHED_HIVE, HiveDialect, "dialect-hive");
3041cached_dialect!(CACHED_SPARK, SparkDialect, "dialect-spark");
3042cached_dialect!(CACHED_SQLITE, SQLiteDialect, "dialect-sqlite");
3043cached_dialect!(CACHED_PRESTO, PrestoDialect, "dialect-presto");
3044cached_dialect!(CACHED_TRINO, TrinoDialect, "dialect-trino");
3045cached_dialect!(CACHED_REDSHIFT, RedshiftDialect, "dialect-redshift");
3046cached_dialect!(CACHED_CLICKHOUSE, ClickHouseDialect, "dialect-clickhouse");
3047cached_dialect!(CACHED_DATABRICKS, DatabricksDialect, "dialect-databricks");
3048cached_dialect!(CACHED_ATHENA, AthenaDialect, "dialect-athena");
3049cached_dialect!(CACHED_TERADATA, TeradataDialect, "dialect-teradata");
3050cached_dialect!(CACHED_DORIS, DorisDialect, "dialect-doris");
3051cached_dialect!(CACHED_STARROCKS, StarRocksDialect, "dialect-starrocks");
3052cached_dialect!(
3053 CACHED_MATERIALIZE,
3054 MaterializeDialect,
3055 "dialect-materialize"
3056);
3057cached_dialect!(CACHED_RISINGWAVE, RisingWaveDialect, "dialect-risingwave");
3058cached_dialect!(
3059 CACHED_SINGLESTORE,
3060 SingleStoreDialect,
3061 "dialect-singlestore"
3062);
3063cached_dialect!(
3064 CACHED_COCKROACHDB,
3065 CockroachDBDialect,
3066 "dialect-cockroachdb"
3067);
3068cached_dialect!(CACHED_TIDB, TiDBDialect, "dialect-tidb");
3069cached_dialect!(CACHED_DRUID, DruidDialect, "dialect-druid");
3070cached_dialect!(CACHED_SOLR, SolrDialect, "dialect-solr");
3071cached_dialect!(CACHED_TABLEAU, TableauDialect, "dialect-tableau");
3072cached_dialect!(CACHED_DUNE, DuneDialect, "dialect-dune");
3073cached_dialect!(CACHED_FABRIC, FabricDialect, "dialect-fabric");
3074cached_dialect!(CACHED_DRILL, DrillDialect, "dialect-drill");
3075cached_dialect!(CACHED_DREMIO, DremioDialect, "dialect-dremio");
3076cached_dialect!(CACHED_EXASOL, ExasolDialect, "dialect-exasol");
3077cached_dialect!(CACHED_DATAFUSION, DataFusionDialect, "dialect-datafusion");
3078
3079fn configs_for_dialect_type(
3080 dt: DialectType,
3081) -> (
3082 TokenizerConfig,
3083 Arc<GeneratorConfig>,
3084 Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
3085) {
3086 /// Clone configs from a cached static and pair with a fresh transform closure.
3087 macro_rules! from_cache {
3088 ($cache:expr, $dialect_struct:expr) => {{
3089 let c = &*$cache;
3090 (
3091 c.tokenizer_config.clone(),
3092 c.generator_config.clone(),
3093 Box::new(move |e| $dialect_struct.transform_expr(e)),
3094 )
3095 }};
3096 }
3097 match dt {
3098 #[cfg(feature = "dialect-postgresql")]
3099 DialectType::PostgreSQL => from_cache!(CACHED_POSTGRESQL, PostgresDialect),
3100 #[cfg(feature = "dialect-mysql")]
3101 DialectType::MySQL => from_cache!(CACHED_MYSQL, MySQLDialect),
3102 #[cfg(feature = "dialect-bigquery")]
3103 DialectType::BigQuery => from_cache!(CACHED_BIGQUERY, BigQueryDialect),
3104 #[cfg(feature = "dialect-snowflake")]
3105 DialectType::Snowflake => from_cache!(CACHED_SNOWFLAKE, SnowflakeDialect),
3106 #[cfg(feature = "dialect-duckdb")]
3107 DialectType::DuckDB => from_cache!(CACHED_DUCKDB, DuckDBDialect),
3108 #[cfg(feature = "dialect-tsql")]
3109 DialectType::TSQL => from_cache!(CACHED_TSQL, TSQLDialect),
3110 #[cfg(feature = "dialect-oracle")]
3111 DialectType::Oracle => from_cache!(CACHED_ORACLE, OracleDialect),
3112 #[cfg(feature = "dialect-hive")]
3113 DialectType::Hive => from_cache!(CACHED_HIVE, HiveDialect),
3114 #[cfg(feature = "dialect-spark")]
3115 DialectType::Spark => from_cache!(CACHED_SPARK, SparkDialect),
3116 #[cfg(feature = "dialect-sqlite")]
3117 DialectType::SQLite => from_cache!(CACHED_SQLITE, SQLiteDialect),
3118 #[cfg(feature = "dialect-presto")]
3119 DialectType::Presto => from_cache!(CACHED_PRESTO, PrestoDialect),
3120 #[cfg(feature = "dialect-trino")]
3121 DialectType::Trino => from_cache!(CACHED_TRINO, TrinoDialect),
3122 #[cfg(feature = "dialect-redshift")]
3123 DialectType::Redshift => from_cache!(CACHED_REDSHIFT, RedshiftDialect),
3124 #[cfg(feature = "dialect-clickhouse")]
3125 DialectType::ClickHouse => from_cache!(CACHED_CLICKHOUSE, ClickHouseDialect),
3126 #[cfg(feature = "dialect-databricks")]
3127 DialectType::Databricks => from_cache!(CACHED_DATABRICKS, DatabricksDialect),
3128 #[cfg(feature = "dialect-athena")]
3129 DialectType::Athena => from_cache!(CACHED_ATHENA, AthenaDialect),
3130 #[cfg(feature = "dialect-teradata")]
3131 DialectType::Teradata => from_cache!(CACHED_TERADATA, TeradataDialect),
3132 #[cfg(feature = "dialect-doris")]
3133 DialectType::Doris => from_cache!(CACHED_DORIS, DorisDialect),
3134 #[cfg(feature = "dialect-starrocks")]
3135 DialectType::StarRocks => from_cache!(CACHED_STARROCKS, StarRocksDialect),
3136 #[cfg(feature = "dialect-materialize")]
3137 DialectType::Materialize => from_cache!(CACHED_MATERIALIZE, MaterializeDialect),
3138 #[cfg(feature = "dialect-risingwave")]
3139 DialectType::RisingWave => from_cache!(CACHED_RISINGWAVE, RisingWaveDialect),
3140 #[cfg(feature = "dialect-singlestore")]
3141 DialectType::SingleStore => from_cache!(CACHED_SINGLESTORE, SingleStoreDialect),
3142 #[cfg(feature = "dialect-cockroachdb")]
3143 DialectType::CockroachDB => from_cache!(CACHED_COCKROACHDB, CockroachDBDialect),
3144 #[cfg(feature = "dialect-tidb")]
3145 DialectType::TiDB => from_cache!(CACHED_TIDB, TiDBDialect),
3146 #[cfg(feature = "dialect-druid")]
3147 DialectType::Druid => from_cache!(CACHED_DRUID, DruidDialect),
3148 #[cfg(feature = "dialect-solr")]
3149 DialectType::Solr => from_cache!(CACHED_SOLR, SolrDialect),
3150 #[cfg(feature = "dialect-tableau")]
3151 DialectType::Tableau => from_cache!(CACHED_TABLEAU, TableauDialect),
3152 #[cfg(feature = "dialect-dune")]
3153 DialectType::Dune => from_cache!(CACHED_DUNE, DuneDialect),
3154 #[cfg(feature = "dialect-fabric")]
3155 DialectType::Fabric => from_cache!(CACHED_FABRIC, FabricDialect),
3156 #[cfg(feature = "dialect-drill")]
3157 DialectType::Drill => from_cache!(CACHED_DRILL, DrillDialect),
3158 #[cfg(feature = "dialect-dremio")]
3159 DialectType::Dremio => from_cache!(CACHED_DREMIO, DremioDialect),
3160 #[cfg(feature = "dialect-exasol")]
3161 DialectType::Exasol => from_cache!(CACHED_EXASOL, ExasolDialect),
3162 #[cfg(feature = "dialect-datafusion")]
3163 DialectType::DataFusion => from_cache!(CACHED_DATAFUSION, DataFusionDialect),
3164 _ => from_cache!(CACHED_GENERIC, GenericDialect),
3165 }
3166}
3167
3168// ---------------------------------------------------------------------------
3169// Custom dialect registry
3170// ---------------------------------------------------------------------------
3171
3172static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
3173 LazyLock::new(|| RwLock::new(HashMap::new()));
3174
3175struct CustomDialectConfig {
3176 name: String,
3177 base_dialect: DialectType,
3178 tokenizer_config: TokenizerConfig,
3179 generator_config: GeneratorConfig,
3180 transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
3181 preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
3182}
3183
3184/// Fluent builder for creating and registering custom SQL dialects.
3185///
3186/// A custom dialect is based on an existing built-in dialect and allows selective
3187/// overrides of tokenizer configuration, generator configuration, and expression
3188/// transforms.
3189///
3190/// # Example
3191///
3192/// ```rust,ignore
3193/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
3194/// use polyglot_sql::generator::NormalizeFunctions;
3195///
3196/// CustomDialectBuilder::new("my_postgres")
3197/// .based_on(DialectType::PostgreSQL)
3198/// .generator_config_modifier(|gc| {
3199/// gc.normalize_functions = NormalizeFunctions::Lower;
3200/// })
3201/// .register()
3202/// .unwrap();
3203///
3204/// let d = Dialect::get_by_name("my_postgres").unwrap();
3205/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
3206/// let sql = d.generate(&exprs[0]).unwrap();
3207/// assert_eq!(sql, "select count(*)");
3208///
3209/// polyglot_sql::unregister_custom_dialect("my_postgres");
3210/// ```
3211pub struct CustomDialectBuilder {
3212 name: String,
3213 base_dialect: DialectType,
3214 tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
3215 generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
3216 transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
3217 preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
3218}
3219
3220impl CustomDialectBuilder {
3221 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
3222 pub fn new(name: impl Into<String>) -> Self {
3223 Self {
3224 name: name.into(),
3225 base_dialect: DialectType::Generic,
3226 tokenizer_modifier: None,
3227 generator_modifier: None,
3228 transform: None,
3229 preprocess: None,
3230 }
3231 }
3232
3233 /// Set the base built-in dialect to inherit configuration from.
3234 pub fn based_on(mut self, dialect: DialectType) -> Self {
3235 self.base_dialect = dialect;
3236 self
3237 }
3238
3239 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
3240 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
3241 where
3242 F: FnOnce(&mut TokenizerConfig) + 'static,
3243 {
3244 self.tokenizer_modifier = Some(Box::new(f));
3245 self
3246 }
3247
3248 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
3249 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
3250 where
3251 F: FnOnce(&mut GeneratorConfig) + 'static,
3252 {
3253 self.generator_modifier = Some(Box::new(f));
3254 self
3255 }
3256
3257 /// Set a custom per-node expression transform function.
3258 ///
3259 /// This replaces the base dialect's transform. It is called on every expression
3260 /// node during the recursive transform pass.
3261 pub fn transform_fn<F>(mut self, f: F) -> Self
3262 where
3263 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
3264 {
3265 self.transform = Some(Arc::new(f));
3266 self
3267 }
3268
3269 /// Set a custom whole-tree preprocessing function.
3270 ///
3271 /// This replaces the base dialect's built-in preprocessing. It is called once
3272 /// on the entire expression tree before the recursive per-node transform.
3273 pub fn preprocess_fn<F>(mut self, f: F) -> Self
3274 where
3275 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
3276 {
3277 self.preprocess = Some(Arc::new(f));
3278 self
3279 }
3280
3281 /// Build the custom dialect configuration and register it in the global registry.
3282 ///
3283 /// Returns an error if:
3284 /// - The name collides with a built-in dialect name
3285 /// - A custom dialect with the same name is already registered
3286 pub fn register(self) -> Result<()> {
3287 // Reject names that collide with built-in dialects
3288 if DialectType::from_str(&self.name).is_ok() {
3289 return Err(crate::error::Error::parse(
3290 format!(
3291 "Cannot register custom dialect '{}': name collides with built-in dialect",
3292 self.name
3293 ),
3294 0,
3295 0,
3296 0,
3297 0,
3298 ));
3299 }
3300
3301 // Get base configs
3302 let (mut tok_config, arc_gen_config, _base_transform) =
3303 configs_for_dialect_type(self.base_dialect);
3304 let mut gen_config = (*arc_gen_config).clone();
3305
3306 // Apply modifiers
3307 if let Some(tok_mod) = self.tokenizer_modifier {
3308 tok_mod(&mut tok_config);
3309 }
3310 if let Some(gen_mod) = self.generator_modifier {
3311 gen_mod(&mut gen_config);
3312 }
3313
3314 let config = CustomDialectConfig {
3315 name: self.name.clone(),
3316 base_dialect: self.base_dialect,
3317 tokenizer_config: tok_config,
3318 generator_config: gen_config,
3319 transform: self.transform,
3320 preprocess: self.preprocess,
3321 };
3322
3323 register_custom_dialect(config)
3324 }
3325}
3326
3327use std::str::FromStr;
3328
3329fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
3330 let mut registry = CUSTOM_DIALECT_REGISTRY.write().map_err(|e| {
3331 crate::error::Error::parse(format!("Registry lock poisoned: {}", e), 0, 0, 0, 0)
3332 })?;
3333
3334 if registry.contains_key(&config.name) {
3335 return Err(crate::error::Error::parse(
3336 format!("Custom dialect '{}' is already registered", config.name),
3337 0,
3338 0,
3339 0,
3340 0,
3341 ));
3342 }
3343
3344 registry.insert(config.name.clone(), Arc::new(config));
3345 Ok(())
3346}
3347
3348/// Remove a custom dialect from the global registry.
3349///
3350/// Returns `true` if a dialect with that name was found and removed,
3351/// `false` if no such custom dialect existed.
3352pub fn unregister_custom_dialect(name: &str) -> bool {
3353 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
3354 registry.remove(name).is_some()
3355 } else {
3356 false
3357 }
3358}
3359
3360fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
3361 CUSTOM_DIALECT_REGISTRY
3362 .read()
3363 .ok()
3364 .and_then(|registry| registry.get(name).cloned())
3365}
3366
3367/// Main entry point for dialect-specific SQL operations.
3368///
3369/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
3370/// transformer for a specific SQL database engine. It is the high-level API through
3371/// which callers parse, generate, transform, and transpile SQL.
3372///
3373/// # Usage
3374///
3375/// ```rust,ignore
3376/// use polyglot_sql::dialects::{Dialect, DialectType};
3377///
3378/// // Parse PostgreSQL SQL into an AST
3379/// let pg = Dialect::get(DialectType::PostgreSQL);
3380/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
3381///
3382/// // Transpile from PostgreSQL to BigQuery
3383/// let results = pg.transpile("SELECT NOW()", DialectType::BigQuery)?;
3384/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
3385/// ```
3386///
3387/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
3388/// The struct is `Send + Sync` safe so it can be shared across threads.
3389pub struct Dialect {
3390 dialect_type: DialectType,
3391 tokenizer: Tokenizer,
3392 generator_config: Arc<GeneratorConfig>,
3393 transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
3394 /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
3395 generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
3396 /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
3397 custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
3398}
3399
3400/// Options for [`Dialect::transpile_with`].
3401///
3402/// Use [`TranspileOptions::default`] for defaults, then tweak the fields you need.
3403/// The struct is marked `#[non_exhaustive]` so new fields can be added without
3404/// breaking the API.
3405///
3406/// The struct derives `Serialize`/`Deserialize` using camelCase field names so
3407/// it can be round-tripped over JSON bridges (C FFI, WASM) without mapping.
3408#[derive(Debug, Clone, Default, Serialize, Deserialize)]
3409#[serde(rename_all = "camelCase", default)]
3410#[non_exhaustive]
3411pub struct TranspileOptions {
3412 /// Whether to pretty-print the output SQL.
3413 pub pretty: bool,
3414}
3415
3416impl TranspileOptions {
3417 /// Construct options with pretty-printing enabled.
3418 pub fn pretty() -> Self {
3419 Self { pretty: true }
3420 }
3421}
3422
3423/// A value that can be used as the target dialect in [`Dialect::transpile`] /
3424/// [`Dialect::transpile_with`].
3425///
3426/// Implemented for [`DialectType`] (built-in dialect enum) and `&Dialect` (any
3427/// dialect handle, including custom ones). End users do not normally need to
3428/// implement this trait themselves.
3429pub trait TranspileTarget {
3430 /// Invoke `f` with a reference to the resolved target dialect.
3431 fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R;
3432}
3433
3434impl TranspileTarget for DialectType {
3435 fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R {
3436 f(&Dialect::get(self))
3437 }
3438}
3439
3440impl TranspileTarget for &Dialect {
3441 fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R {
3442 f(self)
3443 }
3444}
3445
3446impl Dialect {
3447 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
3448 ///
3449 /// This is the primary constructor. It initializes the tokenizer, generator config,
3450 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
3451 /// For hybrid dialects like Athena, it also sets up expression-specific generator
3452 /// config routing.
3453 pub fn get(dialect_type: DialectType) -> Self {
3454 let (tokenizer_config, generator_config, transformer) =
3455 configs_for_dialect_type(dialect_type);
3456
3457 // Set up expression-specific generator config for hybrid dialects
3458 let generator_config_for_expr: Option<
3459 Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
3460 > = match dialect_type {
3461 #[cfg(feature = "dialect-athena")]
3462 DialectType::Athena => Some(Box::new(|expr| {
3463 AthenaDialect.generator_config_for_expr(expr)
3464 })),
3465 _ => None,
3466 };
3467
3468 Self {
3469 dialect_type,
3470 tokenizer: Tokenizer::new(tokenizer_config),
3471 generator_config,
3472 transformer,
3473 generator_config_for_expr,
3474 custom_preprocess: None,
3475 }
3476 }
3477
3478 /// Look up a dialect by string name.
3479 ///
3480 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
3481 /// falls back to the custom dialect registry. Returns `None` if no dialect
3482 /// with the given name exists.
3483 pub fn get_by_name(name: &str) -> Option<Self> {
3484 // Try built-in first
3485 if let Ok(dt) = DialectType::from_str(name) {
3486 return Some(Self::get(dt));
3487 }
3488
3489 // Try custom registry
3490 let config = get_custom_dialect_config(name)?;
3491 Some(Self::from_custom_config(&config))
3492 }
3493
3494 /// Construct a `Dialect` from a custom dialect configuration.
3495 fn from_custom_config(config: &CustomDialectConfig) -> Self {
3496 // Build the transformer: use custom if provided, else use base dialect's
3497 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
3498 if let Some(ref custom_transform) = config.transform {
3499 let t = Arc::clone(custom_transform);
3500 Box::new(move |e| t(e))
3501 } else {
3502 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
3503 base_transform
3504 };
3505
3506 // Build the custom preprocess: use custom if provided
3507 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
3508 config.preprocess.as_ref().map(|p| {
3509 let p = Arc::clone(p);
3510 Box::new(move |e: Expression| p(e))
3511 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
3512 });
3513
3514 Self {
3515 dialect_type: config.base_dialect,
3516 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
3517 generator_config: Arc::new(config.generator_config.clone()),
3518 transformer,
3519 generator_config_for_expr: None,
3520 custom_preprocess,
3521 }
3522 }
3523
3524 /// Get the dialect type
3525 pub fn dialect_type(&self) -> DialectType {
3526 self.dialect_type
3527 }
3528
3529 /// Get the generator configuration
3530 pub fn generator_config(&self) -> &GeneratorConfig {
3531 &self.generator_config
3532 }
3533
3534 /// Parses a SQL string into a list of [`Expression`] AST nodes.
3535 ///
3536 /// The input may contain multiple semicolon-separated statements; each one
3537 /// produces a separate element in the returned vector. Tokenization uses
3538 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
3539 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
3540 let tokens = self.tokenizer.tokenize(sql)?;
3541 let config = crate::parser::ParserConfig {
3542 dialect: Some(self.dialect_type),
3543 ..Default::default()
3544 };
3545 let mut parser = Parser::with_source(tokens, config, sql.to_string());
3546 parser.parse()
3547 }
3548
3549 /// Tokenize SQL using this dialect's tokenizer configuration.
3550 pub fn tokenize(&self, sql: &str) -> Result<Vec<Token>> {
3551 self.tokenizer.tokenize(sql)
3552 }
3553
3554 /// Get the generator config for a specific expression (supports hybrid dialects).
3555 /// Returns an owned `GeneratorConfig` suitable for mutation before generation.
3556 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
3557 if let Some(ref config_fn) = self.generator_config_for_expr {
3558 config_fn(expr)
3559 } else {
3560 (*self.generator_config).clone()
3561 }
3562 }
3563
3564 /// Generates a SQL string from an [`Expression`] AST node.
3565 ///
3566 /// The output uses this dialect's generator configuration for identifier quoting,
3567 /// keyword casing, function name normalization, and syntax style. The result is
3568 /// a single-line (non-pretty) SQL string.
3569 pub fn generate(&self, expr: &Expression) -> Result<String> {
3570 // Fast path: when no per-expression config override, share the Arc cheaply.
3571 if self.generator_config_for_expr.is_none() {
3572 let mut generator = Generator::with_arc_config(self.generator_config.clone());
3573 return generator.generate(expr);
3574 }
3575 let config = self.get_config_for_expr(expr);
3576 let mut generator = Generator::with_config(config);
3577 generator.generate(expr)
3578 }
3579
3580 /// Generate SQL from an expression with pretty printing enabled
3581 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
3582 let mut config = self.get_config_for_expr(expr);
3583 config.pretty = true;
3584 let mut generator = Generator::with_config(config);
3585 generator.generate(expr)
3586 }
3587
3588 /// Generate SQL from an expression with source dialect info (for transpilation)
3589 pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
3590 let mut config = self.get_config_for_expr(expr);
3591 config.source_dialect = Some(source);
3592 let mut generator = Generator::with_config(config);
3593 generator.generate(expr)
3594 }
3595
3596 /// Generate SQL from an expression with pretty printing and source dialect info
3597 pub fn generate_pretty_with_source(
3598 &self,
3599 expr: &Expression,
3600 source: DialectType,
3601 ) -> Result<String> {
3602 let mut config = self.get_config_for_expr(expr);
3603 config.pretty = true;
3604 config.source_dialect = Some(source);
3605 let mut generator = Generator::with_config(config);
3606 generator.generate(expr)
3607 }
3608
3609 /// Generate SQL from an expression with forced identifier quoting (identify=True)
3610 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
3611 let mut config = self.get_config_for_expr(expr);
3612 config.always_quote_identifiers = true;
3613 let mut generator = Generator::with_config(config);
3614 generator.generate(expr)
3615 }
3616
3617 /// Generate SQL from an expression with pretty printing and forced identifier quoting
3618 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
3619 let mut config = (*self.generator_config).clone();
3620 config.pretty = true;
3621 config.always_quote_identifiers = true;
3622 let mut generator = Generator::with_config(config);
3623 generator.generate(expr)
3624 }
3625
3626 /// Generate SQL from an expression with caller-specified config overrides
3627 pub fn generate_with_overrides(
3628 &self,
3629 expr: &Expression,
3630 overrides: impl FnOnce(&mut GeneratorConfig),
3631 ) -> Result<String> {
3632 let mut config = self.get_config_for_expr(expr);
3633 overrides(&mut config);
3634 let mut generator = Generator::with_config(config);
3635 generator.generate(expr)
3636 }
3637
3638 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
3639 ///
3640 /// The transformation proceeds in two phases:
3641 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
3642 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
3643 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
3644 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
3645 ///
3646 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
3647 /// and for identity transforms (normalizing SQL within the same dialect).
3648 pub fn transform(&self, expr: Expression) -> Result<Expression> {
3649 // Apply preprocessing transforms based on dialect
3650 let preprocessed = self.preprocess(expr)?;
3651 // Then apply recursive transformation
3652 transform_recursive(preprocessed, &self.transformer)
3653 }
3654
3655 /// Apply dialect-specific preprocessing transforms
3656 fn preprocess(&self, expr: Expression) -> Result<Expression> {
3657 // If a custom preprocess function is set, use it instead of the built-in logic
3658 if let Some(ref custom_preprocess) = self.custom_preprocess {
3659 return custom_preprocess(expr);
3660 }
3661
3662 #[cfg(any(
3663 feature = "dialect-mysql",
3664 feature = "dialect-postgresql",
3665 feature = "dialect-bigquery",
3666 feature = "dialect-snowflake",
3667 feature = "dialect-tsql",
3668 feature = "dialect-spark",
3669 feature = "dialect-databricks",
3670 feature = "dialect-hive",
3671 feature = "dialect-sqlite",
3672 feature = "dialect-trino",
3673 feature = "dialect-presto",
3674 feature = "dialect-duckdb",
3675 feature = "dialect-redshift",
3676 feature = "dialect-starrocks",
3677 feature = "dialect-oracle",
3678 feature = "dialect-clickhouse",
3679 ))]
3680 use crate::transforms;
3681
3682 match self.dialect_type {
3683 // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
3684 // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
3685 #[cfg(feature = "dialect-mysql")]
3686 DialectType::MySQL => {
3687 let expr = transforms::eliminate_qualify(expr)?;
3688 let expr = transforms::eliminate_full_outer_join(expr)?;
3689 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3690 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3691 Ok(expr)
3692 }
3693 // PostgreSQL doesn't support QUALIFY
3694 // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
3695 // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
3696 #[cfg(feature = "dialect-postgresql")]
3697 DialectType::PostgreSQL => {
3698 let expr = transforms::eliminate_qualify(expr)?;
3699 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3700 let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
3701 // Normalize SET ... TO to SET ... = in CREATE FUNCTION
3702 // Only normalize when sqlglot would fully parse (no body) —
3703 // sqlglot falls back to Command for complex function bodies,
3704 // preserving the original text including TO.
3705 let expr = if let Expression::CreateFunction(mut cf) = expr {
3706 if cf.body.is_none() {
3707 for opt in &mut cf.set_options {
3708 if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
3709 &mut opt.value
3710 {
3711 *use_to = false;
3712 }
3713 }
3714 }
3715 Expression::CreateFunction(cf)
3716 } else {
3717 expr
3718 };
3719 Ok(expr)
3720 }
3721 // BigQuery doesn't support DISTINCT ON or CTE column aliases
3722 #[cfg(feature = "dialect-bigquery")]
3723 DialectType::BigQuery => {
3724 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3725 let expr = transforms::pushdown_cte_column_names(expr)?;
3726 let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
3727 Ok(expr)
3728 }
3729 // Snowflake
3730 #[cfg(feature = "dialect-snowflake")]
3731 DialectType::Snowflake => {
3732 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3733 let expr = transforms::eliminate_window_clause(expr)?;
3734 let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
3735 Ok(expr)
3736 }
3737 // TSQL doesn't support QUALIFY
3738 // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
3739 // TSQL doesn't support CTEs in subqueries (hoist to top level)
3740 // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
3741 // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
3742 #[cfg(feature = "dialect-tsql")]
3743 DialectType::TSQL => {
3744 let expr = transforms::eliminate_qualify(expr)?;
3745 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3746 let expr = transforms::ensure_bools(expr)?;
3747 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3748 let expr = transforms::move_ctes_to_top_level(expr)?;
3749 let expr = transforms::qualify_derived_table_outputs(expr)?;
3750 Ok(expr)
3751 }
3752 // Spark doesn't support QUALIFY (but Databricks does)
3753 // Spark doesn't support CTEs in subqueries (hoist to top level)
3754 #[cfg(feature = "dialect-spark")]
3755 DialectType::Spark => {
3756 let expr = transforms::eliminate_qualify(expr)?;
3757 let expr = transforms::add_auto_table_alias(expr)?;
3758 let expr = transforms::simplify_nested_paren_values(expr)?;
3759 let expr = transforms::move_ctes_to_top_level(expr)?;
3760 Ok(expr)
3761 }
3762 // Databricks supports QUALIFY natively
3763 // Databricks doesn't support CTEs in subqueries (hoist to top level)
3764 #[cfg(feature = "dialect-databricks")]
3765 DialectType::Databricks => {
3766 let expr = transforms::add_auto_table_alias(expr)?;
3767 let expr = transforms::simplify_nested_paren_values(expr)?;
3768 let expr = transforms::move_ctes_to_top_level(expr)?;
3769 Ok(expr)
3770 }
3771 // Hive doesn't support QUALIFY or CTEs in subqueries
3772 #[cfg(feature = "dialect-hive")]
3773 DialectType::Hive => {
3774 let expr = transforms::eliminate_qualify(expr)?;
3775 let expr = transforms::move_ctes_to_top_level(expr)?;
3776 Ok(expr)
3777 }
3778 // SQLite doesn't support QUALIFY
3779 #[cfg(feature = "dialect-sqlite")]
3780 DialectType::SQLite => {
3781 let expr = transforms::eliminate_qualify(expr)?;
3782 Ok(expr)
3783 }
3784 // Trino doesn't support QUALIFY
3785 #[cfg(feature = "dialect-trino")]
3786 DialectType::Trino => {
3787 let expr = transforms::eliminate_qualify(expr)?;
3788 let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
3789 Ok(expr)
3790 }
3791 // Presto doesn't support QUALIFY or WINDOW clause
3792 #[cfg(feature = "dialect-presto")]
3793 DialectType::Presto => {
3794 let expr = transforms::eliminate_qualify(expr)?;
3795 let expr = transforms::eliminate_window_clause(expr)?;
3796 let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
3797 Ok(expr)
3798 }
3799 // DuckDB supports QUALIFY - no elimination needed
3800 // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
3801 // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
3802 #[cfg(feature = "dialect-duckdb")]
3803 DialectType::DuckDB => {
3804 let expr = transforms::expand_posexplode_duckdb(expr)?;
3805 let expr = transforms::expand_like_any(expr)?;
3806 Ok(expr)
3807 }
3808 // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
3809 #[cfg(feature = "dialect-redshift")]
3810 DialectType::Redshift => {
3811 let expr = transforms::eliminate_qualify(expr)?;
3812 let expr = transforms::eliminate_window_clause(expr)?;
3813 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3814 Ok(expr)
3815 }
3816 // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
3817 #[cfg(feature = "dialect-starrocks")]
3818 DialectType::StarRocks => {
3819 let expr = transforms::eliminate_qualify(expr)?;
3820 let expr = transforms::expand_between_in_delete(expr)?;
3821 let expr = transforms::eliminate_distinct_on_for_dialect(
3822 expr,
3823 Some(DialectType::StarRocks),
3824 Some(DialectType::StarRocks),
3825 )?;
3826 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3827 Ok(expr)
3828 }
3829 // DataFusion supports QUALIFY and semi/anti joins natively
3830 #[cfg(feature = "dialect-datafusion")]
3831 DialectType::DataFusion => Ok(expr),
3832 // Oracle doesn't support QUALIFY
3833 #[cfg(feature = "dialect-oracle")]
3834 DialectType::Oracle => {
3835 let expr = transforms::eliminate_qualify(expr)?;
3836 Ok(expr)
3837 }
3838 // Drill - no special preprocessing needed
3839 #[cfg(feature = "dialect-drill")]
3840 DialectType::Drill => Ok(expr),
3841 // Teradata - no special preprocessing needed
3842 #[cfg(feature = "dialect-teradata")]
3843 DialectType::Teradata => Ok(expr),
3844 // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
3845 #[cfg(feature = "dialect-clickhouse")]
3846 DialectType::ClickHouse => {
3847 let expr = transforms::no_limit_order_by_union(expr)?;
3848 Ok(expr)
3849 }
3850 // Other dialects - no preprocessing
3851 _ => Ok(expr),
3852 }
3853 }
3854
3855 /// Transpile SQL from this dialect to the given target dialect.
3856 ///
3857 /// The target may be specified as either a built-in [`DialectType`] enum variant
3858 /// or as a reference to a [`Dialect`] handle (built-in or custom). Both work:
3859 ///
3860 /// ```rust,ignore
3861 /// let pg = Dialect::get(DialectType::PostgreSQL);
3862 /// pg.transpile("SELECT NOW()", DialectType::BigQuery)?; // enum
3863 /// pg.transpile("SELECT NOW()", &custom_dialect)?; // handle
3864 /// ```
3865 ///
3866 /// For pretty-printing or other options, use [`transpile_with`](Self::transpile_with).
3867 pub fn transpile<T: TranspileTarget>(&self, sql: &str, target: T) -> Result<Vec<String>> {
3868 self.transpile_with(sql, target, TranspileOptions::default())
3869 }
3870
3871 /// Transpile SQL with configurable [`TranspileOptions`] (e.g. pretty-printing).
3872 pub fn transpile_with<T: TranspileTarget>(
3873 &self,
3874 sql: &str,
3875 target: T,
3876 opts: TranspileOptions,
3877 ) -> Result<Vec<String>> {
3878 target.with_dialect(|td| self.transpile_inner(sql, td, opts.pretty))
3879 }
3880
3881 #[cfg(not(feature = "transpile"))]
3882 fn transpile_inner(
3883 &self,
3884 sql: &str,
3885 target_dialect: &Dialect,
3886 pretty: bool,
3887 ) -> Result<Vec<String>> {
3888 let target = target_dialect.dialect_type;
3889 // Without the transpile feature, only same-dialect or to/from generic is supported
3890 if self.dialect_type != target
3891 && self.dialect_type != DialectType::Generic
3892 && target != DialectType::Generic
3893 {
3894 return Err(crate::error::Error::parse(
3895 "Cross-dialect transpilation not available in this build",
3896 0,
3897 0,
3898 0,
3899 0,
3900 ));
3901 }
3902
3903 let expressions = self.parse(sql)?;
3904 let generic_identity =
3905 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
3906
3907 if generic_identity {
3908 return expressions
3909 .into_iter()
3910 .map(|expr| {
3911 if pretty {
3912 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
3913 } else {
3914 target_dialect.generate_with_source(&expr, self.dialect_type)
3915 }
3916 })
3917 .collect();
3918 }
3919
3920 expressions
3921 .into_iter()
3922 .map(|expr| {
3923 let transformed = target_dialect.transform(expr)?;
3924 if pretty {
3925 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)
3926 } else {
3927 target_dialect.generate_with_source(&transformed, self.dialect_type)
3928 }
3929 })
3930 .collect()
3931 }
3932
/// Full cross-dialect transpilation pipeline (compiled with the `transpile` feature).
///
/// `sql` is parsed with this (source) dialect. Generic -> Generic input is rendered
/// directly. Otherwise every parsed statement goes through the following passes,
/// in this order, before being rendered by `target_dialect`:
///
/// 1. DuckDB source: VARCHAR/CHAR data types become TEXT.
/// 2. Source-dialect normalization via [`transform`](Self::transform) when source and
///    target differ and the source is not Generic.
/// 3. Source/target specific fix-ups (TSQL `ISNULL(JSON_QUERY, JSON_VALUE)` unwrapping,
///    Snowflake `CURRENT_TIME`, Snowflake -> DuckDB `REPEAT` and `RANDOM` handling,
///    BigQuery struct field name propagation).
/// 4. `cross_dialect_normalize`, then PostgreSQL -> SQLite type normalization.
/// 5. UNNEST reshaping for BigQuery and PostgreSQL/Redshift sources.
/// 6. DISTINCT ON elimination and target-specific rewrites (Snowflake date arrays,
///    Spark/Databricks/Hive LATERAL VIEW, ClickHouse/TSQL UNION wrapping, TSQL `COUNT_BIG`).
/// 7. The target dialect's [`transform`](Self::transform), plus the DuckDB
///    `seq_rownum_to_range` post-pass.
///
/// # Errors
///
/// Propagates the first tokenization, parse, rewrite or generation error. PostgreSQL
/// input targeting SQLite is additionally rejected up front when it uses pgvector
/// distance operators that SQLite cannot express.
#[cfg(feature = "transpile")]
fn transpile_inner(
    &self,
    sql: &str,
    target_dialect: &Dialect,
    pretty: bool,
) -> Result<Vec<String>> {
    let source = self.dialect_type;
    let target = target_dialect.dialect_type;

    // Dialect-pair facts consulted by several passes below.
    let from_snowflake = matches!(source, DialectType::Snowflake);
    let from_bigquery = matches!(source, DialectType::BigQuery);
    let from_tsql_family = matches!(source, DialectType::TSQL | DialectType::Fabric);
    let to_tsql_family = matches!(target, DialectType::TSQL | DialectType::Fabric);
    let to_duckdb = matches!(target, DialectType::DuckDB);
    let postgres_to_sqlite =
        matches!(source, DialectType::PostgreSQL) && matches!(target, DialectType::SQLite);

    if postgres_to_sqlite {
        self.reject_pgvector_distance_operators_for_sqlite(sql)?;
    }

    let expressions = self.parse(sql)?;

    let render = |tree: &Expression| {
        if pretty {
            target_dialect.generate_pretty_with_source(tree, source)
        } else {
            target_dialect.generate_with_source(tree, source)
        }
    };

    // Generic -> Generic: render the parsed statements as they are.
    if source == DialectType::Generic && target == DialectType::Generic {
        return expressions
            .into_iter()
            .map(|parsed| render(&parsed))
            .collect();
    }

    expressions
        .into_iter()
        .map(|parsed| {
            let mut tree = parsed;

            // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
            // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
            // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
            if matches!(source, DialectType::DuckDB) {
                use crate::expressions::DataType as DT;
                tree = transform_recursive(tree, &|node| match node {
                    Expression::DataType(DT::VarChar { .. } | DT::Char { .. }) => {
                        Ok(Expression::DataType(DT::Text))
                    }
                    other => Ok(other),
                })?;
            }

            // When source and target differ, first normalize the source dialect's
            // AST constructs to standard SQL so the target dialect can handle them
            // (e.g. Snowflake's SQUARE -> POWER, DIV0 -> CASE).
            if source != target && source != DialectType::Generic {
                tree = self.transform(tree)?;
            }

            // TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
            // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
            // The TSQL read transform wraps JsonQuery in ISNULL for identity, but
            // cross-dialect transpilation needs the unwrapped JSON_QUERY.
            if from_tsql_family && !to_tsql_family {
                tree = transform_recursive(tree, &|node| {
                    let unwrapped = match &node {
                        Expression::Function(call)
                            if call.name.eq_ignore_ascii_case("ISNULL")
                                && call.args.len() == 2 =>
                        {
                            match (&call.args[0], &call.args[1]) {
                                (Expression::Function(first), Expression::Function(second))
                                    if first.name.eq_ignore_ascii_case("JSON_QUERY")
                                        && second.name.eq_ignore_ascii_case("JSON_VALUE") =>
                                {
                                    // Keep only the JSON_QUERY(...) argument.
                                    Some(call.args[0].clone())
                                }
                                _ => None,
                            }
                        }
                        _ => None,
                    };
                    Ok(unwrapped.unwrap_or(node))
                })?;
            }

            // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME.
            // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects;
            // Python sqlglot parses Snowflake's CURRENT_TIME as a Localtime expression.
            if from_snowflake && !matches!(target, DialectType::Snowflake) {
                tree = transform_recursive(tree, &|node| {
                    let is_current_time = matches!(
                        &node,
                        Expression::Function(call)
                            if call.name.eq_ignore_ascii_case("CURRENT_TIME")
                    );
                    if is_current_time {
                        Ok(Expression::Localtime(Box::new(
                            crate::expressions::Localtime { this: None },
                        )))
                    } else {
                        Ok(node)
                    }
                })?;
            }

            // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT)).
            // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
            // transform, and DuckDB requires the count argument to be BIGINT.
            if from_snowflake && to_duckdb {
                tree = transform_recursive(tree, &|node| {
                    let rewritten = match &node {
                        Expression::Function(call)
                            if call.name.eq_ignore_ascii_case("REPEAT")
                                && call.args.len() == 2 =>
                        {
                            // First argument must be the single-space string literal.
                            let pads_with_space = matches!(
                                &call.args[0],
                                Expression::Literal(lit)
                                    if matches!(
                                        lit.as_ref(),
                                        crate::expressions::Literal::String(text) if text == " "
                                    )
                            );
                            let count_already_cast =
                                matches!(call.args[1], Expression::Cast(_));

                            if pads_with_space && !count_already_cast {
                                let mut new_args = call.args.clone();
                                new_args[1] =
                                    Expression::Cast(Box::new(crate::expressions::Cast {
                                        this: new_args[1].clone(),
                                        to: crate::expressions::DataType::BigInt {
                                            length: None,
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }));
                                Some(Expression::Function(Box::new(
                                    crate::expressions::Function {
                                        name: call.name.clone(),
                                        args: new_args,
                                        distinct: call.distinct,
                                        trailing_comments: call.trailing_comments.clone(),
                                        use_bracket_syntax: call.use_bracket_syntax,
                                        no_parens: call.no_parens,
                                        quoted: call.quoted,
                                        span: None,
                                        inferred_type: None,
                                    },
                                )))
                            } else {
                                None
                            }
                        }
                        _ => None,
                    };
                    Ok(rewritten.unwrap_or(node))
                })?;
            }

            // Propagate struct field names in arrays (BigQuery source to non-BigQuery target).
            // BigQuery -> BigQuery must NOT propagate names (BigQuery handles implicit inheritance).
            if from_bigquery && !matches!(target, DialectType::BigQuery) {
                tree = crate::transforms::propagate_struct_field_names(tree)?;
            }

            // Snowflake source to DuckDB target: RANDOM()/RANDOM(seed) -> scaled RANDOM().
            // Snowflake RANDOM() returns an integer in [-2^63, 2^63-1]; DuckDB RANDOM()
            // returns a float in [0, 1). RANDOM inside UNIFORM/NORMAL/ZIPF/RANDSTR generator
            // args is treated specially because those functions handle their generator
            // args differently (as float seeds).
            if from_snowflake && to_duckdb {
                /// Lower bound literal of the scaled range; also the marker used to
                /// recognise an already expanded RANDOM() call.
                const RANDOM_LOWER_BOUND: &str = "-9.223372036854776E+18";
                /// Upper bound literal of the scaled range.
                const RANDOM_UPPER_BOUND: &str = "9.223372036854776e+18";
                /// Functions whose seeded RANDOM(seed) arguments must keep their seed.
                const SEED_PRESERVING: [&str; 4] = ["UNIFORM", "NORMAL", "ZIPF", "RANDSTR"];
                /// Functions whose unseeded RANDOM() arguments are restored after expansion.
                const KEEPS_PLAIN_RANDOM: [&str; 3] = ["UNIFORM", "NORMAL", "ZIPF"];

                /// Number literal expression holding `text` verbatim.
                fn number(text: &str) -> Expression {
                    Expression::Literal(Box::new(crate::expressions::Literal::Number(
                        text.to_string(),
                    )))
                }

                /// Operand payload for a binary operator with no attached comments.
                fn operands(
                    left: Expression,
                    right: Expression,
                ) -> Box<crate::expressions::BinaryOp> {
                    Box::new(crate::expressions::BinaryOp {
                        left,
                        right,
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    })
                }

                /// Builds `CAST(lower + RANDOM() * (upper - lower) AS BIGINT)`.
                fn make_scaled_random() -> Expression {
                    let lower = number(RANDOM_LOWER_BOUND);
                    let upper = number(RANDOM_UPPER_BOUND);
                    let range_size = Expression::Paren(Box::new(crate::expressions::Paren {
                        this: Expression::Sub(operands(upper, lower.clone())),
                        trailing_comments: vec![],
                    }));
                    let scaled = Expression::Mul(operands(
                        Expression::Random(crate::expressions::Random),
                        range_size,
                    ));
                    let shifted = Expression::Add(operands(lower, scaled));
                    Expression::Cast(Box::new(crate::expressions::Cast {
                        this: shifted,
                        to: crate::expressions::DataType::BigInt { length: None },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }))
                }

                /// Detects the expanded RANDOM pattern:
                /// `CAST(-9.22... + <anything> AS BIGINT)`.
                fn is_scaled_random(arg: &Expression) -> bool {
                    let Expression::Cast(cast) = arg else {
                        return false;
                    };
                    if !matches!(cast.to, crate::expressions::DataType::BigInt { .. }) {
                        return false;
                    }
                    let Expression::Add(add) = &cast.this else {
                        return false;
                    };
                    let Expression::Literal(lit) = &add.left else {
                        return false;
                    };
                    matches!(
                        lit.as_ref(),
                        crate::expressions::Literal::Number(num) if num == RANDOM_LOWER_BOUND
                    )
                }

                // Pre-pass: protect seeded RANDOM(seed) inside UNIFORM/NORMAL/ZIPF/RANDSTR
                // by converting Rand{seed: Some(s)} to Function{name:"RANDOM", args:[s]}.
                // transform_recursive is bottom-up, so without this the seeded call would
                // be expanded by the pass below and the seed value lost. Unseeded
                // RANDOM()/Rand{seed:None} is left alone: it gets expanded and is then
                // restored to Expression::Random by the next pass.
                tree = transform_recursive(tree, &|node| match node {
                    Expression::Function(mut call)
                        if SEED_PRESERVING
                            .iter()
                            .any(|name| call.name.eq_ignore_ascii_case(name)) =>
                    {
                        for arg in call.args.iter_mut() {
                            let seed = match &*arg {
                                Expression::Rand(rand)
                                    if rand.lower.is_none() && rand.upper.is_none() =>
                                {
                                    rand.seed.clone()
                                }
                                _ => None,
                            };
                            if let Some(seed) = seed {
                                // Function("RANDOM", [seed]) is not touched by the
                                // expansion pass below.
                                *arg = Expression::Function(Box::new(
                                    crate::expressions::Function::new(
                                        "RANDOM".to_string(),
                                        vec![*seed],
                                    ),
                                ));
                            }
                        }
                        Ok(Expression::Function(call))
                    }
                    other => Ok(other),
                })?;

                // Expansion pass. Because processing is bottom-up, an unseeded RANDOM()
                // inside UNIFORM/NORMAL/ZIPF has already been expanded by the time the
                // parent is visited; the parent arm detects the expanded pattern and puts
                // Expression::Random back. RANDSTR is deliberately NOT in that list: it
                // needs the expanded form for unseeded RANDOM() since the DuckDB handler
                // uses the expanded SQL as-is in the hash.
                tree = transform_recursive(tree, &|node| match node {
                    Expression::Function(mut call)
                        if KEEPS_PLAIN_RANDOM
                            .iter()
                            .any(|name| call.name.eq_ignore_ascii_case(name)) =>
                    {
                        for arg in call.args.iter_mut() {
                            if is_scaled_random(arg) {
                                *arg = Expression::Random(crate::expressions::Random);
                            }
                        }
                        Ok(Expression::Function(call))
                    }
                    Expression::Random(_) => Ok(make_scaled_random()),
                    // Rand(seed) with no bounds: drop the seed and expand
                    // (DuckDB RANDOM doesn't support seeds).
                    Expression::Rand(ref rand)
                        if rand.lower.is_none() && rand.upper.is_none() =>
                    {
                        Ok(make_scaled_random())
                    }
                    other => Ok(other),
                })?;
            }

            // Apply cross-dialect semantic normalizations.
            tree = Self::cross_dialect_normalize(tree, source, target)?;

            if postgres_to_sqlite {
                tree = Self::normalize_postgres_to_sqlite_types(tree)?;
            }

            // DuckDB target from BigQuery source: wrap UNNEST of struct arrays in a
            // (SELECT UNNEST(..., max_depth => 2)) subquery.
            // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure.
            if from_bigquery && to_duckdb {
                tree = crate::transforms::wrap_duckdb_unnest_struct(tree)?;
            }

            if from_bigquery {
                if matches!(
                    target,
                    DialectType::DuckDB
                        | DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Athena
                        | DialectType::Spark
                        | DialectType::Databricks
                ) {
                    // Convert BigQuery UNNEST aliases to column-alias format:
                    // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
                    tree = crate::transforms::unnest_alias_to_column_alias(tree)?;
                } else if matches!(target, DialectType::BigQuery | DialectType::Redshift) {
                    // BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
                    // without converting the alias format (no _t0 wrapper).
                    tree = crate::transforms::unnest_from_to_cross_join(tree)?;
                    // Redshift: strip UNNEST when the arg is a column reference path.
                    if matches!(target, DialectType::Redshift) {
                        tree = crate::transforms::strip_unnest_column_refs(tree)?;
                    }
                }
            }

            // Presto/Trino/Athena targets from PostgreSQL/Redshift source:
            // wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
            if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
                && matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                )
            {
                tree = crate::transforms::wrap_unnest_join_aliases(tree)?;
            }

            // Eliminate DISTINCT ON with target-dialect awareness. This must happen after
            // the source transform (which may produce DISTINCT ON) and before the target
            // transform, with knowledge of the target dialect's NULL ordering behavior.
            tree = crate::transforms::eliminate_distinct_on_for_dialect(
                tree,
                Some(target),
                Some(source),
            )?;

            // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
            if matches!(target, DialectType::Snowflake) {
                tree = Self::transform_generate_date_array_snowflake(tree)?;
            }

            // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
            if matches!(
                target,
                DialectType::Spark | DialectType::Databricks | DialectType::Hive
            ) {
                tree = crate::transforms::unnest_to_explode_select(tree)?;
            }

            // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it.
            if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
                tree = crate::transforms::no_limit_order_by_union(tree)?;
            }

            // TSQL: convert COUNT(*) -> COUNT_BIG(*) when the source is not TSQL/Fabric.
            // Python sqlglot does this in the TSQL generator, but doing it there would
            // break TSQL -> TSQL identity.
            if to_tsql_family && !from_tsql_family {
                tree = transform_recursive(tree, &|node| match node {
                    Expression::Count(ref count) => {
                        // Arguments of the COUNT_BIG(...) aggregate.
                        let args = match (&count.this, count.star) {
                            (_, true) => vec![Expression::Star(crate::expressions::Star {
                                table: None,
                                except: None,
                                replace: None,
                                rename: None,
                                trailing_comments: Vec::new(),
                                span: None,
                            })],
                            (Some(operand), false) => vec![operand.clone()],
                            (None, false) => vec![],
                        };
                        Ok(Expression::AggregateFunction(Box::new(
                            crate::expressions::AggregateFunction {
                                name: "COUNT_BIG".to_string(),
                                args,
                                distinct: count.distinct,
                                filter: count.filter.clone(),
                                order_by: Vec::new(),
                                limit: None,
                                ignore_nulls: None,
                                inferred_type: None,
                            },
                        )))
                    }
                    other => Ok(other),
                })?;
            }

            tree = target_dialect.transform(tree)?;

            // DuckDB target: when FROM is RANGE(n), replace SEQ's ROW_NUMBER pattern with `range`.
            if to_duckdb {
                tree = Self::seq_rownum_to_range(tree)?;
            }

            let rendered = render(&tree)?;

            // Align a known Snowflake pretty-print edge case with Python sqlglot output.
            Ok(if pretty && target == DialectType::Snowflake {
                Self::normalize_snowflake_pretty(rendered)
            } else {
                rendered
            })
        })
        .collect()
}
4421}
4422
4423// Transpile-only methods: cross-dialect normalization and helpers
4424#[cfg(feature = "transpile")]
4425impl Dialect {
4426 fn reject_pgvector_distance_operators_for_sqlite(&self, sql: &str) -> Result<()> {
4427 let tokens = self.tokenize(sql)?;
4428 for (i, token) in tokens.iter().enumerate() {
4429 if token.token_type == TokenType::NullsafeEq {
4430 return Err(crate::error::Error::unsupported(
4431 "PostgreSQL pgvector cosine distance operator <=>",
4432 "SQLite",
4433 ));
4434 }
4435 if token.token_type == TokenType::Lt
4436 && tokens
4437 .get(i + 1)
4438 .is_some_and(|token| token.token_type == TokenType::Tilde)
4439 && tokens
4440 .get(i + 2)
4441 .is_some_and(|token| token.token_type == TokenType::Gt)
4442 {
4443 return Err(crate::error::Error::unsupported(
4444 "PostgreSQL pgvector Hamming distance operator <~>",
4445 "SQLite",
4446 ));
4447 }
4448 }
4449 Ok(())
4450 }
4451
4452 fn normalize_postgres_to_sqlite_types(expr: Expression) -> Result<Expression> {
4453 fn sqlite_type(dt: crate::expressions::DataType) -> crate::expressions::DataType {
4454 use crate::expressions::DataType;
4455
4456 match dt {
4457 DataType::Bit { .. } => DataType::Int {
4458 length: None,
4459 integer_spelling: true,
4460 },
4461 DataType::TextWithLength { .. } => DataType::Text,
4462 DataType::VarChar { .. } => DataType::Text,
4463 DataType::Char { .. } => DataType::Text,
4464 DataType::Timestamp { timezone: true, .. } => DataType::Text,
4465 DataType::Custom { name } => {
4466 let base = name
4467 .split_once('(')
4468 .map_or(name.as_str(), |(base, _)| base)
4469 .trim();
4470 if base.eq_ignore_ascii_case("TSVECTOR")
4471 || base.eq_ignore_ascii_case("TIMESTAMPTZ")
4472 || base.eq_ignore_ascii_case("TIMESTAMP WITH TIME ZONE")
4473 || base.eq_ignore_ascii_case("NVARCHAR")
4474 || base.eq_ignore_ascii_case("NCHAR")
4475 {
4476 DataType::Text
4477 } else {
4478 DataType::Custom { name }
4479 }
4480 }
4481 _ => dt,
4482 }
4483 }
4484
4485 transform_recursive(expr, &|e| match e {
4486 Expression::DataType(dt) => Ok(Expression::DataType(sqlite_type(dt))),
4487 Expression::CreateTable(mut ct) => {
4488 for column in &mut ct.columns {
4489 column.data_type = sqlite_type(column.data_type.clone());
4490 }
4491 Ok(Expression::CreateTable(ct))
4492 }
4493 _ => Ok(e),
4494 })
4495 }
4496
4497 /// For DuckDB target: when FROM clause contains RANGE(n), replace
4498 /// `(ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1)` with `range` in select expressions.
4499 /// This handles SEQ1/2/4/8 → RANGE transpilation from Snowflake.
4500 fn seq_rownum_to_range(expr: Expression) -> Result<Expression> {
4501 if let Expression::Select(mut select) = expr {
4502 // Check if FROM contains a RANGE function
4503 let has_range_from = if let Some(ref from) = select.from {
4504 from.expressions.iter().any(|e| {
4505 // Check for direct RANGE(...) or aliased RANGE(...)
4506 match e {
4507 Expression::Function(f) => f.name.eq_ignore_ascii_case("RANGE"),
4508 Expression::Alias(a) => {
4509 matches!(&a.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("RANGE"))
4510 }
4511 _ => false,
4512 }
4513 })
4514 } else {
4515 false
4516 };
4517
4518 if has_range_from {
4519 // Replace the ROW_NUMBER pattern in select expressions
4520 select.expressions = select
4521 .expressions
4522 .into_iter()
4523 .map(|e| Self::replace_rownum_with_range(e))
4524 .collect();
4525 }
4526
4527 Ok(Expression::Select(select))
4528 } else {
4529 Ok(expr)
4530 }
4531 }
4532
4533 /// Replace `(ROW_NUMBER() OVER (...) - 1)` with `range` column reference
4534 fn replace_rownum_with_range(expr: Expression) -> Expression {
4535 match expr {
4536 // Match: (ROW_NUMBER() OVER (...) - 1) % N → range % N
4537 Expression::Mod(op) => {
4538 let new_left = Self::try_replace_rownum_paren(&op.left);
4539 Expression::Mod(Box::new(crate::expressions::BinaryOp {
4540 left: new_left,
4541 right: op.right,
4542 left_comments: op.left_comments,
4543 operator_comments: op.operator_comments,
4544 trailing_comments: op.trailing_comments,
4545 inferred_type: op.inferred_type,
4546 }))
4547 }
4548 // Match: (CASE WHEN (ROW...) % N >= ... THEN ... ELSE ... END)
4549 Expression::Paren(p) => {
4550 let inner = Self::replace_rownum_with_range(p.this);
4551 Expression::Paren(Box::new(crate::expressions::Paren {
4552 this: inner,
4553 trailing_comments: p.trailing_comments,
4554 }))
4555 }
4556 Expression::Case(mut c) => {
4557 // Replace ROW_NUMBER in WHEN conditions and THEN expressions
4558 c.whens = c
4559 .whens
4560 .into_iter()
4561 .map(|(cond, then)| {
4562 (
4563 Self::replace_rownum_with_range(cond),
4564 Self::replace_rownum_with_range(then),
4565 )
4566 })
4567 .collect();
4568 if let Some(else_) = c.else_ {
4569 c.else_ = Some(Self::replace_rownum_with_range(else_));
4570 }
4571 Expression::Case(c)
4572 }
4573 Expression::Gte(op) => Expression::Gte(Box::new(crate::expressions::BinaryOp {
4574 left: Self::replace_rownum_with_range(op.left),
4575 right: op.right,
4576 left_comments: op.left_comments,
4577 operator_comments: op.operator_comments,
4578 trailing_comments: op.trailing_comments,
4579 inferred_type: op.inferred_type,
4580 })),
4581 Expression::Sub(op) => Expression::Sub(Box::new(crate::expressions::BinaryOp {
4582 left: Self::replace_rownum_with_range(op.left),
4583 right: op.right,
4584 left_comments: op.left_comments,
4585 operator_comments: op.operator_comments,
4586 trailing_comments: op.trailing_comments,
4587 inferred_type: op.inferred_type,
4588 })),
4589 Expression::Alias(mut a) => {
4590 a.this = Self::replace_rownum_with_range(a.this);
4591 Expression::Alias(a)
4592 }
4593 other => other,
4594 }
4595 }
4596
4597 /// Check if an expression is `(ROW_NUMBER() OVER (...) - 1)` and replace with `range`
4598 fn try_replace_rownum_paren(expr: &Expression) -> Expression {
4599 if let Expression::Paren(ref p) = expr {
4600 if let Expression::Sub(ref sub) = p.this {
4601 if let Expression::WindowFunction(ref wf) = sub.left {
4602 if let Expression::Function(ref f) = wf.this {
4603 if f.name.eq_ignore_ascii_case("ROW_NUMBER") {
4604 if let Expression::Literal(ref lit) = sub.right {
4605 if let crate::expressions::Literal::Number(ref n) = lit.as_ref() {
4606 if n == "1" {
4607 return Expression::column("range");
4608 }
4609 }
4610 }
4611 }
4612 }
4613 }
4614 }
4615 }
4616 expr.clone()
4617 }
4618
4619 /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
4620 /// Converts:
4621 /// SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
4622 /// To:
4623 /// SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
4624 /// FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)) AS _t0(seq, key, path, index, alias, this)
4625 fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
4626 use crate::expressions::*;
4627 transform_recursive(expr, &|e| {
4628 // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
4629 if let Expression::ArraySize(ref af) = e {
4630 if let Expression::Function(ref f) = af.this {
4631 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
4632 let result = Self::convert_array_size_gda_snowflake(f)?;
4633 return Ok(result);
4634 }
4635 }
4636 }
4637
4638 let Expression::Select(mut sel) = e else {
4639 return Ok(e);
4640 };
4641
4642 // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
4643 let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
4644 let mut gda_join_idx: Option<usize> = None;
4645
4646 for (idx, join) in sel.joins.iter().enumerate() {
4647 // The join.this may be:
4648 // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
4649 // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
4650 let (unnest_ref, alias_name) = match &join.this {
4651 Expression::Unnest(ref unnest) => {
4652 let alias = unnest.alias.as_ref().map(|id| id.name.clone());
4653 (Some(unnest.as_ref()), alias)
4654 }
4655 Expression::Alias(ref a) => {
4656 if let Expression::Unnest(ref unnest) = a.this {
4657 (Some(unnest.as_ref()), Some(a.alias.name.clone()))
4658 } else {
4659 (None, None)
4660 }
4661 }
4662 _ => (None, None),
4663 };
4664
4665 if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
4666 // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
4667 if let Expression::Function(ref f) = unnest.this {
4668 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
4669 let start_expr = f.args[0].clone();
4670 let end_expr = f.args[1].clone();
4671 let step = f.args.get(2).cloned();
4672
4673 // Extract unit from step interval
4674 let unit = if let Some(Expression::Interval(ref iv)) = step {
4675 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
4676 Some(format!("{:?}", unit).to_ascii_uppercase())
4677 } else if let Some(ref this) = iv.this {
4678 // The interval may be stored as a string like "1 MONTH"
4679 if let Expression::Literal(lit) = this {
4680 if let Literal::String(ref s) = lit.as_ref() {
4681 let parts: Vec<&str> = s.split_whitespace().collect();
4682 if parts.len() == 2 {
4683 Some(parts[1].to_ascii_uppercase())
4684 } else if parts.len() == 1 {
4685 // Single word like "MONTH" or just "1"
4686 let upper = parts[0].to_ascii_uppercase();
4687 if matches!(
4688 upper.as_str(),
4689 "YEAR"
4690 | "QUARTER"
4691 | "MONTH"
4692 | "WEEK"
4693 | "DAY"
4694 | "HOUR"
4695 | "MINUTE"
4696 | "SECOND"
4697 ) {
4698 Some(upper)
4699 } else {
4700 None
4701 }
4702 } else {
4703 None
4704 }
4705 } else {
4706 None
4707 }
4708 } else {
4709 None
4710 }
4711 } else {
4712 None
4713 }
4714 } else {
4715 None
4716 };
4717
4718 if let Some(unit_str) = unit {
4719 gda_info = Some((alias, start_expr, end_expr, unit_str));
4720 gda_join_idx = Some(idx);
4721 }
4722 }
4723 }
4724 }
4725 if gda_info.is_some() {
4726 break;
4727 }
4728 }
4729
4730 let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
4731 // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
4732 // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
4733 let result = Self::try_transform_from_gda_snowflake(sel);
4734 return result;
4735 };
4736 let join_idx = gda_join_idx.unwrap();
4737
4738 // Build ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)
4739 // ARRAY_GENERATE_RANGE uses exclusive end, and we need DATEDIFF + 1 values
4740 // (inclusive date range), so the exclusive end is DATEDIFF + 1.
4741 let datediff = Expression::Function(Box::new(Function::new(
4742 "DATEDIFF".to_string(),
4743 vec![
4744 Expression::boxed_column(Column {
4745 name: Identifier::new(&unit_str),
4746 table: None,
4747 join_mark: false,
4748 trailing_comments: vec![],
4749 span: None,
4750 inferred_type: None,
4751 }),
4752 start_expr.clone(),
4753 end_expr.clone(),
4754 ],
4755 )));
4756 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
4757 left: datediff,
4758 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
4759 left_comments: vec![],
4760 operator_comments: vec![],
4761 trailing_comments: vec![],
4762 inferred_type: None,
4763 }));
4764
4765 let array_gen_range = Expression::Function(Box::new(Function::new(
4766 "ARRAY_GENERATE_RANGE".to_string(),
4767 vec![
4768 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
4769 datediff_plus_one,
4770 ],
4771 )));
4772
4773 // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
4774 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
4775 name: Identifier::new("INPUT"),
4776 value: array_gen_range,
4777 separator: crate::expressions::NamedArgSeparator::DArrow,
4778 }));
4779 let flatten = Expression::Function(Box::new(Function::new(
4780 "FLATTEN".to_string(),
4781 vec![flatten_input],
4782 )));
4783
4784 // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
4785 let alias_table = Alias {
4786 this: flatten,
4787 alias: Identifier::new("_t0"),
4788 column_aliases: vec![
4789 Identifier::new("seq"),
4790 Identifier::new("key"),
4791 Identifier::new("path"),
4792 Identifier::new("index"),
4793 Identifier::new(&alias_name),
4794 Identifier::new("this"),
4795 ],
4796 alias_explicit_as: false,
4797 alias_keyword: None,
4798 pre_alias_comments: vec![],
4799 trailing_comments: vec![],
4800 inferred_type: None,
4801 };
4802 let lateral_expr = Expression::Lateral(Box::new(Lateral {
4803 this: Box::new(Expression::Alias(Box::new(alias_table))),
4804 view: None,
4805 outer: None,
4806 alias: None,
4807 alias_quoted: false,
4808 cross_apply: None,
4809 ordinality: None,
4810 column_aliases: vec![],
4811 }));
4812
4813 // Remove the original join and add to FROM expressions
4814 sel.joins.remove(join_idx);
4815 if let Some(ref mut from) = sel.from {
4816 from.expressions.push(lateral_expr);
4817 }
4818
4819 // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
4820 let dateadd_expr = Expression::Function(Box::new(Function::new(
4821 "DATEADD".to_string(),
4822 vec![
4823 Expression::boxed_column(Column {
4824 name: Identifier::new(&unit_str),
4825 table: None,
4826 join_mark: false,
4827 trailing_comments: vec![],
4828 span: None,
4829 inferred_type: None,
4830 }),
4831 Expression::Cast(Box::new(Cast {
4832 this: Expression::boxed_column(Column {
4833 name: Identifier::new(&alias_name),
4834 table: None,
4835 join_mark: false,
4836 trailing_comments: vec![],
4837 span: None,
4838 inferred_type: None,
4839 }),
4840 to: DataType::Int {
4841 length: None,
4842 integer_spelling: false,
4843 },
4844 trailing_comments: vec![],
4845 double_colon_syntax: false,
4846 format: None,
4847 default: None,
4848 inferred_type: None,
4849 })),
4850 Expression::Cast(Box::new(Cast {
4851 this: start_expr.clone(),
4852 to: DataType::Date,
4853 trailing_comments: vec![],
4854 double_colon_syntax: false,
4855 format: None,
4856 default: None,
4857 inferred_type: None,
4858 })),
4859 ],
4860 )));
4861
4862 // Replace references to the alias in the SELECT list
4863 let new_exprs: Vec<Expression> = sel
4864 .expressions
4865 .iter()
4866 .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
4867 .collect();
4868 sel.expressions = new_exprs;
4869
4870 Ok(Expression::Select(sel))
4871 })
4872 }
4873
4874 /// Helper: replace column references to `alias_name` with dateadd expression
4875 fn replace_column_ref_with_dateadd(
4876 expr: &Expression,
4877 alias_name: &str,
4878 dateadd: &Expression,
4879 ) -> Expression {
4880 use crate::expressions::*;
4881 match expr {
4882 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
4883 // Plain column reference -> DATEADD(...) AS alias_name
4884 Expression::Alias(Box::new(Alias {
4885 this: dateadd.clone(),
4886 alias: Identifier::new(alias_name),
4887 column_aliases: vec![],
4888 alias_explicit_as: false,
4889 alias_keyword: None,
4890 pre_alias_comments: vec![],
4891 trailing_comments: vec![],
4892 inferred_type: None,
4893 }))
4894 }
4895 Expression::Alias(a) => {
4896 // Check if the inner expression references the alias
4897 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
4898 Expression::Alias(Box::new(Alias {
4899 this: new_this,
4900 alias: a.alias.clone(),
4901 column_aliases: a.column_aliases.clone(),
4902 alias_explicit_as: false,
4903 alias_keyword: None,
4904 pre_alias_comments: a.pre_alias_comments.clone(),
4905 trailing_comments: a.trailing_comments.clone(),
4906 inferred_type: None,
4907 }))
4908 }
4909 _ => expr.clone(),
4910 }
4911 }
4912
4913 /// Helper: replace column references in inner expression (not top-level)
4914 fn replace_column_ref_inner(
4915 expr: &Expression,
4916 alias_name: &str,
4917 dateadd: &Expression,
4918 ) -> Expression {
4919 use crate::expressions::*;
4920 match expr {
4921 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
4922 dateadd.clone()
4923 }
4924 Expression::Add(op) => {
4925 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
4926 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
4927 Expression::Add(Box::new(BinaryOp {
4928 left,
4929 right,
4930 left_comments: op.left_comments.clone(),
4931 operator_comments: op.operator_comments.clone(),
4932 trailing_comments: op.trailing_comments.clone(),
4933 inferred_type: None,
4934 }))
4935 }
4936 Expression::Sub(op) => {
4937 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
4938 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
4939 Expression::Sub(Box::new(BinaryOp {
4940 left,
4941 right,
4942 left_comments: op.left_comments.clone(),
4943 operator_comments: op.operator_comments.clone(),
4944 trailing_comments: op.trailing_comments.clone(),
4945 inferred_type: None,
4946 }))
4947 }
4948 Expression::Mul(op) => {
4949 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
4950 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
4951 Expression::Mul(Box::new(BinaryOp {
4952 left,
4953 right,
4954 left_comments: op.left_comments.clone(),
4955 operator_comments: op.operator_comments.clone(),
4956 trailing_comments: op.trailing_comments.clone(),
4957 inferred_type: None,
4958 }))
4959 }
4960 _ => expr.clone(),
4961 }
4962 }
4963
4964 /// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
4965 /// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
4966 fn try_transform_from_gda_snowflake(
4967 mut sel: Box<crate::expressions::Select>,
4968 ) -> Result<Expression> {
4969 use crate::expressions::*;
4970
4971 // Extract GDA info from FROM clause
4972 let mut gda_info: Option<(
4973 usize,
4974 String,
4975 Expression,
4976 Expression,
4977 String,
4978 Option<(String, Vec<Identifier>)>,
4979 )> = None; // (from_idx, col_name, start, end, unit, outer_alias)
4980
4981 if let Some(ref from) = sel.from {
4982 for (idx, table_expr) in from.expressions.iter().enumerate() {
4983 // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
4984 // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
4985 let (unnest_opt, outer_alias_info) = match table_expr {
4986 Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
4987 Expression::Alias(ref a) => {
4988 if let Expression::Unnest(ref unnest) = a.this {
4989 let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
4990 (Some(unnest.as_ref()), Some(alias_info))
4991 } else {
4992 (None, None)
4993 }
4994 }
4995 _ => (None, None),
4996 };
4997
4998 if let Some(unnest) = unnest_opt {
4999 // Check for GENERATE_DATE_ARRAY function
5000 let func_opt = match &unnest.this {
5001 Expression::Function(ref f)
5002 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
5003 && f.args.len() >= 2 =>
5004 {
5005 Some(f)
5006 }
5007 // Also check for GenerateSeries (from earlier normalization)
5008 _ => None,
5009 };
5010
5011 if let Some(f) = func_opt {
5012 let start_expr = f.args[0].clone();
5013 let end_expr = f.args[1].clone();
5014 let step = f.args.get(2).cloned();
5015
5016 // Extract unit and column name
5017 let unit = Self::extract_interval_unit_str(&step);
5018 let col_name = outer_alias_info
5019 .as_ref()
5020 .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
5021 .unwrap_or_else(|| "value".to_string());
5022
5023 if let Some(unit_str) = unit {
5024 gda_info = Some((
5025 idx,
5026 col_name,
5027 start_expr,
5028 end_expr,
5029 unit_str,
5030 outer_alias_info,
5031 ));
5032 break;
5033 }
5034 }
5035 }
5036 }
5037 }
5038
5039 let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
5040 else {
5041 return Ok(Expression::Select(sel));
5042 };
5043
5044 // Build the Snowflake subquery:
5045 // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
5046 // FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(seq, key, path, index, col_name, this))
5047
5048 // DATEDIFF(unit, start, end)
5049 let datediff = Expression::Function(Box::new(Function::new(
5050 "DATEDIFF".to_string(),
5051 vec![
5052 Expression::boxed_column(Column {
5053 name: Identifier::new(&unit_str),
5054 table: None,
5055 join_mark: false,
5056 trailing_comments: vec![],
5057 span: None,
5058 inferred_type: None,
5059 }),
5060 start_expr.clone(),
5061 end_expr.clone(),
5062 ],
5063 )));
5064 // DATEDIFF(...) + 1
5065 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
5066 left: datediff,
5067 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
5068 left_comments: vec![],
5069 operator_comments: vec![],
5070 trailing_comments: vec![],
5071 inferred_type: None,
5072 }));
5073
5074 let array_gen_range = Expression::Function(Box::new(Function::new(
5075 "ARRAY_GENERATE_RANGE".to_string(),
5076 vec![
5077 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
5078 datediff_plus_one,
5079 ],
5080 )));
5081
5082 // TABLE(FLATTEN(INPUT => ...))
5083 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
5084 name: Identifier::new("INPUT"),
5085 value: array_gen_range,
5086 separator: crate::expressions::NamedArgSeparator::DArrow,
5087 }));
5088 let flatten = Expression::Function(Box::new(Function::new(
5089 "FLATTEN".to_string(),
5090 vec![flatten_input],
5091 )));
5092
5093 // Determine alias name for the table: use outer alias or _t0
5094 let table_alias_name = outer_alias_info
5095 .as_ref()
5096 .map(|(name, _)| name.clone())
5097 .unwrap_or_else(|| "_t0".to_string());
5098
5099 // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
5100 let table_func =
5101 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
5102 let flatten_aliased = Expression::Alias(Box::new(Alias {
5103 this: table_func,
5104 alias: Identifier::new(&table_alias_name),
5105 column_aliases: vec![
5106 Identifier::new("seq"),
5107 Identifier::new("key"),
5108 Identifier::new("path"),
5109 Identifier::new("index"),
5110 Identifier::new(&col_name),
5111 Identifier::new("this"),
5112 ],
5113 alias_explicit_as: false,
5114 alias_keyword: None,
5115 pre_alias_comments: vec![],
5116 trailing_comments: vec![],
5117 inferred_type: None,
5118 }));
5119
5120 // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
5121 let dateadd_expr = Expression::Function(Box::new(Function::new(
5122 "DATEADD".to_string(),
5123 vec![
5124 Expression::boxed_column(Column {
5125 name: Identifier::new(&unit_str),
5126 table: None,
5127 join_mark: false,
5128 trailing_comments: vec![],
5129 span: None,
5130 inferred_type: None,
5131 }),
5132 Expression::Cast(Box::new(Cast {
5133 this: Expression::boxed_column(Column {
5134 name: Identifier::new(&col_name),
5135 table: None,
5136 join_mark: false,
5137 trailing_comments: vec![],
5138 span: None,
5139 inferred_type: None,
5140 }),
5141 to: DataType::Int {
5142 length: None,
5143 integer_spelling: false,
5144 },
5145 trailing_comments: vec![],
5146 double_colon_syntax: false,
5147 format: None,
5148 default: None,
5149 inferred_type: None,
5150 })),
5151 // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
5152 start_expr.clone(),
5153 ],
5154 )));
5155 let dateadd_aliased = Expression::Alias(Box::new(Alias {
5156 this: dateadd_expr,
5157 alias: Identifier::new(&col_name),
5158 column_aliases: vec![],
5159 alias_explicit_as: false,
5160 alias_keyword: None,
5161 pre_alias_comments: vec![],
5162 trailing_comments: vec![],
5163 inferred_type: None,
5164 }));
5165
5166 // Build inner SELECT
5167 let mut inner_select = Select::new();
5168 inner_select.expressions = vec![dateadd_aliased];
5169 inner_select.from = Some(From {
5170 expressions: vec![flatten_aliased],
5171 });
5172
5173 let inner_select_expr = Expression::Select(Box::new(inner_select));
5174 let subquery = Expression::Subquery(Box::new(Subquery {
5175 this: inner_select_expr,
5176 alias: None,
5177 column_aliases: vec![],
5178 alias_explicit_as: false,
5179 alias_keyword: None,
5180 order_by: None,
5181 limit: None,
5182 offset: None,
5183 distribute_by: None,
5184 sort_by: None,
5185 cluster_by: None,
5186 lateral: false,
5187 modifiers_inside: false,
5188 trailing_comments: vec![],
5189 inferred_type: None,
5190 }));
5191
5192 // If there was an outer alias (e.g., AS _q(date_week)), wrap with alias
5193 let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
5194 Expression::Alias(Box::new(Alias {
5195 this: subquery,
5196 alias: Identifier::new(&alias_name),
5197 column_aliases: col_aliases,
5198 alias_explicit_as: false,
5199 alias_keyword: None,
5200 pre_alias_comments: vec![],
5201 trailing_comments: vec![],
5202 inferred_type: None,
5203 }))
5204 } else {
5205 subquery
5206 };
5207
5208 // Replace the FROM expression
5209 if let Some(ref mut from) = sel.from {
5210 from.expressions[from_idx] = replacement;
5211 }
5212
5213 Ok(Expression::Select(sel))
5214 }
5215
5216 /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
5217 /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
5218 /// FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(...))))
5219 fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
5220 use crate::expressions::*;
5221
5222 let start_expr = f.args[0].clone();
5223 let end_expr = f.args[1].clone();
5224 let step = f.args.get(2).cloned();
5225 let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
5226 let col_name = "value";
5227
5228 // Build the inner subquery: same as try_transform_from_gda_snowflake
5229 let datediff = Expression::Function(Box::new(Function::new(
5230 "DATEDIFF".to_string(),
5231 vec![
5232 Expression::boxed_column(Column {
5233 name: Identifier::new(&unit_str),
5234 table: None,
5235 join_mark: false,
5236 trailing_comments: vec![],
5237 span: None,
5238 inferred_type: None,
5239 }),
5240 start_expr.clone(),
5241 end_expr.clone(),
5242 ],
5243 )));
5244 // DATEDIFF(...) + 1
5245 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
5246 left: datediff,
5247 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
5248 left_comments: vec![],
5249 operator_comments: vec![],
5250 trailing_comments: vec![],
5251 inferred_type: None,
5252 }));
5253
5254 let array_gen_range = Expression::Function(Box::new(Function::new(
5255 "ARRAY_GENERATE_RANGE".to_string(),
5256 vec![
5257 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
5258 datediff_plus_one,
5259 ],
5260 )));
5261
5262 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
5263 name: Identifier::new("INPUT"),
5264 value: array_gen_range,
5265 separator: crate::expressions::NamedArgSeparator::DArrow,
5266 }));
5267 let flatten = Expression::Function(Box::new(Function::new(
5268 "FLATTEN".to_string(),
5269 vec![flatten_input],
5270 )));
5271
5272 let table_func =
5273 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
5274 let flatten_aliased = Expression::Alias(Box::new(Alias {
5275 this: table_func,
5276 alias: Identifier::new("_t0"),
5277 column_aliases: vec![
5278 Identifier::new("seq"),
5279 Identifier::new("key"),
5280 Identifier::new("path"),
5281 Identifier::new("index"),
5282 Identifier::new(col_name),
5283 Identifier::new("this"),
5284 ],
5285 alias_explicit_as: false,
5286 alias_keyword: None,
5287 pre_alias_comments: vec![],
5288 trailing_comments: vec![],
5289 inferred_type: None,
5290 }));
5291
5292 let dateadd_expr = Expression::Function(Box::new(Function::new(
5293 "DATEADD".to_string(),
5294 vec![
5295 Expression::boxed_column(Column {
5296 name: Identifier::new(&unit_str),
5297 table: None,
5298 join_mark: false,
5299 trailing_comments: vec![],
5300 span: None,
5301 inferred_type: None,
5302 }),
5303 Expression::Cast(Box::new(Cast {
5304 this: Expression::boxed_column(Column {
5305 name: Identifier::new(col_name),
5306 table: None,
5307 join_mark: false,
5308 trailing_comments: vec![],
5309 span: None,
5310 inferred_type: None,
5311 }),
5312 to: DataType::Int {
5313 length: None,
5314 integer_spelling: false,
5315 },
5316 trailing_comments: vec![],
5317 double_colon_syntax: false,
5318 format: None,
5319 default: None,
5320 inferred_type: None,
5321 })),
5322 start_expr.clone(),
5323 ],
5324 )));
5325 let dateadd_aliased = Expression::Alias(Box::new(Alias {
5326 this: dateadd_expr,
5327 alias: Identifier::new(col_name),
5328 column_aliases: vec![],
5329 alias_explicit_as: false,
5330 alias_keyword: None,
5331 pre_alias_comments: vec![],
5332 trailing_comments: vec![],
5333 inferred_type: None,
5334 }));
5335
5336 // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
5337 let mut inner_select = Select::new();
5338 inner_select.expressions = vec![dateadd_aliased];
5339 inner_select.from = Some(From {
5340 expressions: vec![flatten_aliased],
5341 });
5342
5343 // Wrap in subquery for the inner part
5344 let inner_subquery = Expression::Subquery(Box::new(Subquery {
5345 this: Expression::Select(Box::new(inner_select)),
5346 alias: None,
5347 column_aliases: vec![],
5348 alias_explicit_as: false,
5349 alias_keyword: None,
5350 order_by: None,
5351 limit: None,
5352 offset: None,
5353 distribute_by: None,
5354 sort_by: None,
5355 cluster_by: None,
5356 lateral: false,
5357 modifiers_inside: false,
5358 trailing_comments: vec![],
5359 inferred_type: None,
5360 }));
5361
5362 // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery)
5363 let star = Expression::Star(Star {
5364 table: None,
5365 except: None,
5366 replace: None,
5367 rename: None,
5368 trailing_comments: vec![],
5369 span: None,
5370 });
5371 let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
5372 this: star,
5373 distinct: false,
5374 filter: None,
5375 order_by: vec![],
5376 name: Some("ARRAY_AGG".to_string()),
5377 ignore_nulls: None,
5378 having_max: None,
5379 limit: None,
5380 inferred_type: None,
5381 }));
5382
5383 let mut outer_select = Select::new();
5384 outer_select.expressions = vec![array_agg];
5385 outer_select.from = Some(From {
5386 expressions: vec![inner_subquery],
5387 });
5388
5389 // Wrap in a subquery
5390 let outer_subquery = Expression::Subquery(Box::new(Subquery {
5391 this: Expression::Select(Box::new(outer_select)),
5392 alias: None,
5393 column_aliases: vec![],
5394 alias_explicit_as: false,
5395 alias_keyword: None,
5396 order_by: None,
5397 limit: None,
5398 offset: None,
5399 distribute_by: None,
5400 sort_by: None,
5401 cluster_by: None,
5402 lateral: false,
5403 modifiers_inside: false,
5404 trailing_comments: vec![],
5405 inferred_type: None,
5406 }));
5407
5408 // ARRAY_SIZE(subquery)
5409 Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
5410 outer_subquery,
5411 ))))
5412 }
5413
5414 /// Extract interval unit string from an optional step expression.
5415 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
5416 use crate::expressions::*;
5417 if let Some(Expression::Interval(ref iv)) = step {
5418 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
5419 return Some(format!("{:?}", unit).to_ascii_uppercase());
5420 }
5421 if let Some(ref this) = iv.this {
5422 if let Expression::Literal(lit) = this {
5423 if let Literal::String(ref s) = lit.as_ref() {
5424 let parts: Vec<&str> = s.split_whitespace().collect();
5425 if parts.len() == 2 {
5426 return Some(parts[1].to_ascii_uppercase());
5427 } else if parts.len() == 1 {
5428 let upper = parts[0].to_ascii_uppercase();
5429 if matches!(
5430 upper.as_str(),
5431 "YEAR"
5432 | "QUARTER"
5433 | "MONTH"
5434 | "WEEK"
5435 | "DAY"
5436 | "HOUR"
5437 | "MINUTE"
5438 | "SECOND"
5439 ) {
5440 return Some(upper);
5441 }
5442 }
5443 }
5444 }
5445 }
5446 }
5447 // Default to DAY if no step or no interval
5448 if step.is_none() {
5449 return Some("DAY".to_string());
5450 }
5451 None
5452 }
5453
/// Re-wraps a few known fragments of pretty-printed Snowflake SQL.
///
/// The rewrites only run when `sql` contains both marker substrings below; in that
/// case three fixed fragments are replaced, in order, by their multi-line layouts.
/// Any other input is returned unchanged.
fn normalize_snowflake_pretty(mut sql: String) -> String {
    const LATERAL_MARKER: &str = "LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)";
    const RANGE_MARKER: &str = "ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)";

    // (fragment as generated, fragment as it should be laid out)
    const REWRITES: [(&str, &str); 3] = [
        (
            "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
            "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
        ),
        (
            "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
            "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
        ),
        (
            "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
            "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
        ),
    ];

    if !(sql.contains(LATERAL_MARKER) && sql.contains(RANGE_MARKER)) {
        return sql;
    }

    for &(generated, wrapped) in &REWRITES {
        sql = sql.replace(generated, wrapped);
    }

    sql
}
5476
5477 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
5478 /// This handles cases where the same syntax has different semantics across dialects.
5479 fn cross_dialect_normalize(
5480 expr: Expression,
5481 source: DialectType,
5482 target: DialectType,
5483 ) -> Result<Expression> {
5484 use crate::expressions::{
5485 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
5486 Function, Identifier, IsNull, Literal, Null, Paren,
5487 };
5488
// Tag naming which kind of cross-dialect transform to apply.
//
// Where a variant has a trailing comment, it sketches the rewrite as
// `source construct -> target construct`, usually naming the dialect(s) involved.
// The enum only derives `Debug`; it carries no payload, so any data a rewrite needs
// has to come from the expression being transformed.
// NOTE(review): the code that selects and handles each variant is not adjacent to this
// definition — confirm exact behavior at the place where the variant is matched.
#[derive(Debug)]
enum Action {
    // NOTE(review): the variants from `None` through `BigQueryCastType` have no description.
    // `None` is presumably the "no transform applies" tag, and the other names hint at the
    // rewrite they stand for, but neither is established here — verify before relying on it.
    None,
    GreatestLeastNull,
    ArrayGenerateRange,
    Div0TypedDivision,
    ArrayAggCollectList,
    ArrayAggWithinGroupFilter,
    ArrayAggFilter,
    CastTimestampToDatetime,
    DateTruncWrapCast,
    ToDateToCast,
    ConvertTimezoneToExpr,
    SetToVariable,
    RegexpReplaceSnowflakeToDuckDB,
    BigQueryFunctionNormalize,
    BigQuerySafeDivide,
    BigQueryCastType,
    BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
    BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
    BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
    BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
    BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
    BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
    BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
    GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
    RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target
    EpochConvert, // Expression::Epoch -> target-specific epoch function
    EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
    TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
    MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
    NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
    AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
    StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
    GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
    TempTableHash, // TSQL #table -> temp table normalization
    ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
    DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
    NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
    HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
    XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
    CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
    JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
    JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
    JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
    JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake; also DuckDB CAST(x AS JSON)
    DuckDBCastJsonToVariant, // DuckDB CAST(x AS JSON) -> CAST(x AS VARIANT) for Snowflake
    DuckDBTryCastJsonToTryJsonParse, // DuckDB TRY_CAST(x AS JSON) -> TRY(JSON_PARSE(x)) for Trino/Presto/Athena
    DuckDBJsonFuncToJsonParse, // DuckDB json(x) -> JSON_PARSE(x) for Trino/Presto/Athena
    DuckDBJsonValidToIsJson, // DuckDB json_valid(x) -> x IS JSON for Trino/Presto/Athena
    ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
    AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
    DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
    MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
    ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
    ArrayAggToGroupConcat, // ARRAY_AGG(x) -> GROUP_CONCAT(x) for MySQL-like targets
    ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
    CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
    CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
    CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
    ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
    ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
    ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
    BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
    BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
    CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
    VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
    StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
    ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
    ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
    DollarParamConvert, // $foo -> @foo for BigQuery
    TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
    BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
    BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
    StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
    AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
    CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
    EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
    AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
    ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
    SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
    AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
    StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
    RespectNullsConvert, // RESPECT NULLS window function handling
    MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
    MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
    BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
    SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
    JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
    FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
    AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
    StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
    SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
    DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
    ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
    CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
    CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
    PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
    CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
    TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
    ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
    StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
    TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
    FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
    MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
    CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
    SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
    MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
    AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
    PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
    GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
    ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
    PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
    DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
    JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
    JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
    DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
    MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
    SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
    JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
    JsonExtractToTsql, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
    JsonExtractToClickHouse, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
    JsonExtractScalarConvert, // JSON_EXTRACT_SCALAR -> target-specific (PostgreSQL, Snowflake, SQLite)
    JsonPathNormalize, // Normalize JSON path format (brackets, wildcards, quotes) for various dialects
    MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
    ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
    ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
    OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
    Nvl2Expand, // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END
    IfnullToCoalesce, // IFNULL(a, b) -> COALESCE(a, b)
    IsAsciiConvert, // IS_ASCII(x) -> dialect-specific ASCII check
    StrPositionConvert, // STR_POSITION(haystack, needle[, pos]) -> dialect-specific
    DecodeSimplify, // DECODE with null-safe -> simple = comparison
    ArraySumConvert, // ARRAY_SUM -> target-specific
    ArraySizeConvert, // ARRAY_SIZE -> target-specific
    ArrayAnyConvert, // ARRAY_ANY -> target-specific
    CastTimestamptzToFunc, // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) for MySQL/StarRocks
    TsOrDsToDateConvert, // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific
    TsOrDsToDateStrConvert, // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
    DateStrToDateConvert, // DATE_STR_TO_DATE(x) -> CAST(x AS DATE)
    TimeStrToDateConvert, // TIME_STR_TO_DATE(x) -> CAST(x AS DATE)
    TimeStrToTimeConvert, // TIME_STR_TO_TIME(x) -> CAST(x AS TIMESTAMP)
    DateToDateStrConvert, // DATE_TO_DATE_STR(x) -> CAST(x AS TEXT/VARCHAR/STRING)
    DateToDiConvert, // DATE_TO_DI(x) -> dialect-specific (CAST date to YYYYMMDD integer)
    DiToDateConvert, // DI_TO_DATE(x) -> dialect-specific (integer YYYYMMDD to date)
    TsOrDiToDiConvert, // TS_OR_DI_TO_DI(x) -> dialect-specific
    UnixToStrConvert, // UNIX_TO_STR(x, fmt) -> dialect-specific
    UnixToTimeConvert, // UNIX_TO_TIME(x) -> dialect-specific
    UnixToTimeStrConvert, // UNIX_TO_TIME_STR(x) -> dialect-specific
    TimeToUnixConvert, // TIME_TO_UNIX(x) -> dialect-specific
    TimeToStrConvert, // TIME_TO_STR(x, fmt) -> dialect-specific
    StrToUnixConvert, // STR_TO_UNIX(x, fmt) -> dialect-specific
    DateTruncSwapArgs, // DATE_TRUNC('unit', x) -> DATE_TRUNC(x, unit) / TRUNC(x, unit)
    TimestampTruncConvert, // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> dialect-specific
    StrToDateConvert, // STR_TO_DATE(x, fmt) from Generic -> CAST(StrToTime(x,fmt) AS DATE)
    TsOrDsAddConvert, // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> DATE_ADD per dialect
    DateFromUnixDateConvert, // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
    TimeStrToUnixConvert, // TIME_STR_TO_UNIX(x) -> dialect-specific
    TimeToTimeStrConvert, // TIME_TO_TIME_STR(x) -> CAST(x AS type)
    CreateTableLikeToCtas, // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
    CreateTableLikeToSelectInto, // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
    CreateTableLikeToAs, // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
    ArrayRemoveConvert, // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
    ArrayReverseConvert, // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
    JsonKeysConvert, // JSON_KEYS -> JSON_OBJECT_KEYS/OBJECT_KEYS
    ParseJsonStrip, // PARSE_JSON(x) -> x (strip wrapper)
    ArraySizeDrill, // ARRAY_SIZE -> REPEATED_COUNT for Drill
    WeekOfYearToWeekIso, // WEEKOFYEAR -> WEEKISO for Snowflake cross-dialect
    RegexpSubstrSnowflakeToDuckDB, // REGEXP_SUBSTR(s, p, ...) -> REGEXP_EXTRACT variants for DuckDB
    RegexpSubstrSnowflakeIdentity, // REGEXP_SUBSTR/REGEXP_SUBSTR_ALL strip trailing group=0 for Snowflake identity
    RegexpSubstrAllSnowflakeToDuckDB, // REGEXP_SUBSTR_ALL(s, p, ...) -> REGEXP_EXTRACT_ALL variants for DuckDB
    RegexpCountSnowflakeToDuckDB, // REGEXP_COUNT(s, p, ...) -> LENGTH(REGEXP_EXTRACT_ALL(...)) for DuckDB
    RegexpInstrSnowflakeToDuckDB, // REGEXP_INSTR(s, p, ...) -> complex CASE expression for DuckDB
    RegexpReplacePositionSnowflakeToDuckDB, // REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
    RlikeSnowflakeToDuckDB, // RLIKE(a, b[, flags]) -> REGEXP_FULL_MATCH(a, b[, flags]) for DuckDB
    RegexpExtractAllToSnowflake, // BigQuery REGEXP_EXTRACT_ALL -> REGEXP_SUBSTR_ALL for Snowflake
    ArrayExceptConvert, // ARRAY_EXCEPT -> DuckDB complex CASE / Snowflake ARRAY_EXCEPT / Presto ARRAY_EXCEPT
    ArrayPositionSnowflakeSwap, // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
    RegexpLikeExasolAnchor, // RegexpLike -> Exasol REGEXP_LIKE with .*pattern.* anchoring
    ArrayDistinctConvert, // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
    ArrayDistinctClickHouse, // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
    ArrayContainsDuckDBConvert, // ARRAY_CONTAINS -> DuckDB CASE with NULL-aware check
    SnowflakeWindowFrameStrip, // Strip default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for Snowflake target
    SnowflakeWindowFrameAdd, // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for non-Snowflake target
    SnowflakeArrayPositionToDuckDB, // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB
}
5676
5677 // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
5678 let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
5679 Self::transform_select_into(expr, source, target)
5680 } else {
5681 expr
5682 };
5683
5684 // Strip OFFSET ROWS for non-TSQL/Oracle targets
5685 let expr = if !matches!(
5686 target,
5687 DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
5688 ) {
5689 if let Expression::Select(mut select) = expr {
5690 if let Some(ref mut offset) = select.offset {
5691 offset.rows = None;
5692 }
5693 Expression::Select(select)
5694 } else {
5695 expr
5696 }
5697 } else {
5698 expr
5699 };
5700
5701 // Oracle: LIMIT -> FETCH FIRST, OFFSET -> OFFSET ROWS
5702 let expr = if matches!(target, DialectType::Oracle) {
5703 if let Expression::Select(mut select) = expr {
5704 if let Some(limit) = select.limit.take() {
5705 // Convert LIMIT to FETCH FIRST n ROWS ONLY
5706 select.fetch = Some(crate::expressions::Fetch {
5707 direction: "FIRST".to_string(),
5708 count: Some(limit.this),
5709 percent: false,
5710 rows: true,
5711 with_ties: false,
5712 });
5713 }
5714 // Add ROWS to OFFSET if present
5715 if let Some(ref mut offset) = select.offset {
5716 offset.rows = Some(true);
5717 }
5718 Expression::Select(select)
5719 } else {
5720 expr
5721 }
5722 } else {
5723 expr
5724 };
5725
5726 // Handle CreateTable WITH properties transformation before recursive transforms
5727 let expr = if let Expression::CreateTable(mut ct) = expr {
5728 Self::transform_create_table_properties(&mut ct, source, target);
5729
5730 // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
5731 // When the PARTITIONED BY clause contains column definitions, merge them into the
5732 // main column list and adjust the PARTITIONED BY clause for the target dialect.
5733 if matches!(
5734 source,
5735 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5736 ) {
5737 let mut partition_col_names: Vec<String> = Vec::new();
5738 let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
5739 let mut has_col_def_partitions = false;
5740
5741 // Check if any PARTITIONED BY property contains ColumnDef expressions
5742 for prop in &ct.properties {
5743 if let Expression::PartitionedByProperty(ref pbp) = prop {
5744 if let Expression::Tuple(ref tuple) = *pbp.this {
5745 for expr in &tuple.expressions {
5746 if let Expression::ColumnDef(ref cd) = expr {
5747 has_col_def_partitions = true;
5748 partition_col_names.push(cd.name.name.clone());
5749 partition_col_defs.push(*cd.clone());
5750 }
5751 }
5752 }
5753 }
5754 }
5755
5756 if has_col_def_partitions && !matches!(target, DialectType::Hive) {
5757 // Merge partition columns into main column list
5758 for cd in partition_col_defs {
5759 ct.columns.push(cd);
5760 }
5761
5762 // Replace PARTITIONED BY property with column-name-only version
5763 ct.properties
5764 .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));
5765
5766 if matches!(
5767 target,
5768 DialectType::Presto | DialectType::Trino | DialectType::Athena
5769 ) {
5770 // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
5771 let array_elements: Vec<String> = partition_col_names
5772 .iter()
5773 .map(|n| format!("'{}'", n))
5774 .collect();
5775 let array_value = format!("ARRAY[{}]", array_elements.join(", "));
5776 ct.with_properties
5777 .push(("PARTITIONED_BY".to_string(), array_value));
5778 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
5779 // Spark: PARTITIONED BY (y, z) - just column names
5780 let name_exprs: Vec<Expression> = partition_col_names
5781 .iter()
5782 .map(|n| {
5783 Expression::Column(Box::new(crate::expressions::Column {
5784 name: crate::expressions::Identifier::new(n.clone()),
5785 table: None,
5786 join_mark: false,
5787 trailing_comments: Vec::new(),
5788 span: None,
5789 inferred_type: None,
5790 }))
5791 })
5792 .collect();
5793 ct.properties.insert(
5794 0,
5795 Expression::PartitionedByProperty(Box::new(
5796 crate::expressions::PartitionedByProperty {
5797 this: Box::new(Expression::Tuple(Box::new(
5798 crate::expressions::Tuple {
5799 expressions: name_exprs,
5800 },
5801 ))),
5802 },
5803 )),
5804 );
5805 }
// For DuckDB and other targets, just drop the PARTITIONED BY (already removed by the retain above)
5807 }
5808
5809 // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
5810 // are handled by transform_create_table_properties which runs first
5811 }
5812
// Strip LOCATION property for Presto/Trino/Athena (not supported)
5814 if matches!(
5815 target,
5816 DialectType::Presto | DialectType::Trino | DialectType::Athena
5817 ) {
5818 ct.properties
5819 .retain(|p| !matches!(p, Expression::LocationProperty(_)));
5820 }
5821
5822 // Strip table-level constraints for Spark/Hive/Databricks
5823 // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
5824 if matches!(
5825 target,
5826 DialectType::Spark | DialectType::Databricks | DialectType::Hive
5827 ) {
5828 ct.constraints.retain(|c| {
5829 matches!(
5830 c,
5831 crate::expressions::TableConstraint::PrimaryKey { .. }
5832 | crate::expressions::TableConstraint::Like { .. }
5833 )
5834 });
5835 for constraint in &mut ct.constraints {
5836 if let crate::expressions::TableConstraint::PrimaryKey {
5837 columns,
5838 modifiers,
5839 ..
5840 } = constraint
5841 {
5842 // Strip ASC/DESC from column names
5843 for col in columns.iter_mut() {
5844 if col.name.ends_with(" ASC") {
5845 col.name = col.name[..col.name.len() - 4].to_string();
5846 } else if col.name.ends_with(" DESC") {
5847 col.name = col.name[..col.name.len() - 5].to_string();
5848 }
5849 }
5850 // Strip TSQL-specific modifiers
5851 modifiers.clustered = None;
5852 modifiers.with_options.clear();
5853 modifiers.on_filegroup = None;
5854 }
5855 }
5856 }
5857
5858 // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
5859 if matches!(target, DialectType::Databricks) {
5860 for col in &mut ct.columns {
5861 if col.auto_increment {
5862 if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
5863 col.data_type = crate::expressions::DataType::BigInt { length: None };
5864 }
5865 }
5866 }
5867 }
5868
5869 // Spark/Databricks: INTEGER -> INT in column definitions
5870 // Python sqlglot always outputs INT for Spark/Databricks
5871 if matches!(target, DialectType::Spark | DialectType::Databricks) {
5872 for col in &mut ct.columns {
5873 if let crate::expressions::DataType::Int {
5874 integer_spelling, ..
5875 } = &mut col.data_type
5876 {
5877 *integer_spelling = false;
5878 }
5879 }
5880 }
5881
5882 // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
5883 if matches!(target, DialectType::Hive | DialectType::Spark) {
5884 for col in &mut ct.columns {
5885 // If nullable is explicitly true (NULL), change to None (omit it)
5886 if col.nullable == Some(true) {
5887 col.nullable = None;
5888 }
5889 // Also remove from constraints if stored there
5890 col.constraints
5891 .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
5892 }
5893 }
5894
5895 // Strip TSQL ON filegroup for non-TSQL/Fabric targets
5896 if ct.on_property.is_some()
5897 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
5898 {
5899 ct.on_property = None;
5900 }
5901
5902 // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
5903 // Snowflake doesn't support typed arrays in DDL
5904 if matches!(target, DialectType::Snowflake) {
5905 fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
5906 if let crate::expressions::DataType::Array { .. } = dt {
5907 *dt = crate::expressions::DataType::Custom {
5908 name: "ARRAY".to_string(),
5909 };
5910 }
5911 }
5912 for col in &mut ct.columns {
5913 strip_array_type_params(&mut col.data_type);
5914 }
5915 }
5916
5917 // PostgreSQL target: ensure IDENTITY columns have NOT NULL
5918 // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
5919 // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
5920 if matches!(target, DialectType::PostgreSQL) {
5921 for col in &mut ct.columns {
5922 if col.auto_increment && !col.constraint_order.is_empty() {
5923 use crate::expressions::ConstraintType;
5924 let has_explicit_not_null = col
5925 .constraint_order
5926 .iter()
5927 .any(|ct| *ct == ConstraintType::NotNull);
5928
5929 if has_explicit_not_null {
5930 // Source had explicit NOT NULL - preserve original order
5931 // Just ensure nullable is set
5932 if col.nullable != Some(false) {
5933 col.nullable = Some(false);
5934 }
5935 } else {
5936 // Source didn't have explicit NOT NULL - build order with
5937 // AutoIncrement + NotNull first, then remaining constraints
5938 let mut new_order = Vec::new();
5939 // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
5940 new_order.push(ConstraintType::AutoIncrement);
5941 new_order.push(ConstraintType::NotNull);
5942 // Add remaining constraints in original order (except AutoIncrement)
5943 for ct_type in &col.constraint_order {
5944 if *ct_type != ConstraintType::AutoIncrement {
5945 new_order.push(ct_type.clone());
5946 }
5947 }
5948 col.constraint_order = new_order;
5949 col.nullable = Some(false);
5950 }
5951 }
5952 }
5953 }
5954
5955 Expression::CreateTable(ct)
5956 } else {
5957 expr
5958 };
5959
5960 // Handle CreateView column stripping for Presto/Trino target
5961 let expr = if let Expression::CreateView(mut cv) = expr {
5962 // Presto/Trino: drop column list when view has a SELECT body
5963 if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
5964 {
5965 if !matches!(&cv.query, Expression::Null(_)) {
5966 cv.columns.clear();
5967 }
5968 }
5969 Expression::CreateView(cv)
5970 } else {
5971 expr
5972 };
5973
5974 // Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for generic/non-Presto targets
5975 let expr = if !matches!(
5976 target,
5977 DialectType::Presto | DialectType::Trino | DialectType::Athena
5978 ) {
5979 if let Expression::Select(mut select) = expr {
5980 if let Some(ref mut with) = select.with {
5981 for cte in &mut with.ctes {
5982 if let Expression::Values(ref vals) = cte.this {
5983 // Build: SELECT * FROM (VALUES ...) AS _values
5984 let values_subquery =
5985 Expression::Subquery(Box::new(crate::expressions::Subquery {
5986 this: Expression::Values(vals.clone()),
5987 alias: Some(Identifier::new("_values".to_string())),
5988 column_aliases: Vec::new(),
5989 alias_explicit_as: false,
5990 alias_keyword: None,
5991 order_by: None,
5992 limit: None,
5993 offset: None,
5994 distribute_by: None,
5995 sort_by: None,
5996 cluster_by: None,
5997 lateral: false,
5998 modifiers_inside: false,
5999 trailing_comments: Vec::new(),
6000 inferred_type: None,
6001 }));
6002 let mut new_select = crate::expressions::Select::new();
6003 new_select.expressions =
6004 vec![Expression::Star(crate::expressions::Star {
6005 table: None,
6006 except: None,
6007 replace: None,
6008 rename: None,
6009 trailing_comments: Vec::new(),
6010 span: None,
6011 })];
6012 new_select.from = Some(crate::expressions::From {
6013 expressions: vec![values_subquery],
6014 });
6015 cte.this = Expression::Select(Box::new(new_select));
6016 }
6017 }
6018 }
6019 Expression::Select(select)
6020 } else {
6021 expr
6022 }
6023 } else {
6024 expr
6025 };
6026
6027 // PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have nulls ordering
6028 let expr = if matches!(target, DialectType::PostgreSQL) {
6029 if let Expression::CreateIndex(mut ci) = expr {
6030 for col in &mut ci.columns {
6031 if col.nulls_first.is_none() {
6032 col.nulls_first = Some(true);
6033 }
6034 }
6035 Expression::CreateIndex(ci)
6036 } else {
6037 expr
6038 }
6039 } else {
6040 expr
6041 };
6042
6043 transform_recursive(expr, &|e| {
6044 // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
6045 // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
6046 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
6047 if let Expression::Cast(ref c) = e {
6048 // Check if this is a CAST of an array to a struct array type
6049 let is_struct_array_cast =
6050 matches!(&c.to, crate::expressions::DataType::Array { .. });
6051 if is_struct_array_cast {
6052 let has_auto_named_structs = match &c.this {
6053 Expression::Array(arr) => arr.expressions.iter().any(|elem| {
6054 if let Expression::Struct(s) = elem {
6055 s.fields.iter().all(|(name, _)| {
6056 name.as_ref().map_or(true, |n| {
6057 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
6058 })
6059 })
6060 } else {
6061 false
6062 }
6063 }),
6064 Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
6065 if let Expression::Struct(s) = elem {
6066 s.fields.iter().all(|(name, _)| {
6067 name.as_ref().map_or(true, |n| {
6068 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
6069 })
6070 })
6071 } else {
6072 false
6073 }
6074 }),
6075 _ => false,
6076 };
6077 if has_auto_named_structs {
6078 let convert_struct_to_row = |elem: Expression| -> Expression {
6079 if let Expression::Struct(s) = elem {
6080 let row_args: Vec<Expression> =
6081 s.fields.into_iter().map(|(_, v)| v).collect();
6082 Expression::Function(Box::new(Function::new(
6083 "ROW".to_string(),
6084 row_args,
6085 )))
6086 } else {
6087 elem
6088 }
6089 };
6090 let mut c_clone = c.as_ref().clone();
6091 match &mut c_clone.this {
6092 Expression::Array(arr) => {
6093 arr.expressions = arr
6094 .expressions
6095 .drain(..)
6096 .map(convert_struct_to_row)
6097 .collect();
6098 }
6099 Expression::ArrayFunc(arr) => {
6100 arr.expressions = arr
6101 .expressions
6102 .drain(..)
6103 .map(convert_struct_to_row)
6104 .collect();
6105 }
6106 _ => {}
6107 }
6108 return Ok(Expression::Cast(Box::new(c_clone)));
6109 }
6110 }
6111 }
6112 }
6113
6114 // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
6115 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
6116 if let Expression::Select(ref sel) = e {
6117 if sel.kind.as_deref() == Some("STRUCT") {
6118 let mut fields = Vec::new();
6119 for expr in &sel.expressions {
6120 match expr {
6121 Expression::Alias(a) => {
6122 fields.push((Some(a.alias.name.clone()), a.this.clone()));
6123 }
6124 Expression::Column(c) => {
6125 fields.push((Some(c.name.name.clone()), expr.clone()));
6126 }
6127 _ => {
6128 fields.push((None, expr.clone()));
6129 }
6130 }
6131 }
6132 let struct_lit =
6133 Expression::Struct(Box::new(crate::expressions::Struct { fields }));
6134 let mut new_select = sel.as_ref().clone();
6135 new_select.kind = None;
6136 new_select.expressions = vec![struct_lit];
6137 return Ok(Expression::Select(Box::new(new_select)));
6138 }
6139 }
6140 }
6141
6142 // Convert @variable -> ${variable} for Spark/Hive/Databricks
6143 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6144 && matches!(
6145 target,
6146 DialectType::Spark | DialectType::Databricks | DialectType::Hive
6147 )
6148 {
6149 if let Expression::Parameter(ref p) = e {
6150 if p.style == crate::expressions::ParameterStyle::At {
6151 if let Some(ref name) = p.name {
6152 return Ok(Expression::Parameter(Box::new(
6153 crate::expressions::Parameter {
6154 name: Some(name.clone()),
6155 index: p.index,
6156 style: crate::expressions::ParameterStyle::DollarBrace,
6157 quoted: p.quoted,
6158 string_quoted: p.string_quoted,
6159 expression: None,
6160 },
6161 )));
6162 }
6163 }
6164 }
6165 // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
6166 if let Expression::Column(ref col) = e {
6167 if col.name.name.starts_with('@') && col.table.is_none() {
6168 let var_name = col.name.name.trim_start_matches('@').to_string();
6169 return Ok(Expression::Parameter(Box::new(
6170 crate::expressions::Parameter {
6171 name: Some(var_name),
6172 index: None,
6173 style: crate::expressions::ParameterStyle::DollarBrace,
6174 quoted: false,
6175 string_quoted: false,
6176 expression: None,
6177 },
6178 )));
6179 }
6180 }
6181 }
6182
6183 // Convert @variable -> variable in SET statements for Spark/Databricks
6184 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6185 && matches!(target, DialectType::Spark | DialectType::Databricks)
6186 {
6187 if let Expression::SetStatement(ref s) = e {
6188 let mut new_items = s.items.clone();
6189 let mut changed = false;
6190 for item in &mut new_items {
6191 // Strip @ from the SET name (Parameter style)
6192 if let Expression::Parameter(ref p) = item.name {
6193 if p.style == crate::expressions::ParameterStyle::At {
6194 if let Some(ref name) = p.name {
6195 item.name = Expression::Identifier(Identifier::new(name));
6196 changed = true;
6197 }
6198 }
6199 }
6200 // Strip @ from the SET name (Identifier style - SET parser)
6201 if let Expression::Identifier(ref id) = item.name {
6202 if id.name.starts_with('@') {
6203 let var_name = id.name.trim_start_matches('@').to_string();
6204 item.name = Expression::Identifier(Identifier::new(&var_name));
6205 changed = true;
6206 }
6207 }
6208 // Strip @ from the SET name (Column style - alternative parsing)
6209 if let Expression::Column(ref col) = item.name {
6210 if col.name.name.starts_with('@') && col.table.is_none() {
6211 let var_name = col.name.name.trim_start_matches('@').to_string();
6212 item.name = Expression::Identifier(Identifier::new(&var_name));
6213 changed = true;
6214 }
6215 }
6216 }
6217 if changed {
6218 let mut new_set = (**s).clone();
6219 new_set.items = new_items;
6220 return Ok(Expression::SetStatement(Box::new(new_set)));
6221 }
6222 }
6223 }
6224
// Strip all table hints (e.g. NOLOCK) when going from TSQL/Fabric to any other target
6226 if matches!(source, DialectType::TSQL | DialectType::Fabric)
6227 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
6228 {
6229 if let Expression::Table(ref tr) = e {
6230 if !tr.hints.is_empty() {
6231 let mut new_tr = tr.clone();
6232 new_tr.hints.clear();
6233 return Ok(Expression::Table(new_tr));
6234 }
6235 }
6236 }
6237
// Snowflake: constant-fold IS [NOT] TRUE / IS [NOT] FALSE applied to boolean literals,
// e.g. TRUE IS TRUE -> TRUE, FALSE IS TRUE -> FALSE, FALSE IS FALSE -> TRUE
6240 if matches!(target, DialectType::Snowflake) {
6241 if let Expression::IsTrue(ref itf) = e {
6242 if let Expression::Boolean(ref b) = itf.this {
6243 if !itf.not {
6244 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
6245 value: b.value,
6246 }));
6247 } else {
6248 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
6249 value: !b.value,
6250 }));
6251 }
6252 }
6253 }
6254 if let Expression::IsFalse(ref itf) = e {
6255 if let Expression::Boolean(ref b) = itf.this {
6256 if !itf.not {
6257 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
6258 value: !b.value,
6259 }));
6260 } else {
6261 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
6262 value: b.value,
6263 }));
6264 }
6265 }
6266 }
6267 }
6268
6269 // BigQuery: split dotted backtick identifiers in table names
6270 // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
6271 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
6272 if let Expression::CreateTable(ref ct) = e {
6273 let mut changed = false;
6274 let mut new_ct = ct.clone();
6275 // Split the table name
6276 if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
6277 let parts: Vec<&str> = ct.name.name.name.split('.').collect();
6278 // Use quoted identifiers when the original was quoted (backtick in BigQuery)
6279 let was_quoted = ct.name.name.quoted;
6280 let mk_id = |s: &str| {
6281 if was_quoted {
6282 Identifier::quoted(s)
6283 } else {
6284 Identifier::new(s)
6285 }
6286 };
6287 if parts.len() == 3 {
6288 new_ct.name.catalog = Some(mk_id(parts[0]));
6289 new_ct.name.schema = Some(mk_id(parts[1]));
6290 new_ct.name.name = mk_id(parts[2]);
6291 changed = true;
6292 } else if parts.len() == 2 {
6293 new_ct.name.schema = Some(mk_id(parts[0]));
6294 new_ct.name.name = mk_id(parts[1]);
6295 changed = true;
6296 }
6297 }
6298 // Split the clone source name
6299 if let Some(ref clone_src) = ct.clone_source {
6300 if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
6301 let parts: Vec<&str> = clone_src.name.name.split('.').collect();
6302 let was_quoted = clone_src.name.quoted;
6303 let mk_id = |s: &str| {
6304 if was_quoted {
6305 Identifier::quoted(s)
6306 } else {
6307 Identifier::new(s)
6308 }
6309 };
6310 let mut new_src = clone_src.clone();
6311 if parts.len() == 3 {
6312 new_src.catalog = Some(mk_id(parts[0]));
6313 new_src.schema = Some(mk_id(parts[1]));
6314 new_src.name = mk_id(parts[2]);
6315 new_ct.clone_source = Some(new_src);
6316 changed = true;
6317 } else if parts.len() == 2 {
6318 new_src.schema = Some(mk_id(parts[0]));
6319 new_src.name = mk_id(parts[1]);
6320 new_ct.clone_source = Some(new_src);
6321 changed = true;
6322 }
6323 }
6324 }
6325 if changed {
6326 return Ok(Expression::CreateTable(new_ct));
6327 }
6328 }
6329 }
6330
6331 // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
6332 // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
6333 if matches!(source, DialectType::BigQuery)
6334 && matches!(
6335 target,
6336 DialectType::DuckDB
6337 | DialectType::Presto
6338 | DialectType::Trino
6339 | DialectType::Athena
6340 )
6341 {
6342 if let Expression::Subscript(ref sub) = e {
6343 let (new_index, is_safe) = match &sub.index {
6344 // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
6345 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
6346 let Literal::Number(n) = lit.as_ref() else {
6347 unreachable!()
6348 };
6349 if let Ok(val) = n.parse::<i64>() {
6350 (
6351 Some(Expression::Literal(Box::new(Literal::Number(
6352 (val + 1).to_string(),
6353 )))),
6354 false,
6355 )
6356 } else {
6357 (None, false)
6358 }
6359 }
6360 // OFFSET(n) -> n+1 (0-based)
6361 Expression::Function(ref f)
6362 if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
6363 {
6364 if let Expression::Literal(lit) = &f.args[0] {
6365 if let Literal::Number(n) = lit.as_ref() {
6366 if let Ok(val) = n.parse::<i64>() {
6367 (
6368 Some(Expression::Literal(Box::new(Literal::Number(
6369 (val + 1).to_string(),
6370 )))),
6371 false,
6372 )
6373 } else {
6374 (
6375 Some(Expression::Add(Box::new(
6376 crate::expressions::BinaryOp::new(
6377 f.args[0].clone(),
6378 Expression::number(1),
6379 ),
6380 ))),
6381 false,
6382 )
6383 }
6384 } else {
6385 (None, false)
6386 }
6387 } else {
6388 (
6389 Some(Expression::Add(Box::new(
6390 crate::expressions::BinaryOp::new(
6391 f.args[0].clone(),
6392 Expression::number(1),
6393 ),
6394 ))),
6395 false,
6396 )
6397 }
6398 }
6399 // ORDINAL(n) -> n (already 1-based)
6400 Expression::Function(ref f)
6401 if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
6402 {
6403 (Some(f.args[0].clone()), false)
6404 }
6405 // SAFE_OFFSET(n) -> n+1 (0-based, safe)
6406 Expression::Function(ref f)
6407 if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
6408 {
6409 if let Expression::Literal(lit) = &f.args[0] {
6410 if let Literal::Number(n) = lit.as_ref() {
6411 if let Ok(val) = n.parse::<i64>() {
6412 (
6413 Some(Expression::Literal(Box::new(Literal::Number(
6414 (val + 1).to_string(),
6415 )))),
6416 true,
6417 )
6418 } else {
6419 (
6420 Some(Expression::Add(Box::new(
6421 crate::expressions::BinaryOp::new(
6422 f.args[0].clone(),
6423 Expression::number(1),
6424 ),
6425 ))),
6426 true,
6427 )
6428 }
6429 } else {
6430 (None, false)
6431 }
6432 } else {
6433 (
6434 Some(Expression::Add(Box::new(
6435 crate::expressions::BinaryOp::new(
6436 f.args[0].clone(),
6437 Expression::number(1),
6438 ),
6439 ))),
6440 true,
6441 )
6442 }
6443 }
6444 // SAFE_ORDINAL(n) -> n (already 1-based, safe)
6445 Expression::Function(ref f)
6446 if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
6447 {
6448 (Some(f.args[0].clone()), true)
6449 }
6450 _ => (None, false),
6451 };
6452 if let Some(idx) = new_index {
6453 if is_safe
6454 && matches!(
6455 target,
6456 DialectType::Presto | DialectType::Trino | DialectType::Athena
6457 )
6458 {
6459 // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
6460 return Ok(Expression::Function(Box::new(Function::new(
6461 "ELEMENT_AT".to_string(),
6462 vec![sub.this.clone(), idx],
6463 ))));
6464 } else {
6465 // DuckDB or non-safe: just use subscript with converted index
6466 return Ok(Expression::Subscript(Box::new(
6467 crate::expressions::Subscript {
6468 this: sub.this.clone(),
6469 index: idx,
6470 },
6471 )));
6472 }
6473 }
6474 }
6475 }
6476
6477 // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
6478 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
6479 if let Expression::Length(ref uf) = e {
6480 let arg = uf.this.clone();
6481 let typeof_func = Expression::Function(Box::new(Function::new(
6482 "TYPEOF".to_string(),
6483 vec![arg.clone()],
6484 )));
6485 let blob_cast = Expression::Cast(Box::new(Cast {
6486 this: arg.clone(),
6487 to: DataType::VarBinary { length: None },
6488 trailing_comments: vec![],
6489 double_colon_syntax: false,
6490 format: None,
6491 default: None,
6492 inferred_type: None,
6493 }));
6494 let octet_length = Expression::Function(Box::new(Function::new(
6495 "OCTET_LENGTH".to_string(),
6496 vec![blob_cast],
6497 )));
6498 let text_cast = Expression::Cast(Box::new(Cast {
6499 this: arg,
6500 to: DataType::Text,
6501 trailing_comments: vec![],
6502 double_colon_syntax: false,
6503 format: None,
6504 default: None,
6505 inferred_type: None,
6506 }));
6507 let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
6508 this: text_cast,
6509 original_name: None,
6510 inferred_type: None,
6511 }));
6512 return Ok(Expression::Case(Box::new(Case {
6513 operand: Some(typeof_func),
6514 whens: vec![(
6515 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
6516 octet_length,
6517 )],
6518 else_: Some(length_text),
6519 comments: Vec::new(),
6520 inferred_type: None,
6521 })));
6522 }
6523 }
6524
6525 // BigQuery UNNEST alias handling (only for non-BigQuery sources):
6526 // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
6527 // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
6528 if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
6529 if let Expression::Alias(ref a) = e {
6530 if matches!(&a.this, Expression::Unnest(_)) {
6531 if a.column_aliases.is_empty() {
6532 // Drop the entire alias, return just the UNNEST expression
6533 return Ok(a.this.clone());
6534 } else {
6535 // Use first column alias as the main alias
6536 let mut new_alias = a.as_ref().clone();
6537 new_alias.alias = a.column_aliases[0].clone();
6538 new_alias.column_aliases.clear();
6539 return Ok(Expression::Alias(Box::new(new_alias)));
6540 }
6541 }
6542 }
6543 }
6544
6545 // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
6546 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
6547 if let Expression::In(ref in_expr) = e {
6548 if let Some(ref unnest_inner) = in_expr.unnest {
6549 // Build the function call for the target dialect
6550 let func_expr = if matches!(
6551 target,
6552 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6553 ) {
6554 // Use EXPLODE for Hive/Spark
6555 Expression::Function(Box::new(Function::new(
6556 "EXPLODE".to_string(),
6557 vec![*unnest_inner.clone()],
6558 )))
6559 } else {
6560 // Use UNNEST for Presto/Trino/DuckDB/etc.
6561 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
6562 this: *unnest_inner.clone(),
6563 expressions: Vec::new(),
6564 with_ordinality: false,
6565 alias: None,
6566 offset_alias: None,
6567 }))
6568 };
6569
6570 // Wrap in SELECT
6571 let mut inner_select = crate::expressions::Select::new();
6572 inner_select.expressions = vec![func_expr];
6573
6574 let subquery_expr = Expression::Select(Box::new(inner_select));
6575
6576 return Ok(Expression::In(Box::new(crate::expressions::In {
6577 this: in_expr.this.clone(),
6578 expressions: Vec::new(),
6579 query: Some(subquery_expr),
6580 not: in_expr.not,
6581 global: in_expr.global,
6582 unnest: None,
6583 is_field: false,
6584 })));
6585 }
6586 }
6587 }
6588
6589 // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
6590 // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
6591 if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
6592 if let Expression::Alias(ref a) = e {
6593 if let Expression::Function(ref f) = a.this {
6594 if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
6595 && !a.column_aliases.is_empty()
6596 {
6597 // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
6598 let col_alias = a.column_aliases[0].clone();
6599 let mut inner_select = crate::expressions::Select::new();
6600 inner_select.expressions =
6601 vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
6602 Expression::Identifier(Identifier::new("value".to_string())),
6603 col_alias,
6604 )))];
6605 inner_select.from = Some(crate::expressions::From {
6606 expressions: vec![a.this.clone()],
6607 });
6608 let subquery =
6609 Expression::Subquery(Box::new(crate::expressions::Subquery {
6610 this: Expression::Select(Box::new(inner_select)),
6611 alias: Some(a.alias.clone()),
6612 column_aliases: Vec::new(),
6613 alias_explicit_as: false,
6614 alias_keyword: None,
6615 order_by: None,
6616 limit: None,
6617 offset: None,
6618 lateral: false,
6619 modifiers_inside: false,
6620 trailing_comments: Vec::new(),
6621 distribute_by: None,
6622 sort_by: None,
6623 cluster_by: None,
6624 inferred_type: None,
6625 }));
6626 return Ok(subquery);
6627 }
6628 }
6629 }
6630 }
6631
6632 // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
6633 // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
6634 // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
6635 if matches!(source, DialectType::BigQuery) {
6636 if let Expression::Select(ref s) = e {
6637 if let Some(ref from) = s.from {
6638 if from.expressions.len() >= 2 {
6639 // Collect table names from first expression
6640 let first_tables: Vec<String> = from
6641 .expressions
6642 .iter()
6643 .take(1)
6644 .filter_map(|expr| {
6645 if let Expression::Table(t) = expr {
6646 Some(t.name.name.to_ascii_lowercase())
6647 } else {
6648 None
6649 }
6650 })
6651 .collect();
6652
6653 // Check if any subsequent FROM expressions are schema-qualified with a matching table name
6654 // or have a dotted name matching a table
6655 let mut needs_rewrite = false;
6656 for expr in from.expressions.iter().skip(1) {
6657 if let Expression::Table(t) = expr {
6658 if let Some(ref schema) = t.schema {
6659 if first_tables.contains(&schema.name.to_ascii_lowercase())
6660 {
6661 needs_rewrite = true;
6662 break;
6663 }
6664 }
6665 // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
6666 if t.schema.is_none() && t.name.name.contains('.') {
6667 let parts: Vec<&str> = t.name.name.split('.').collect();
6668 if parts.len() >= 2
6669 && first_tables.contains(&parts[0].to_ascii_lowercase())
6670 {
6671 needs_rewrite = true;
6672 break;
6673 }
6674 }
6675 }
6676 }
6677
6678 if needs_rewrite {
6679 let mut new_select = s.clone();
6680 let mut new_from_exprs = vec![from.expressions[0].clone()];
6681 let mut new_joins = s.joins.clone();
6682
6683 for expr in from.expressions.iter().skip(1) {
6684 if let Expression::Table(ref t) = expr {
6685 if let Some(ref schema) = t.schema {
6686 if first_tables
6687 .contains(&schema.name.to_ascii_lowercase())
6688 {
6689 // This is an array path reference, convert to CROSS JOIN UNNEST
6690 let col_expr = Expression::Column(Box::new(
6691 crate::expressions::Column {
6692 name: t.name.clone(),
6693 table: Some(schema.clone()),
6694 join_mark: false,
6695 trailing_comments: vec![],
6696 span: None,
6697 inferred_type: None,
6698 },
6699 ));
6700 let unnest_expr = Expression::Unnest(Box::new(
6701 crate::expressions::UnnestFunc {
6702 this: col_expr,
6703 expressions: Vec::new(),
6704 with_ordinality: false,
6705 alias: None,
6706 offset_alias: None,
6707 },
6708 ));
6709 let join_this = if let Some(ref alias) = t.alias {
6710 if matches!(
6711 target,
6712 DialectType::Presto
6713 | DialectType::Trino
6714 | DialectType::Athena
6715 ) {
6716 // Presto: UNNEST(x) AS _t0(results)
6717 Expression::Alias(Box::new(
6718 crate::expressions::Alias {
6719 this: unnest_expr,
6720 alias: Identifier::new("_t0"),
6721 column_aliases: vec![alias.clone()],
6722 alias_explicit_as: false,
6723 alias_keyword: None,
6724 pre_alias_comments: vec![],
6725 trailing_comments: vec![],
6726 inferred_type: None,
6727 },
6728 ))
6729 } else {
6730 // BigQuery: UNNEST(x) AS results
6731 Expression::Alias(Box::new(
6732 crate::expressions::Alias {
6733 this: unnest_expr,
6734 alias: alias.clone(),
6735 column_aliases: vec![],
6736 alias_explicit_as: false,
6737 alias_keyword: None,
6738 pre_alias_comments: vec![],
6739 trailing_comments: vec![],
6740 inferred_type: None,
6741 },
6742 ))
6743 }
6744 } else {
6745 unnest_expr
6746 };
6747 new_joins.push(crate::expressions::Join {
6748 kind: crate::expressions::JoinKind::Cross,
6749 this: join_this,
6750 on: None,
6751 using: Vec::new(),
6752 use_inner_keyword: false,
6753 use_outer_keyword: false,
6754 deferred_condition: false,
6755 join_hint: None,
6756 match_condition: None,
6757 pivots: Vec::new(),
6758 comments: Vec::new(),
6759 nesting_group: 0,
6760 directed: false,
6761 });
6762 } else {
6763 new_from_exprs.push(expr.clone());
6764 }
6765 } else if t.schema.is_none() && t.name.name.contains('.') {
6766 // Dotted name in quoted identifier: `Coordinates.position`
6767 let parts: Vec<&str> = t.name.name.split('.').collect();
6768 if parts.len() >= 2
6769 && first_tables
6770 .contains(&parts[0].to_ascii_lowercase())
6771 {
6772 let join_this =
6773 if matches!(target, DialectType::BigQuery) {
6774 // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
6775 Expression::Table(t.clone())
6776 } else {
6777 // Other targets: split into "schema"."name"
6778 let mut new_t = t.clone();
6779 new_t.schema =
6780 Some(Identifier::quoted(parts[0]));
6781 new_t.name = Identifier::quoted(parts[1]);
6782 Expression::Table(new_t)
6783 };
6784 new_joins.push(crate::expressions::Join {
6785 kind: crate::expressions::JoinKind::Cross,
6786 this: join_this,
6787 on: None,
6788 using: Vec::new(),
6789 use_inner_keyword: false,
6790 use_outer_keyword: false,
6791 deferred_condition: false,
6792 join_hint: None,
6793 match_condition: None,
6794 pivots: Vec::new(),
6795 comments: Vec::new(),
6796 nesting_group: 0,
6797 directed: false,
6798 });
6799 } else {
6800 new_from_exprs.push(expr.clone());
6801 }
6802 } else {
6803 new_from_exprs.push(expr.clone());
6804 }
6805 } else {
6806 new_from_exprs.push(expr.clone());
6807 }
6808 }
6809
6810 new_select.from = Some(crate::expressions::From {
6811 expressions: new_from_exprs,
6812 ..from.clone()
6813 });
6814 new_select.joins = new_joins;
6815 return Ok(Expression::Select(new_select));
6816 }
6817 }
6818 }
6819 }
6820 }
6821
6822 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
6823 if matches!(
6824 target,
6825 DialectType::Hive | DialectType::Spark | DialectType::Databricks
6826 ) {
6827 if let Expression::Select(ref s) = e {
6828 // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
6829 let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
6830 matches!(expr, Expression::Unnest(_))
6831 || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
6832 };
6833 let has_unnest_join = s.joins.iter().any(|j| {
6834 j.kind == crate::expressions::JoinKind::Cross && (
6835 matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
6836 || is_unnest_or_explode_expr(&j.this)
6837 )
6838 });
6839 if has_unnest_join {
6840 let mut select = s.clone();
6841 let mut new_joins = Vec::new();
6842 for join in select.joins.drain(..) {
6843 if join.kind == crate::expressions::JoinKind::Cross {
6844 // Extract the UNNEST/EXPLODE from the join
6845 let (func_expr, table_alias, col_aliases) = match &join.this {
6846 Expression::Alias(a) => {
6847 let ta = if a.alias.is_empty() {
6848 None
6849 } else {
6850 Some(a.alias.clone())
6851 };
6852 let cas = a.column_aliases.clone();
6853 match &a.this {
6854 Expression::Unnest(u) => {
6855 // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
6856 if !u.expressions.is_empty() {
6857 let mut all_args = vec![u.this.clone()];
6858 all_args.extend(u.expressions.clone());
6859 let arrays_zip =
6860 Expression::Function(Box::new(
6861 crate::expressions::Function::new(
6862 "ARRAYS_ZIP".to_string(),
6863 all_args,
6864 ),
6865 ));
6866 let inline = Expression::Function(Box::new(
6867 crate::expressions::Function::new(
6868 "INLINE".to_string(),
6869 vec![arrays_zip],
6870 ),
6871 ));
6872 (Some(inline), ta, a.column_aliases.clone())
6873 } else {
6874 // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
6875 let func_name = if u.with_ordinality {
6876 "POSEXPLODE"
6877 } else {
6878 "EXPLODE"
6879 };
6880 let explode = Expression::Function(Box::new(
6881 crate::expressions::Function::new(
6882 func_name.to_string(),
6883 vec![u.this.clone()],
6884 ),
6885 ));
6886 // For POSEXPLODE, add 'pos' to column aliases
6887 let cas = if u.with_ordinality {
6888 let mut pos_aliases =
6889 vec![Identifier::new(
6890 "pos".to_string(),
6891 )];
6892 pos_aliases
6893 .extend(a.column_aliases.clone());
6894 pos_aliases
6895 } else {
6896 a.column_aliases.clone()
6897 };
6898 (Some(explode), ta, cas)
6899 }
6900 }
6901 Expression::Function(f)
6902 if f.name.eq_ignore_ascii_case("EXPLODE") =>
6903 {
6904 (Some(Expression::Function(f.clone())), ta, cas)
6905 }
6906 _ => (None, None, Vec::new()),
6907 }
6908 }
6909 Expression::Unnest(u) => {
6910 let func_name = if u.with_ordinality {
6911 "POSEXPLODE"
6912 } else {
6913 "EXPLODE"
6914 };
6915 let explode = Expression::Function(Box::new(
6916 crate::expressions::Function::new(
6917 func_name.to_string(),
6918 vec![u.this.clone()],
6919 ),
6920 ));
6921 let ta = u.alias.clone();
6922 let col_aliases = if u.with_ordinality {
6923 vec![Identifier::new("pos".to_string())]
6924 } else {
6925 Vec::new()
6926 };
6927 (Some(explode), ta, col_aliases)
6928 }
6929 _ => (None, None, Vec::new()),
6930 };
6931 if let Some(func) = func_expr {
6932 select.lateral_views.push(crate::expressions::LateralView {
6933 this: func,
6934 table_alias,
6935 column_aliases: col_aliases,
6936 outer: false,
6937 });
6938 } else {
6939 new_joins.push(join);
6940 }
6941 } else {
6942 new_joins.push(join);
6943 }
6944 }
6945 select.joins = new_joins;
6946 return Ok(Expression::Select(select));
6947 }
6948 }
6949 }
6950
// UNNEST expansion: DuckDB/PostgreSQL SELECT UNNEST(arr) in SELECT list -> expanded query
// for BigQuery, Presto/Trino, Snowflake
6953 if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
6954 && matches!(
6955 target,
6956 DialectType::BigQuery
6957 | DialectType::Presto
6958 | DialectType::Trino
6959 | DialectType::Snowflake
6960 )
6961 {
6962 if let Expression::Select(ref s) = e {
6963 // Check if any SELECT expressions contain UNNEST
6964 // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
6965 let has_unnest_in_select = s.expressions.iter().any(|expr| {
6966 fn contains_unnest(e: &Expression) -> bool {
6967 match e {
6968 Expression::Unnest(_) => true,
6969 Expression::Function(f)
6970 if f.name.eq_ignore_ascii_case("UNNEST") =>
6971 {
6972 true
6973 }
6974 Expression::Alias(a) => contains_unnest(&a.this),
6975 Expression::Add(op)
6976 | Expression::Sub(op)
6977 | Expression::Mul(op)
6978 | Expression::Div(op) => {
6979 contains_unnest(&op.left) || contains_unnest(&op.right)
6980 }
6981 _ => false,
6982 }
6983 }
6984 contains_unnest(expr)
6985 });
6986
6987 if has_unnest_in_select {
6988 let rewritten = Self::rewrite_unnest_expansion(s, target);
6989 if let Some(new_select) = rewritten {
6990 return Ok(Expression::Select(Box::new(new_select)));
6991 }
6992 }
6993 }
6994 }
6995
6996 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
6997 // BigQuery '\n' -> PostgreSQL literal newline in string
6998 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
6999 {
7000 if let Expression::Literal(ref lit) = e {
7001 if let Literal::String(ref s) = lit.as_ref() {
7002 if s.contains("\\n")
7003 || s.contains("\\t")
7004 || s.contains("\\r")
7005 || s.contains("\\\\")
7006 {
7007 let converted = s
7008 .replace("\\n", "\n")
7009 .replace("\\t", "\t")
7010 .replace("\\r", "\r")
7011 .replace("\\\\", "\\");
7012 return Ok(Expression::Literal(Box::new(Literal::String(converted))));
7013 }
7014 }
7015 }
7016 }
7017
7018 // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
7019 // when source != target (identity tests keep the Literal::Timestamp for native handling)
7020 if source != target {
7021 if let Expression::Literal(ref lit) = e {
7022 if let Literal::Timestamp(ref s) = lit.as_ref() {
7023 let s = s.clone();
7024 // MySQL: TIMESTAMP handling depends on source dialect
7025 // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
7026 // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
7027 if matches!(target, DialectType::MySQL) {
7028 if matches!(source, DialectType::BigQuery) {
7029 // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
7030 return Ok(Expression::Function(Box::new(Function::new(
7031 "TIMESTAMP".to_string(),
7032 vec![Expression::Literal(Box::new(Literal::String(s)))],
7033 ))));
7034 } else {
7035 // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
7036 return Ok(Expression::Cast(Box::new(Cast {
7037 this: Expression::Literal(Box::new(Literal::String(s))),
7038 to: DataType::Custom {
7039 name: "DATETIME".to_string(),
7040 },
7041 trailing_comments: Vec::new(),
7042 double_colon_syntax: false,
7043 format: None,
7044 default: None,
7045 inferred_type: None,
7046 })));
7047 }
7048 }
7049 let dt = match target {
7050 DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
7051 name: "DATETIME".to_string(),
7052 },
7053 DialectType::Snowflake => {
7054 // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
7055 if matches!(source, DialectType::BigQuery) {
7056 DataType::Custom {
7057 name: "TIMESTAMPTZ".to_string(),
7058 }
7059 } else if matches!(
7060 source,
7061 DialectType::PostgreSQL
7062 | DialectType::Redshift
7063 | DialectType::Snowflake
7064 ) {
7065 DataType::Timestamp {
7066 precision: None,
7067 timezone: false,
7068 }
7069 } else {
7070 DataType::Custom {
7071 name: "TIMESTAMPNTZ".to_string(),
7072 }
7073 }
7074 }
7075 DialectType::Spark | DialectType::Databricks => {
7076 // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
7077 if matches!(source, DialectType::BigQuery) {
7078 DataType::Timestamp {
7079 precision: None,
7080 timezone: false,
7081 }
7082 } else {
7083 DataType::Custom {
7084 name: "TIMESTAMP_NTZ".to_string(),
7085 }
7086 }
7087 }
7088 DialectType::ClickHouse => DataType::Nullable {
7089 inner: Box::new(DataType::Custom {
7090 name: "DateTime".to_string(),
7091 }),
7092 },
7093 DialectType::TSQL | DialectType::Fabric => DataType::Custom {
7094 name: "DATETIME2".to_string(),
7095 },
7096 DialectType::DuckDB => {
7097 // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
7098 // or when the timestamp string explicitly has timezone info
7099 if matches!(source, DialectType::BigQuery)
7100 || Self::timestamp_string_has_timezone(&s)
7101 {
7102 DataType::Custom {
7103 name: "TIMESTAMPTZ".to_string(),
7104 }
7105 } else {
7106 DataType::Timestamp {
7107 precision: None,
7108 timezone: false,
7109 }
7110 }
7111 }
7112 _ => DataType::Timestamp {
7113 precision: None,
7114 timezone: false,
7115 },
7116 };
7117 return Ok(Expression::Cast(Box::new(Cast {
7118 this: Expression::Literal(Box::new(Literal::String(s))),
7119 to: dt,
7120 trailing_comments: vec![],
7121 double_colon_syntax: false,
7122 format: None,
7123 default: None,
7124 inferred_type: None,
7125 })));
7126 }
7127 }
7128 }
7129
7130 // PostgreSQL DELETE requires explicit AS for table aliases
7131 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
7132 if let Expression::Delete(ref del) = e {
7133 if del.alias.is_some() && !del.alias_explicit_as {
7134 let mut new_del = del.clone();
7135 new_del.alias_explicit_as = true;
7136 return Ok(Expression::Delete(new_del));
7137 }
7138 }
7139 }
7140
7141 // UNION/INTERSECT/EXCEPT DISTINCT handling:
7142 // Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
7143 // while others don't support it (Presto, Spark, DuckDB, etc.)
7144 {
7145 let needs_distinct =
7146 matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
7147 let drop_distinct = matches!(
7148 target,
7149 DialectType::Presto
7150 | DialectType::Trino
7151 | DialectType::Athena
7152 | DialectType::Spark
7153 | DialectType::Databricks
7154 | DialectType::DuckDB
7155 | DialectType::Hive
7156 | DialectType::MySQL
7157 | DialectType::PostgreSQL
7158 | DialectType::SQLite
7159 | DialectType::TSQL
7160 | DialectType::Redshift
7161 | DialectType::Snowflake
7162 | DialectType::Oracle
7163 | DialectType::Teradata
7164 | DialectType::Drill
7165 | DialectType::Doris
7166 | DialectType::StarRocks
7167 );
7168 match &e {
7169 Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
7170 let mut new_u = (**u).clone();
7171 new_u.distinct = true;
7172 return Ok(Expression::Union(Box::new(new_u)));
7173 }
7174 Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
7175 let mut new_i = (**i).clone();
7176 new_i.distinct = true;
7177 return Ok(Expression::Intersect(Box::new(new_i)));
7178 }
7179 Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
7180 let mut new_ex = (**ex).clone();
7181 new_ex.distinct = true;
7182 return Ok(Expression::Except(Box::new(new_ex)));
7183 }
7184 Expression::Union(u) if u.distinct && drop_distinct => {
7185 let mut new_u = (**u).clone();
7186 new_u.distinct = false;
7187 return Ok(Expression::Union(Box::new(new_u)));
7188 }
7189 Expression::Intersect(i) if i.distinct && drop_distinct => {
7190 let mut new_i = (**i).clone();
7191 new_i.distinct = false;
7192 return Ok(Expression::Intersect(Box::new(new_i)));
7193 }
7194 Expression::Except(ex) if ex.distinct && drop_distinct => {
7195 let mut new_ex = (**ex).clone();
7196 new_ex.distinct = false;
7197 return Ok(Expression::Except(Box::new(new_ex)));
7198 }
7199 _ => {}
7200 }
7201 }
7202
7203 // ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name)
7204 if matches!(target, DialectType::ClickHouse) {
7205 if let Expression::Function(ref f) = e {
7206 if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
7207 let mut new_f = f.as_ref().clone();
7208 new_f.name = "map".to_string();
7209 return Ok(Expression::Function(Box::new(new_f)));
7210 }
7211 }
7212 }
7213
7214 // ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
7215 if matches!(target, DialectType::ClickHouse) {
7216 if let Expression::Intersect(ref i) = e {
7217 if i.all {
7218 let mut new_i = (**i).clone();
7219 new_i.all = false;
7220 return Ok(Expression::Intersect(Box::new(new_i)));
7221 }
7222 }
7223 }
7224
7225 // Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
7226 // Only from Generic source, to prevent double-wrapping
7227 if matches!(source, DialectType::Generic) {
7228 if let Expression::Div(ref op) = e {
7229 let cast_type = match target {
7230 DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
7231 precision: None,
7232 scale: None,
7233 real_spelling: false,
7234 }),
7235 DialectType::Drill
7236 | DialectType::Trino
7237 | DialectType::Athena
7238 | DialectType::Presto => Some(DataType::Double {
7239 precision: None,
7240 scale: None,
7241 }),
7242 DialectType::PostgreSQL
7243 | DialectType::Redshift
7244 | DialectType::Materialize
7245 | DialectType::Teradata
7246 | DialectType::RisingWave => Some(DataType::Double {
7247 precision: None,
7248 scale: None,
7249 }),
7250 _ => None,
7251 };
7252 if let Some(dt) = cast_type {
7253 let cast_left = Expression::Cast(Box::new(Cast {
7254 this: op.left.clone(),
7255 to: dt,
7256 double_colon_syntax: false,
7257 trailing_comments: Vec::new(),
7258 format: None,
7259 default: None,
7260 inferred_type: None,
7261 }));
7262 let new_op = crate::expressions::BinaryOp {
7263 left: cast_left,
7264 right: op.right.clone(),
7265 left_comments: op.left_comments.clone(),
7266 operator_comments: op.operator_comments.clone(),
7267 trailing_comments: op.trailing_comments.clone(),
7268 inferred_type: None,
7269 };
7270 return Ok(Expression::Div(Box::new(new_op)));
7271 }
7272 }
7273 }
7274
// CREATE DATABASE -> CREATE SCHEMA and DROP DATABASE -> DROP SCHEMA for DuckDB target
7276 if matches!(target, DialectType::DuckDB) {
7277 if let Expression::CreateDatabase(db) = e {
7278 let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
7279 schema.if_not_exists = db.if_not_exists;
7280 return Ok(Expression::CreateSchema(Box::new(schema)));
7281 }
7282 if let Expression::DropDatabase(db) = e {
7283 let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
7284 schema.if_exists = db.if_exists;
7285 return Ok(Expression::DropSchema(Box::new(schema)));
7286 }
7287 }
7288
7289 // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
7290 if matches!(source, DialectType::ClickHouse)
7291 && !matches!(target, DialectType::ClickHouse)
7292 {
7293 if let Expression::Cast(ref c) = e {
7294 if let DataType::Custom { ref name } = c.to {
7295 if name.len() >= 9
7296 && name[..9].eq_ignore_ascii_case("NULLABLE(")
7297 && name.ends_with(")")
7298 {
7299 let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
7300 let inner_upper = inner.to_ascii_uppercase();
7301 let new_dt = match inner_upper.as_str() {
7302 "DATETIME" | "DATETIME64" => DataType::Timestamp {
7303 precision: None,
7304 timezone: false,
7305 },
7306 "DATE" => DataType::Date,
7307 "INT64" | "BIGINT" => DataType::BigInt { length: None },
7308 "INT32" | "INT" | "INTEGER" => DataType::Int {
7309 length: None,
7310 integer_spelling: false,
7311 },
7312 "FLOAT64" | "DOUBLE" => DataType::Double {
7313 precision: None,
7314 scale: None,
7315 },
7316 "STRING" => DataType::Text,
7317 _ => DataType::Custom {
7318 name: inner.to_string(),
7319 },
7320 };
7321 let mut new_cast = c.clone();
7322 new_cast.to = new_dt;
7323 return Ok(Expression::Cast(new_cast));
7324 }
7325 }
7326 }
7327 }
7328
7329 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
7330 if matches!(target, DialectType::Snowflake) {
7331 if let Expression::ArrayConcatAgg(ref agg) = e {
7332 let mut agg_clone = agg.as_ref().clone();
7333 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
7334 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
7335 let flatten = Expression::Function(Box::new(Function::new(
7336 "ARRAY_FLATTEN".to_string(),
7337 vec![array_agg],
7338 )));
7339 return Ok(flatten);
7340 }
7341 }
7342
7343 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
7344 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
7345 if let Expression::ArrayConcatAgg(agg) = e {
7346 let arg = agg.this;
7347 return Ok(Expression::Function(Box::new(Function::new(
7348 "ARRAY_CONCAT_AGG".to_string(),
7349 vec![arg],
7350 ))));
7351 }
7352 }
7353
7354 // Determine what action to take by inspecting e immutably
7355 let action = {
7356 let source_propagates_nulls =
7357 matches!(source, DialectType::Snowflake | DialectType::BigQuery);
7358 let target_ignores_nulls =
7359 matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
7360
7361 match &e {
7362 Expression::Function(f) => {
7363 let name = f.name.to_ascii_uppercase();
7364 // DuckDB json(x) is a synonym for CAST(x AS JSON) — parses a string.
7365 // Map to JSON_PARSE(x) for Trino/Presto/Athena to preserve semantics.
7366 if name == "JSON"
7367 && f.args.len() == 1
7368 && matches!(source, DialectType::DuckDB)
7369 && matches!(
7370 target,
7371 DialectType::Presto | DialectType::Trino | DialectType::Athena
7372 )
7373 {
7374 Action::DuckDBJsonFuncToJsonParse
7375 // DuckDB json_valid(x) has no direct Trino equivalent; emit the
7376 // SQL:2016 `x IS JSON` predicate which has matching semantics.
7377 } else if name == "JSON_VALID"
7378 && f.args.len() == 1
7379 && matches!(source, DialectType::DuckDB)
7380 && matches!(
7381 target,
7382 DialectType::Presto | DialectType::Trino | DialectType::Athena
7383 )
7384 {
7385 Action::DuckDBJsonValidToIsJson
7386 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
7387 } else if (name == "DATE_PART" || name == "DATEPART")
7388 && f.args.len() == 2
7389 && matches!(target, DialectType::Snowflake)
7390 && !matches!(source, DialectType::Snowflake)
7391 && matches!(
7392 &f.args[0],
7393 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
7394 )
7395 {
7396 Action::DatePartUnquote
7397 } else if source_propagates_nulls
7398 && target_ignores_nulls
7399 && (name == "GREATEST" || name == "LEAST")
7400 && f.args.len() >= 2
7401 {
7402 Action::GreatestLeastNull
7403 } else if matches!(source, DialectType::Snowflake)
7404 && name == "ARRAY_GENERATE_RANGE"
7405 && f.args.len() >= 2
7406 {
7407 Action::ArrayGenerateRange
7408 } else if matches!(source, DialectType::Snowflake)
7409 && matches!(target, DialectType::DuckDB)
7410 && name == "DATE_TRUNC"
7411 && f.args.len() == 2
7412 {
7413 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
7414 // Logic based on Python sqlglot's input_type_preserved flag:
7415 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
7416 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
7417 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
7418 let unit_str = match &f.args[0] {
7419 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
7420 let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
7421 Some(s.to_ascii_uppercase())
7422 }
7423 _ => None,
7424 };
7425 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
7426 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
7427 });
7428 match &f.args[1] {
7429 Expression::Cast(c) => match &c.to {
7430 DataType::Time { .. } => Action::DateTruncWrapCast,
7431 DataType::Custom { name }
7432 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
7433 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
7434 {
7435 Action::DateTruncWrapCast
7436 }
7437 DataType::Timestamp { timezone: true, .. } => {
7438 Action::DateTruncWrapCast
7439 }
7440 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
7441 DataType::Timestamp {
7442 timezone: false, ..
7443 } if is_date_unit => Action::DateTruncWrapCast,
7444 _ => Action::None,
7445 },
7446 _ => Action::None,
7447 }
7448 } else if matches!(source, DialectType::Snowflake)
7449 && matches!(target, DialectType::DuckDB)
7450 && name == "TO_DATE"
7451 && f.args.len() == 1
7452 && !matches!(
7453 &f.args[0],
7454 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
7455 )
7456 {
7457 Action::ToDateToCast
7458 } else if !matches!(source, DialectType::Redshift)
7459 && matches!(target, DialectType::Redshift)
7460 && name == "CONVERT_TIMEZONE"
7461 && (f.args.len() == 2 || f.args.len() == 3)
7462 {
7463 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
7464 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
7465 // The Redshift parser adds 'UTC' as default source_tz, but when
7466 // transpiling from other dialects, we should preserve the original form.
7467 Action::ConvertTimezoneToExpr
7468 } else if matches!(source, DialectType::Snowflake)
7469 && matches!(target, DialectType::DuckDB)
7470 && name == "REGEXP_REPLACE"
7471 && f.args.len() == 4
7472 && !matches!(
7473 &f.args[3],
7474 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
7475 )
7476 {
7477 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
7478 Action::RegexpReplaceSnowflakeToDuckDB
7479 } else if matches!(source, DialectType::Snowflake)
7480 && matches!(target, DialectType::DuckDB)
7481 && name == "REGEXP_REPLACE"
7482 && f.args.len() == 5
7483 {
7484 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB
7485 Action::RegexpReplacePositionSnowflakeToDuckDB
7486 } else if matches!(source, DialectType::Snowflake)
7487 && matches!(target, DialectType::DuckDB)
7488 && name == "REGEXP_SUBSTR"
7489 {
7490 // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
7491 Action::RegexpSubstrSnowflakeToDuckDB
7492 } else if matches!(source, DialectType::Snowflake)
7493 && matches!(target, DialectType::Snowflake)
7494 && (name == "REGEXP_SUBSTR" || name == "REGEXP_SUBSTR_ALL")
7495 && f.args.len() == 6
7496 {
7497 // Snowflake identity: strip trailing group=0
7498 Action::RegexpSubstrSnowflakeIdentity
7499 } else if matches!(source, DialectType::Snowflake)
7500 && matches!(target, DialectType::DuckDB)
7501 && name == "REGEXP_SUBSTR_ALL"
7502 {
7503 // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
7504 Action::RegexpSubstrAllSnowflakeToDuckDB
7505 } else if matches!(source, DialectType::Snowflake)
7506 && matches!(target, DialectType::DuckDB)
7507 && name == "REGEXP_COUNT"
7508 {
7509 // Snowflake REGEXP_COUNT -> DuckDB LENGTH(REGEXP_EXTRACT_ALL(...))
7510 Action::RegexpCountSnowflakeToDuckDB
7511 } else if matches!(source, DialectType::Snowflake)
7512 && matches!(target, DialectType::DuckDB)
7513 && name == "REGEXP_INSTR"
7514 {
7515 // Snowflake REGEXP_INSTR -> DuckDB complex CASE expression
7516 Action::RegexpInstrSnowflakeToDuckDB
7517 } else if matches!(source, DialectType::BigQuery)
7518 && matches!(target, DialectType::Snowflake)
7519 && name == "REGEXP_EXTRACT_ALL"
7520 {
7521 // BigQuery REGEXP_EXTRACT_ALL -> Snowflake REGEXP_SUBSTR_ALL
7522 Action::RegexpExtractAllToSnowflake
7523 } else if name == "_BQ_TO_HEX" {
7524 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
7525 Action::BigQueryToHexBare
7526 } else if matches!(source, DialectType::BigQuery)
7527 && !matches!(target, DialectType::BigQuery)
7528 {
7529 // BigQuery-specific functions that need to be converted to standard forms
7530 match name.as_str() {
7531 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
7532 | "DATE_DIFF"
7533 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
7534 | "DATETIME_ADD" | "DATETIME_SUB"
7535 | "TIME_ADD" | "TIME_SUB"
7536 | "DATE_ADD" | "DATE_SUB"
7537 | "SAFE_DIVIDE"
7538 | "GENERATE_UUID"
7539 | "COUNTIF"
7540 | "EDIT_DISTANCE"
7541 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
7542 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
7543 | "TO_HEX"
7544 | "TO_JSON_STRING"
7545 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
7546 | "DIV"
7547 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
7548 | "LAST_DAY"
7549 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
7550 | "REGEXP_CONTAINS"
7551 | "CONTAINS_SUBSTR"
7552 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
7553 | "SAFE_CAST"
7554 | "GENERATE_DATE_ARRAY"
7555 | "PARSE_DATE" | "PARSE_TIMESTAMP"
7556 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
7557 | "ARRAY_CONCAT"
7558 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
7559 | "INSTR"
7560 | "MD5" | "SHA1" | "SHA256" | "SHA512"
7561 | "GENERATE_UUID()" // just in case
7562 | "REGEXP_EXTRACT_ALL"
7563 | "REGEXP_EXTRACT"
7564 | "INT64"
7565 | "ARRAY_CONCAT_AGG"
7566 | "DATE_DIFF(" // just in case
7567 | "TO_HEX_MD5" // internal
7568 | "MOD"
7569 | "CONCAT"
7570 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
7571 | "STRUCT"
7572 | "ROUND"
7573 | "MAKE_INTERVAL"
7574 | "ARRAY_TO_STRING"
7575 | "PERCENTILE_CONT"
7576 => Action::BigQueryFunctionNormalize,
7577 "ARRAY" if matches!(target, DialectType::Snowflake)
7578 && f.args.len() == 1
7579 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
7580 => Action::BigQueryArraySelectAsStructToSnowflake,
7581 _ => Action::None,
7582 }
7583 } else if matches!(source, DialectType::BigQuery)
7584 && matches!(target, DialectType::BigQuery)
7585 {
7586 // BigQuery -> BigQuery normalizations
7587 match name.as_str() {
7588 "TIMESTAMP_DIFF"
7589 | "DATETIME_DIFF"
7590 | "TIME_DIFF"
7591 | "DATE_DIFF"
7592 | "DATE_ADD"
7593 | "TO_HEX"
7594 | "CURRENT_TIMESTAMP"
7595 | "CURRENT_DATE"
7596 | "CURRENT_TIME"
7597 | "CURRENT_DATETIME"
7598 | "GENERATE_DATE_ARRAY"
7599 | "INSTR"
7600 | "FORMAT_DATETIME"
7601 | "DATETIME"
7602 | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
7603 _ => Action::None,
7604 }
7605 } else {
7606 // Generic function normalization for non-BigQuery sources
7607 match name.as_str() {
7608 "ARBITRARY" | "AGGREGATE"
7609 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
7610 | "STRUCT_EXTRACT"
7611 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
7612 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
7613 | "SUBSTRINGINDEX"
7614 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
7615 | "UNICODE"
7616 | "XOR"
7617 | "ARRAY_REVERSE_SORT"
7618 | "ENCODE" | "DECODE"
7619 | "QUANTILE"
7620 | "EPOCH" | "EPOCH_MS"
7621 | "HASHBYTES"
7622 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
7623 | "APPROX_DISTINCT"
7624 | "DATE_PARSE" | "FORMAT_DATETIME"
7625 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
7626 | "RLIKE"
7627 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
7628 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
7629 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
7630 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
7631 | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
7632 | "MAP" | "MAP_FROM_ENTRIES"
7633 | "COLLECT_LIST" | "COLLECT_SET"
7634 | "ISNAN" | "IS_NAN"
7635 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
7636 | "FORMAT_NUMBER"
7637 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
7638 | "ELEMENT_AT"
7639 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
7640 | "SPLIT_PART"
7641 // GENERATE_SERIES: handled separately below
7642 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
7643 | "JSON_QUERY" | "JSON_VALUE"
7644 | "JSON_SEARCH"
7645 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
7646 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
7647 | "CURDATE" | "CURTIME"
7648 | "ARRAY_TO_STRING"
7649 | "ARRAY_SORT" | "SORT_ARRAY"
7650 | "LEFT" | "RIGHT"
7651 | "MAP_FROM_ARRAYS"
7652 | "LIKE" | "ILIKE"
7653 | "ARRAY_CONCAT" | "LIST_CONCAT"
7654 | "QUANTILE_CONT" | "QUANTILE_DISC"
7655 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
7656 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
7657 | "LOCATE" | "STRPOS" | "INSTR"
7658 | "CHAR"
7659 // CONCAT: handled separately for COALESCE wrapping
7660 | "ARRAY_JOIN"
7661 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
7662 | "ISNULL"
7663 | "MONTHNAME"
7664 | "TO_TIMESTAMP"
7665 | "TO_DATE"
7666 | "TO_JSON"
7667 | "REGEXP_SPLIT"
7668 | "SPLIT"
7669 | "FORMATDATETIME"
7670 | "ARRAYJOIN"
7671 | "SPLITBYSTRING" | "SPLITBYREGEXP"
7672 | "NVL"
7673 | "TO_CHAR"
7674 | "DBMS_RANDOM.VALUE"
7675 | "REGEXP_LIKE"
7676 | "REPLICATE"
7677 | "LEN"
7678 | "COUNT_BIG"
7679 | "DATEFROMPARTS"
7680 | "DATETIMEFROMPARTS"
7681 | "CONVERT" | "TRY_CONVERT"
7682 | "STRFTIME" | "STRPTIME"
7683 | "DATE_FORMAT" | "FORMAT_DATE"
7684 | "PARSE_TIMESTAMP" | "PARSE_DATE"
7685 | "FROM_BASE64" | "TO_BASE64"
7686 | "GETDATE"
7687 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
7688 | "TO_UTF8" | "FROM_UTF8"
7689 | "STARTS_WITH" | "STARTSWITH"
7690 | "APPROX_COUNT_DISTINCT"
7691 | "JSON_FORMAT"
7692 | "SYSDATE"
7693 | "LOGICAL_OR" | "LOGICAL_AND"
7694 | "MONTHS_ADD"
7695 | "SCHEMA_NAME"
7696 | "STRTOL"
7697 | "EDITDIST3"
7698 | "FORMAT"
7699 | "LIST_CONTAINS" | "LIST_HAS"
7700 | "VARIANCE" | "STDDEV"
7701 | "ISINF"
7702 | "TO_UNIXTIME"
7703 | "FROM_UNIXTIME"
7704 | "DATEPART" | "DATE_PART"
7705 | "DATENAME"
7706 | "STRING_AGG"
7707 | "JSON_ARRAYAGG"
7708 | "APPROX_QUANTILE"
7709 | "MAKE_DATE"
7710 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
7711 | "RANGE"
7712 | "TRY_ELEMENT_AT"
7713 | "STR_TO_MAP"
7714 | "STRING"
7715 | "STR_TO_TIME"
7716 | "CURRENT_SCHEMA"
7717 | "LTRIM" | "RTRIM"
7718 | "UUID"
7719 | "FARM_FINGERPRINT"
7720 | "JSON_KEYS"
7721 | "WEEKOFYEAR"
7722 | "CONCAT_WS"
7723 | "TRY_DIVIDE"
7724 | "ARRAY_SLICE"
7725 | "ARRAY_PREPEND"
7726 | "ARRAY_REMOVE"
7727 | "GENERATE_DATE_ARRAY"
7728 | "PARSE_JSON"
7729 | "JSON_REMOVE"
7730 | "JSON_SET"
7731 | "LEVENSHTEIN"
7732 | "CURRENT_VERSION"
7733 | "ARRAY_MAX"
7734 | "ARRAY_MIN"
7735 | "JAROWINKLER_SIMILARITY"
7736 | "CURRENT_SCHEMAS"
7737 | "TO_VARIANT"
7738 | "JSON_GROUP_ARRAY" | "JSON_GROUP_OBJECT"
7739 | "ARRAYS_OVERLAP" | "ARRAY_INTERSECTION"
7740 => Action::GenericFunctionNormalize,
7741 // Canonical date functions -> dialect-specific
7742 "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
7743 "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
7744 "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
7745 "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
7746 "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
7747 "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
7748 "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
7749 "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
7750 "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
7751 "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
7752 "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
7753 "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
7754 "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
7755 "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
7756 "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
7757 "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
7758 "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
7759 // STR_TO_DATE(x, fmt) -> dialect-specific
7760 "STR_TO_DATE" if f.args.len() == 2
7761 && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
7762 "STR_TO_DATE" => Action::GenericFunctionNormalize,
7763 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
7764 "TS_OR_DS_ADD" if f.args.len() == 3
7765 && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
7766 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
7767 "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
7768 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
7769 "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
7770 // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
7771 "IFNULL" if f.args.len() == 2 => Action::IfnullToCoalesce,
7772 // IS_ASCII(x) -> dialect-specific
7773 "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
7774 // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
7775 "STR_POSITION" => Action::StrPositionConvert,
7776 // ARRAY_SUM -> dialect-specific
7777 "ARRAY_SUM" => Action::ArraySumConvert,
7778 // ARRAY_SIZE -> dialect-specific (Drill only)
7779 "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
7780 // ARRAY_ANY -> dialect-specific
7781 "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
7782 // Functions needing specific cross-dialect transforms
7783 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
7784 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
7785 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
7786 "ARRAY" if matches!(source, DialectType::BigQuery)
7787 && matches!(target, DialectType::Snowflake)
7788 && f.args.len() == 1
7789 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
7790 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::Snowflake | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
7791 "TRUNC" if f.args.len() == 2 && matches!(&f.args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))) && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
7792 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 && !f.args.get(1).map_or(false, |a| matches!(a, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) => Action::GenericFunctionNormalize,
7793 // DATE_TRUNC('unit', x) from Generic source -> arg swap for BigQuery/Doris/Spark/MySQL
7794 "DATE_TRUNC" if f.args.len() == 2
7795 && matches!(source, DialectType::Generic)
7796 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
7797 | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
7798 // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
7799 "TIMESTAMP_TRUNC" if f.args.len() >= 2
7800 && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
7801 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
7802 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
7803 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7804 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
7805 // GENERATE_SERIES with interval normalization for PG target
7806 "GENERATE_SERIES" if f.args.len() >= 3
7807 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7808 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
7809 "GENERATE_SERIES" => Action::None, // passthrough for other cases
7810 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
7811 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
7812 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
7813 "CONCAT" => Action::GenericFunctionNormalize,
7814 // DIV(a, b) -> target-specific integer division
7815 "DIV" if f.args.len() == 2
7816 && matches!(source, DialectType::PostgreSQL)
7817 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
7818 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
7819 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
7820 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
7821 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
7822 "JSONB_EXISTS" if f.args.len() == 2
7823 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
7824 // DATE_BIN -> TIME_BUCKET for DuckDB
7825 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
7826 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
7827 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
7828 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
7829 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
7830 // ClickHouse any -> ANY_VALUE for other dialects
7831 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
7832 _ => Action::None,
7833 }
7834 }
7835 }
7836 Expression::AggregateFunction(af) => {
7837 let name = af.name.to_ascii_uppercase();
7838 match name.as_str() {
7839 "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
7840 "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
7841 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
7842 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
7843 if matches!(target, DialectType::DuckDB) =>
7844 {
7845 Action::JsonObjectAggConvert
7846 }
7847 "ARRAY_AGG"
7848 if matches!(
7849 target,
7850 DialectType::Hive
7851 | DialectType::Spark
7852 | DialectType::Databricks
7853 ) =>
7854 {
7855 Action::ArrayAggToCollectList
7856 }
7857 "MAX_BY" | "MIN_BY"
7858 if matches!(
7859 target,
7860 DialectType::ClickHouse
7861 | DialectType::Spark
7862 | DialectType::Databricks
7863 | DialectType::DuckDB
7864 ) =>
7865 {
7866 Action::MaxByMinByConvert
7867 }
7868 "COLLECT_LIST"
7869 if matches!(
7870 target,
7871 DialectType::Presto | DialectType::Trino | DialectType::DuckDB
7872 ) =>
7873 {
7874 Action::CollectListToArrayAgg
7875 }
7876 "COLLECT_SET"
7877 if matches!(
7878 target,
7879 DialectType::Presto
7880 | DialectType::Trino
7881 | DialectType::Snowflake
7882 | DialectType::DuckDB
7883 ) =>
7884 {
7885 Action::CollectSetConvert
7886 }
7887 "PERCENTILE"
7888 if matches!(
7889 target,
7890 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
7891 ) =>
7892 {
7893 Action::PercentileConvert
7894 }
7895 // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
7896 "CORR"
7897 if matches!(target, DialectType::DuckDB)
7898 && matches!(source, DialectType::Snowflake) =>
7899 {
7900 Action::CorrIsnanWrap
7901 }
7902 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
7903 "APPROX_QUANTILES"
7904 if matches!(source, DialectType::BigQuery)
7905 && matches!(target, DialectType::DuckDB) =>
7906 {
7907 Action::BigQueryApproxQuantiles
7908 }
7909 // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
7910 "PERCENTILE_CONT"
7911 if matches!(source, DialectType::BigQuery)
7912 && matches!(target, DialectType::DuckDB)
7913 && af.args.len() >= 2 =>
7914 {
7915 Action::BigQueryPercentileContToDuckDB
7916 }
7917 _ => Action::None,
7918 }
7919 }
7920 Expression::JSONArrayAgg(_) => match target {
7921 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
7922 _ => Action::None,
7923 },
7924 Expression::ToNumber(tn) => {
7925 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
7926 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
7927 match target {
7928 DialectType::Oracle
7929 | DialectType::Snowflake
7930 | DialectType::Teradata => Action::None,
7931 _ => Action::GenericFunctionNormalize,
7932 }
7933 } else {
7934 Action::None
7935 }
7936 }
7937 Expression::Nvl2(_) => {
7938 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
7939 // Keep as NVL2 for dialects that support it natively
7940 match target {
7941 DialectType::Oracle
7942 | DialectType::Snowflake
7943 | DialectType::Teradata
7944 | DialectType::Spark
7945 | DialectType::Databricks
7946 | DialectType::Redshift => Action::None,
7947 _ => Action::Nvl2Expand,
7948 }
7949 }
7950 Expression::Decode(_) | Expression::DecodeCase(_) => {
7951 // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
7952 // Keep as DECODE for Oracle/Snowflake
7953 match target {
7954 DialectType::Oracle | DialectType::Snowflake => Action::None,
7955 _ => Action::DecodeSimplify,
7956 }
7957 }
7958 Expression::Coalesce(ref cf) => {
7959 // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect
7960 // BigQuery keeps IFNULL natively when source is also BigQuery
7961 if cf.original_name.as_deref() == Some("IFNULL")
7962 && !(matches!(source, DialectType::BigQuery)
7963 && matches!(target, DialectType::BigQuery))
7964 {
7965 Action::IfnullToCoalesce
7966 } else {
7967 Action::None
7968 }
7969 }
7970 Expression::IfFunc(if_func) => {
7971 if matches!(source, DialectType::Snowflake)
7972 && matches!(
7973 target,
7974 DialectType::Presto | DialectType::Trino | DialectType::SQLite
7975 )
7976 && matches!(if_func.false_value, Some(Expression::Div(_)))
7977 {
7978 Action::Div0TypedDivision
7979 } else {
7980 Action::None
7981 }
7982 }
7983 Expression::ToJson(_) => match target {
7984 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
7985 DialectType::BigQuery => Action::ToJsonConvert,
7986 DialectType::DuckDB => Action::ToJsonConvert,
7987 _ => Action::None,
7988 },
7989 Expression::ArrayAgg(ref agg) => {
7990 if matches!(target, DialectType::MySQL | DialectType::SingleStore) {
7991 Action::ArrayAggToGroupConcat
7992 } else if matches!(
7993 target,
7994 DialectType::Hive | DialectType::Spark | DialectType::Databricks
7995 ) {
7996 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
7997 Action::ArrayAggToCollectList
7998 } else if matches!(
7999 source,
8000 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8001 ) && matches!(target, DialectType::DuckDB)
8002 && agg.filter.is_some()
8003 {
8004 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
8005 // Need to add NOT x IS NULL to existing filter
8006 Action::ArrayAggNullFilter
8007 } else if matches!(target, DialectType::DuckDB)
8008 && agg.ignore_nulls == Some(true)
8009 && !agg.order_by.is_empty()
8010 {
8011 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
8012 Action::ArrayAggIgnoreNullsDuckDB
8013 } else if !matches!(source, DialectType::Snowflake) {
8014 Action::None
8015 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
8016 let is_array_agg = agg.name.as_deref().map_or(false, |n| n.eq_ignore_ascii_case("ARRAY_AGG"))
8017 || agg.name.is_none();
8018 if is_array_agg {
8019 Action::ArrayAggCollectList
8020 } else {
8021 Action::None
8022 }
8023 } else if matches!(
8024 target,
8025 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
8026 ) && agg.filter.is_none()
8027 {
8028 Action::ArrayAggFilter
8029 } else {
8030 Action::None
8031 }
8032 }
8033 Expression::WithinGroup(wg) => {
8034 if matches!(source, DialectType::Snowflake)
8035 && matches!(
8036 target,
8037 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
8038 )
8039 && matches!(wg.this, Expression::ArrayAgg(_))
8040 {
8041 Action::ArrayAggWithinGroupFilter
8042 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
8043 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
8044 || matches!(&wg.this, Expression::StringAgg(_))
8045 {
8046 Action::StringAggConvert
8047 } else if matches!(
8048 target,
8049 DialectType::Presto
8050 | DialectType::Trino
8051 | DialectType::Athena
8052 | DialectType::Spark
8053 | DialectType::Databricks
8054 ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
8055 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
8056 || matches!(&wg.this, Expression::PercentileCont(_)))
8057 {
8058 Action::PercentileContConvert
8059 } else {
8060 Action::None
8061 }
8062 }
8063 // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
8064 // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
8065 // DATETIME is the timezone-unaware type
8066 Expression::Cast(ref c) => {
8067 if c.format.is_some()
8068 && (matches!(source, DialectType::BigQuery)
8069 || matches!(source, DialectType::Teradata))
8070 {
8071 Action::BigQueryCastFormat
8072 } else if matches!(target, DialectType::BigQuery)
8073 && !matches!(source, DialectType::BigQuery)
8074 && matches!(
8075 c.to,
8076 DataType::Timestamp {
8077 timezone: false,
8078 ..
8079 }
8080 )
8081 {
8082 Action::CastTimestampToDatetime
8083 } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
8084 && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
8085 && matches!(
8086 c.to,
8087 DataType::Timestamp {
8088 timezone: false,
8089 ..
8090 }
8091 )
8092 {
8093 // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
8094 // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
8095 Action::CastTimestampToDatetime
8096 } else if matches!(
8097 source,
8098 DialectType::Hive | DialectType::Spark | DialectType::Databricks
8099 ) && matches!(
8100 target,
8101 DialectType::Presto
8102 | DialectType::Trino
8103 | DialectType::Athena
8104 | DialectType::DuckDB
8105 | DialectType::Snowflake
8106 | DialectType::BigQuery
8107 | DialectType::Databricks
8108 | DialectType::TSQL
8109 ) {
8110 Action::HiveCastToTryCast
8111 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
8112 && matches!(target, DialectType::MySQL | DialectType::StarRocks)
8113 {
8114 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
8115 Action::CastTimestamptzToFunc
8116 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
8117 && matches!(
8118 target,
8119 DialectType::Hive
8120 | DialectType::Spark
8121 | DialectType::Databricks
8122 | DialectType::BigQuery
8123 )
8124 {
// CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/Databricks/BigQuery
8126 Action::CastTimestampStripTz
8127 } else if matches!(&c.to, DataType::Json)
8128 && matches!(source, DialectType::DuckDB)
8129 && matches!(target, DialectType::Snowflake)
8130 {
8131 Action::DuckDBCastJsonToVariant
8132 } else if matches!(&c.to, DataType::Json)
8133 && matches!(&c.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
8134 && matches!(
8135 target,
8136 DialectType::Presto
8137 | DialectType::Trino
8138 | DialectType::Athena
8139 | DialectType::Snowflake
8140 )
8141 {
8142 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
8143 // Only when the input is a string literal (JSON 'value' syntax)
8144 Action::JsonLiteralToJsonParse
8145 } else if matches!(&c.to, DataType::Json)
8146 && matches!(source, DialectType::DuckDB)
8147 && matches!(
8148 target,
8149 DialectType::Presto | DialectType::Trino | DialectType::Athena
8150 )
8151 {
8152 // DuckDB's CAST(x AS JSON) parses the string value into a JSON value.
8153 // Trino/Presto/Athena's CAST(x AS JSON) instead wraps the value as a
8154 // JSON string (no parsing) — different semantics. Use JSON_PARSE(x)
8155 // in the target to preserve DuckDB's parse semantics.
8156 Action::JsonLiteralToJsonParse
8157 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
8158 && matches!(target, DialectType::Spark | DialectType::Databricks)
8159 {
// CAST(x AS JSON/JSONB) -> TO_JSON(x) for Spark/Databricks
8161 Action::CastToJsonForSpark
8162 } else if (matches!(
8163 &c.to,
8164 DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
8165 )) && matches!(
8166 target,
8167 DialectType::Spark | DialectType::Databricks
8168 ) && (matches!(&c.this, Expression::ParseJson(_))
8169 || matches!(
8170 &c.this,
8171 Expression::Function(f)
8172 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
8173 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
8174 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
8175 ))
8176 {
8177 // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
8178 // -> FROM_JSON(..., type_string) for Spark
8179 Action::CastJsonToFromJson
8180 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
8181 && matches!(
8182 c.to,
8183 DataType::Timestamp {
8184 timezone: false,
8185 ..
8186 }
8187 )
8188 && matches!(source, DialectType::DuckDB)
8189 {
8190 Action::StrftimeCastTimestamp
8191 } else if matches!(source, DialectType::DuckDB)
8192 && matches!(
8193 c.to,
8194 DataType::Decimal {
8195 precision: None,
8196 ..
8197 }
8198 )
8199 {
8200 Action::DecimalDefaultPrecision
8201 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
8202 && matches!(c.to, DataType::Char { length: None })
8203 && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
8204 {
8205 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
8206 Action::MysqlCastCharToText
8207 } else if matches!(
8208 source,
8209 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8210 ) && matches!(
8211 target,
8212 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8213 ) && Self::has_varchar_char_type(&c.to)
8214 {
8215 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
8216 Action::SparkCastVarcharToString
8217 } else {
8218 Action::None
8219 }
8220 }
8221 Expression::SafeCast(ref c) => {
8222 if c.format.is_some()
8223 && matches!(source, DialectType::BigQuery)
8224 && !matches!(target, DialectType::BigQuery)
8225 {
8226 Action::BigQueryCastFormat
8227 } else {
8228 Action::None
8229 }
8230 }
8231 Expression::TryCast(ref c) => {
8232 if matches!(&c.to, DataType::Json)
8233 && matches!(source, DialectType::DuckDB)
8234 && matches!(
8235 target,
8236 DialectType::Presto | DialectType::Trino | DialectType::Athena
8237 )
8238 {
8239 // DuckDB's TRY_CAST(x AS JSON) tries to parse x as JSON, returning
8240 // NULL on parse failure. Trino/Presto/Athena's TRY_CAST(x AS JSON)
8241 // wraps the value as a JSON string (no parse). Emit TRY(JSON_PARSE(x))
8242 // to preserve DuckDB's parse-or-null semantics.
8243 Action::DuckDBTryCastJsonToTryJsonParse
8244 } else {
8245 Action::None
8246 }
8247 }
8248 Expression::JSONArray(ref ja)
8249 if matches!(target, DialectType::Snowflake)
8250 && ja.null_handling.is_none()
8251 && ja.return_type.is_none()
8252 && ja.strict.is_none() =>
8253 {
8254 Action::GenericFunctionNormalize
8255 }
8256 Expression::JsonArray(_) if matches!(target, DialectType::Snowflake) => {
8257 Action::GenericFunctionNormalize
8258 }
8259 // For DuckDB: DATE_TRUNC should preserve the input type
8260 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
8261 if matches!(source, DialectType::Snowflake)
8262 && matches!(target, DialectType::DuckDB)
8263 {
8264 Action::DateTruncWrapCast
8265 } else {
8266 Action::None
8267 }
8268 }
8269 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
8270 Expression::SetStatement(s) => {
8271 if matches!(target, DialectType::DuckDB)
8272 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
8273 && s.items.iter().any(|item| item.kind.is_none())
8274 {
8275 Action::SetToVariable
8276 } else {
8277 Action::None
8278 }
8279 }
8280 // Cross-dialect NULL ordering normalization.
8281 // When nulls_first is not specified, fill in the source dialect's implied
8282 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
8283 Expression::Ordered(o) => {
8284 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
8285 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
8286 Action::MysqlNullsOrdering
8287 } else {
8288 // Skip targets that don't support NULLS FIRST/LAST syntax unless
8289 // the generator can preserve semantics with a CASE sort key.
8290 let target_rewrites_nulls =
8291 matches!(target, DialectType::TSQL | DialectType::Fabric);
8292 let target_supports_nulls = !matches!(
8293 target,
8294 DialectType::MySQL
8295 | DialectType::TSQL
8296 | DialectType::Fabric
8297 | DialectType::StarRocks
8298 | DialectType::Doris
8299 );
8300 if o.nulls_first.is_none()
8301 && source != target
8302 && (target_supports_nulls || target_rewrites_nulls)
8303 {
8304 Action::NullsOrdering
8305 } else {
8306 Action::None
8307 }
8308 }
8309 }
8310 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
8311 Expression::DataType(dt) => {
8312 if matches!(source, DialectType::BigQuery)
8313 && !matches!(target, DialectType::BigQuery)
8314 {
8315 match dt {
8316 DataType::Custom { ref name }
8317 if name.eq_ignore_ascii_case("INT64")
8318 || name.eq_ignore_ascii_case("FLOAT64")
8319 || name.eq_ignore_ascii_case("BOOL")
8320 || name.eq_ignore_ascii_case("BYTES")
8321 || name.eq_ignore_ascii_case("NUMERIC")
8322 || name.eq_ignore_ascii_case("STRING")
8323 || name.eq_ignore_ascii_case("DATETIME") =>
8324 {
8325 Action::BigQueryCastType
8326 }
8327 _ => Action::None,
8328 }
8329 } else if matches!(source, DialectType::TSQL) {
8330 // For TSQL source -> any target (including TSQL itself for REAL)
8331 match dt {
8332 // REAL -> FLOAT even for TSQL->TSQL
8333 DataType::Custom { ref name }
8334 if name.eq_ignore_ascii_case("REAL") =>
8335 {
8336 Action::TSQLTypeNormalize
8337 }
8338 DataType::Float {
8339 real_spelling: true,
8340 ..
8341 } => Action::TSQLTypeNormalize,
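// Other TSQL type normalizations only for non-TSQL targets.
// NOTE(review): the `name[..N]` prefix checks in this guard slice by byte offset; slicing a
// `str` panics when byte N is not a UTF-8 char boundary, so a custom type name containing
// multi-byte characters can panic here. `name.get(..N)` would avoid that.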
8343 DataType::Custom { ref name }
8344 if !matches!(target, DialectType::TSQL)
8345 && (name.eq_ignore_ascii_case("MONEY")
8346 || name.eq_ignore_ascii_case("SMALLMONEY")
8347 || name.eq_ignore_ascii_case("DATETIME2")
8348 || name.eq_ignore_ascii_case("IMAGE")
8349 || name.eq_ignore_ascii_case("BIT")
8350 || name.eq_ignore_ascii_case("ROWVERSION")
8351 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
8352 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
8353 || (name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC"))
8354 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("DATETIME2("))
8355 || (name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME("))) =>
8356 {
8357 Action::TSQLTypeNormalize
8358 }
8359 DataType::Float {
8360 precision: Some(_), ..
8361 } if !matches!(target, DialectType::TSQL) => {
8362 Action::TSQLTypeNormalize
8363 }
8364 DataType::TinyInt { .. }
8365 if !matches!(target, DialectType::TSQL) =>
8366 {
8367 Action::TSQLTypeNormalize
8368 }
8369 // INTEGER -> INT for Databricks/Spark targets
8370 DataType::Int {
8371 integer_spelling: true,
8372 ..
8373 } if matches!(
8374 target,
8375 DialectType::Databricks | DialectType::Spark
8376 ) =>
8377 {
8378 Action::TSQLTypeNormalize
8379 }
8380 _ => Action::None,
8381 }
8382 } else if (matches!(source, DialectType::Oracle)
8383 || matches!(source, DialectType::Generic))
8384 && !matches!(target, DialectType::Oracle)
8385 {
8386 match dt {
8387 DataType::Custom { ref name }
8388 if (name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2("))
8389 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2("))
8390 || name.eq_ignore_ascii_case("VARCHAR2")
8391 || name.eq_ignore_ascii_case("NVARCHAR2") =>
8392 {
8393 Action::OracleVarchar2ToVarchar
8394 }
8395 _ => Action::None,
8396 }
8397 } else if matches!(target, DialectType::Snowflake)
8398 && !matches!(source, DialectType::Snowflake)
8399 {
8400 // When target is Snowflake but source is NOT Snowflake,
8401 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
8402 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
8403 // should keep their FLOAT spelling.
8404 match dt {
8405 DataType::Float { .. } => Action::SnowflakeFloatProtect,
8406 _ => Action::None,
8407 }
8408 } else {
8409 Action::None
8410 }
8411 }
8412 // LOWER patterns from BigQuery TO_HEX conversions:
8413 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
8414 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
8415 Expression::Lower(uf) => {
8416 if matches!(source, DialectType::BigQuery) {
8417 match &uf.this {
8418 Expression::Lower(_) => Action::BigQueryToHexLower,
8419 Expression::Function(f)
8420 if f.name == "TO_HEX"
8421 && matches!(target, DialectType::BigQuery) =>
8422 {
8423 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
8424 Action::BigQueryToHexLower
8425 }
8426 _ => Action::None,
8427 }
8428 } else {
8429 Action::None
8430 }
8431 }
8432 // UPPER patterns from BigQuery TO_HEX conversions:
8433 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
8434 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
8435 Expression::Upper(uf) => {
8436 if matches!(source, DialectType::BigQuery) {
8437 match &uf.this {
8438 Expression::Lower(_) => Action::BigQueryToHexUpper,
8439 _ => Action::None,
8440 }
8441 } else {
8442 Action::None
8443 }
8444 }
8445 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
8446 // Snowflake supports LAST_DAY with unit, so keep it there
8447 Expression::LastDay(ld) => {
8448 if matches!(source, DialectType::BigQuery)
8449 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
8450 && ld.unit.is_some()
8451 {
8452 Action::BigQueryLastDayStripUnit
8453 } else {
8454 Action::None
8455 }
8456 }
8457 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
8458 Expression::SafeDivide(_) => {
8459 if matches!(source, DialectType::BigQuery)
8460 && !matches!(target, DialectType::BigQuery)
8461 {
8462 Action::BigQuerySafeDivide
8463 } else {
8464 Action::None
8465 }
8466 }
8467 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
8468 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
8469 Expression::AnyValue(ref agg) => {
8470 if matches!(source, DialectType::BigQuery)
8471 && matches!(target, DialectType::DuckDB)
8472 && agg.having_max.is_some()
8473 {
8474 Action::BigQueryAnyValueHaving
8475 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
8476 && !matches!(source, DialectType::Spark | DialectType::Databricks)
8477 && agg.ignore_nulls.is_none()
8478 {
8479 Action::AnyValueIgnoreNulls
8480 } else {
8481 Action::None
8482 }
8483 }
8484 Expression::Any(ref q) => {
8485 if matches!(source, DialectType::PostgreSQL)
8486 && matches!(
8487 target,
8488 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8489 )
8490 && q.op.is_some()
8491 && !matches!(
8492 q.subquery,
8493 Expression::Select(_) | Expression::Subquery(_)
8494 )
8495 {
8496 Action::AnyToExists
8497 } else {
8498 Action::None
8499 }
8500 }
// NOTE(review): stale-looking comment — "BigQuery APPROX_QUANTILES(x, n) ->
// APPROX_QUANTILE(x, [quantiles]) for DuckDB" does not describe the RegexpLike arms that follow.
// Snowflake RLIKE does full-string match; DuckDB REGEXP_FULL_MATCH also does full-string match
8503 Expression::RegexpLike(_)
8504 if matches!(source, DialectType::Snowflake)
8505 && matches!(target, DialectType::DuckDB) =>
8506 {
8507 Action::RlikeSnowflakeToDuckDB
8508 }
8509 // RegexpLike from non-DuckDB/non-Snowflake sources -> REGEXP_MATCHES for DuckDB target
8510 Expression::RegexpLike(_)
8511 if !matches!(source, DialectType::DuckDB)
8512 && matches!(target, DialectType::DuckDB) =>
8513 {
8514 Action::RegexpLikeToDuckDB
8515 }
8516 // RegexpLike -> Exasol: anchor pattern with .*...*
8517 Expression::RegexpLike(_)
8518 if matches!(target, DialectType::Exasol) =>
8519 {
8520 Action::RegexpLikeExasolAnchor
8521 }
8522 // Safe-division source -> non-safe target: NULLIF wrapping and/or CAST
8523 // Safe-division dialects: MySQL, DuckDB, SingleStore, TiDB, ClickHouse, Doris
8524 Expression::Div(ref op)
8525 if matches!(
8526 source,
8527 DialectType::MySQL
8528 | DialectType::DuckDB
8529 | DialectType::SingleStore
8530 | DialectType::TiDB
8531 | DialectType::ClickHouse
8532 | DialectType::Doris
8533 ) && matches!(
8534 target,
8535 DialectType::PostgreSQL
8536 | DialectType::Redshift
8537 | DialectType::Drill
8538 | DialectType::Trino
8539 | DialectType::Presto
8540 | DialectType::Athena
8541 | DialectType::TSQL
8542 | DialectType::Teradata
8543 | DialectType::SQLite
8544 | DialectType::BigQuery
8545 | DialectType::Snowflake
8546 | DialectType::Databricks
8547 | DialectType::Oracle
8548 | DialectType::Materialize
8549 | DialectType::RisingWave
8550 ) =>
8551 {
8552 // Only wrap if RHS is not already NULLIF
8553 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
8554 {
8555 Action::MySQLSafeDivide
8556 } else {
8557 Action::None
8558 }
8559 }
8560 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
8561 // For TSQL/Fabric, convert to sp_rename instead
8562 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
8563 if let Some(crate::expressions::AlterTableAction::RenameTable(
8564 ref new_tbl,
8565 )) = at.actions.first()
8566 {
8567 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
8568 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
8569 Action::AlterTableToSpRename
8570 } else if new_tbl.schema.is_some()
8571 && matches!(
8572 target,
8573 DialectType::BigQuery
8574 | DialectType::Doris
8575 | DialectType::StarRocks
8576 | DialectType::DuckDB
8577 | DialectType::PostgreSQL
8578 | DialectType::Redshift
8579 )
8580 {
8581 Action::AlterTableRenameStripSchema
8582 } else {
8583 Action::None
8584 }
8585 } else {
8586 Action::None
8587 }
8588 }
8589 // EPOCH(x) expression -> target-specific epoch conversion
8590 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
8591 Action::EpochConvert
8592 }
8593 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
8594 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
8595 Action::EpochMsConvert
8596 }
// STRING_AGG -> GROUP_CONCAT for MySQL/SingleStore/Doris/StarRocks/SQLite;
// Spark/Databricks targets are also routed through StringAggConvert
8598 Expression::StringAgg(_) => {
8599 if matches!(
8600 target,
8601 DialectType::MySQL
8602 | DialectType::SingleStore
8603 | DialectType::Doris
8604 | DialectType::StarRocks
8605 | DialectType::SQLite
8606 ) {
8607 Action::StringAggConvert
8608 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
8609 Action::StringAggConvert
8610 } else {
8611 Action::None
8612 }
8613 }
8614 Expression::CombinedParameterizedAgg(_) => Action::GenericFunctionNormalize,
8615 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
8616 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
8617 Expression::GroupConcat(_) => Action::GroupConcatConvert,
8618 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
8619 // DuckDB CARDINALITY -> keep as CARDINALITY for DuckDB target (used for maps)
8620 Expression::Cardinality(_)
8621 if matches!(source, DialectType::DuckDB)
8622 && matches!(target, DialectType::DuckDB) =>
8623 {
8624 Action::None
8625 }
8626 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
8627 Action::ArrayLengthConvert
8628 }
8629 Expression::ArraySize(_) => {
8630 if matches!(target, DialectType::Drill) {
8631 Action::ArraySizeDrill
8632 } else {
8633 Action::ArrayLengthConvert
8634 }
8635 }
8636 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
8637 Expression::ArrayRemove(_) => match target {
8638 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
8639 Action::ArrayRemoveConvert
8640 }
8641 _ => Action::None,
8642 },
8643 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
8644 Expression::ArrayReverse(_) => match target {
8645 DialectType::ClickHouse => Action::ArrayReverseConvert,
8646 _ => Action::None,
8647 },
8648 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
8649 Expression::JsonKeys(_) => match target {
8650 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
8651 Action::JsonKeysConvert
8652 }
8653 _ => Action::None,
8654 },
8655 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
8656 Expression::ParseJson(_) => match target {
8657 DialectType::SQLite
8658 | DialectType::Doris
8659 | DialectType::MySQL
8660 | DialectType::StarRocks => Action::ParseJsonStrip,
8661 _ => Action::None,
8662 },
8663 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
8664 Expression::WeekOfYear(_)
8665 if matches!(target, DialectType::Snowflake)
8666 && !matches!(source, DialectType::Snowflake) =>
8667 {
8668 Action::WeekOfYearToWeekIso
8669 }
8670 // NVL: clear original_name so generator uses dialect-specific function names
8671 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
8672 // XOR: expand for dialects that don't support the XOR keyword
8673 Expression::Xor(_) => {
8674 let target_supports_xor = matches!(
8675 target,
8676 DialectType::MySQL
8677 | DialectType::SingleStore
8678 | DialectType::Doris
8679 | DialectType::StarRocks
8680 );
8681 if !target_supports_xor {
8682 Action::XorExpand
8683 } else {
8684 Action::None
8685 }
8686 }
8687 // TSQL #table -> temp table normalization (CREATE TABLE)
8688 Expression::CreateTable(ct)
8689 if matches!(source, DialectType::TSQL | DialectType::Fabric)
8690 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
8691 && ct.name.name.name.starts_with('#') =>
8692 {
8693 Action::TempTableHash
8694 }
8695 // TSQL #table -> strip # from table references in SELECT/etc.
8696 Expression::Table(tr)
8697 if matches!(source, DialectType::TSQL | DialectType::Fabric)
8698 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
8699 && tr.name.name.starts_with('#') =>
8700 {
8701 Action::TempTableHash
8702 }
8703 // TSQL #table -> strip # from DROP TABLE names
8704 Expression::DropTable(ref dt)
8705 if matches!(source, DialectType::TSQL | DialectType::Fabric)
8706 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
8707 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
8708 {
8709 Action::TempTableHash
8710 }
8711 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
8712 Expression::JsonExtract(_)
8713 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
8714 {
8715 Action::JsonExtractToTsql
8716 }
8717 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
8718 Expression::JsonExtractScalar(_)
8719 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
8720 {
8721 Action::JsonExtractToTsql
8722 }
8723 // JSON_EXTRACT -> JSONExtractString for ClickHouse
8724 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
8725 Action::JsonExtractToClickHouse
8726 }
8727 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
8728 Expression::JsonExtractScalar(_)
8729 if matches!(target, DialectType::ClickHouse) =>
8730 {
8731 Action::JsonExtractToClickHouse
8732 }
8733 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
8734 Expression::JsonExtract(ref f)
8735 if !f.arrow_syntax
8736 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
8737 {
8738 Action::JsonExtractToArrow
8739 }
8740 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
8741 Expression::JsonExtract(ref f)
8742 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
8743 && !matches!(
8744 source,
8745 DialectType::PostgreSQL
8746 | DialectType::Redshift
8747 | DialectType::Materialize
8748 )
8749 && matches!(&f.path, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$'))) =>
8750 {
8751 Action::JsonExtractToGetJsonObject
8752 }
8753 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
8754 Expression::JsonExtract(_)
8755 if matches!(
8756 target,
8757 DialectType::Hive | DialectType::Spark | DialectType::Databricks
8758 ) =>
8759 {
8760 Action::JsonExtractToGetJsonObject
8761 }
// JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Redshift, Snowflake, SQLite, DuckDB
// Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
8764 Expression::JsonExtractScalar(ref f)
8765 if !f.arrow_syntax
8766 && !f.hash_arrow_syntax
8767 && matches!(
8768 target,
8769 DialectType::PostgreSQL
8770 | DialectType::Redshift
8771 | DialectType::Snowflake
8772 | DialectType::SQLite
8773 | DialectType::DuckDB
8774 ) =>
8775 {
8776 Action::JsonExtractScalarConvert
8777 }
8778 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
8779 Expression::JsonExtractScalar(_)
8780 if matches!(
8781 target,
8782 DialectType::Hive | DialectType::Spark | DialectType::Databricks
8783 ) =>
8784 {
8785 Action::JsonExtractScalarToGetJsonObject
8786 }
8787 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
8788 Expression::JsonExtract(ref f)
8789 if !f.arrow_syntax
8790 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
8791 {
8792 Action::JsonPathNormalize
8793 }
8794 // JsonQuery (parsed JSON_QUERY) -> target-specific
8795 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
8796 // JsonValue (parsed JSON_VALUE) -> target-specific
8797 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
8798 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
8799 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
8800 Expression::AtTimeZone(_)
8801 if matches!(
8802 target,
8803 DialectType::Presto
8804 | DialectType::Trino
8805 | DialectType::Athena
8806 | DialectType::Spark
8807 | DialectType::Databricks
8808 | DialectType::BigQuery
8809 | DialectType::Snowflake
8810 ) =>
8811 {
8812 Action::AtTimeZoneConvert
8813 }
8814 // DAY_OF_WEEK -> dialect-specific
8815 Expression::DayOfWeek(_)
8816 if matches!(
8817 target,
8818 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
8819 ) =>
8820 {
8821 Action::DayOfWeekConvert
8822 }
8823 // CURRENT_USER -> CURRENT_USER() for Snowflake
8824 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
8825 Action::CurrentUserParens
8826 }
8827 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
8828 Expression::ElementAt(_)
8829 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
8830 {
8831 Action::ElementAtConvert
8832 }
8833 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
8834 Expression::ArrayFunc(ref arr)
8835 if !arr.bracket_notation
8836 && matches!(
8837 target,
8838 DialectType::Spark
8839 | DialectType::Databricks
8840 | DialectType::Hive
8841 | DialectType::BigQuery
8842 | DialectType::DuckDB
8843 | DialectType::Snowflake
8844 | DialectType::Presto
8845 | DialectType::Trino
8846 | DialectType::Athena
8847 | DialectType::ClickHouse
8848 | DialectType::StarRocks
8849 ) =>
8850 {
8851 Action::ArraySyntaxConvert
8852 }
8853 // VARIANCE expression -> varSamp for ClickHouse
8854 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
8855 Action::VarianceToClickHouse
8856 }
8857 // STDDEV expression -> stddevSamp for ClickHouse
8858 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
8859 Action::StddevToClickHouse
8860 }
8861 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
8862 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
8863 Action::ApproxQuantileConvert
8864 }
8865 // MonthsBetween -> target-specific
8866 Expression::MonthsBetween(_)
8867 if !matches!(
8868 target,
8869 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8870 ) =>
8871 {
8872 Action::MonthsBetweenConvert
8873 }
8874 // AddMonths -> target-specific DATEADD/DATE_ADD
8875 Expression::AddMonths(_) => Action::AddMonthsConvert,
8876 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
8877 Expression::MapFromArrays(_)
8878 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
8879 {
8880 Action::MapFromArraysConvert
8881 }
8882 // CURRENT_USER -> CURRENT_USER() for Spark
8883 Expression::CurrentUser(_)
8884 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
8885 {
8886 Action::CurrentUserSparkParens
8887 }
// MONTH/YEAR/DAY('string') from Hive/Spark/Databricks -> cast string to DATE for
// DuckDB/Presto/Trino/Athena/PostgreSQL/Redshift
8889 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
8890 if matches!(
8891 source,
8892 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8893 ) && matches!(&f.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
8894 && matches!(
8895 target,
8896 DialectType::DuckDB
8897 | DialectType::Presto
8898 | DialectType::Trino
8899 | DialectType::Athena
8900 | DialectType::PostgreSQL
8901 | DialectType::Redshift
8902 ) =>
8903 {
8904 Action::SparkDateFuncCast
8905 }
8906 // $parameter -> @parameter for BigQuery
8907 Expression::Parameter(ref p)
8908 if matches!(target, DialectType::BigQuery)
8909 && matches!(source, DialectType::DuckDB)
8910 && (p.style == crate::expressions::ParameterStyle::Dollar
8911 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
8912 {
8913 Action::DollarParamConvert
8914 }
// EscapeString literal containing a raw newline, carriage return, or tab:
// normalize them (e.g. a literal newline to \n)
8916 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::EscapeString(ref s) if s.contains('\n') || s.contains('\r') || s.contains('\t'))
8917 =>
8918 {
8919 Action::EscapeStringNormalize
8920 }
8921 // straight_join: keep lowercase for DuckDB, quote for MySQL
8922 Expression::Column(ref col)
8923 if col.name.name == "STRAIGHT_JOIN"
8924 && col.table.is_none()
8925 && matches!(source, DialectType::DuckDB)
8926 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
8927 {
8928 Action::StraightJoinCase
8929 }
8930 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
// INTERVAL format for Snowflake/PostgreSQL/Redshift targets (string-literal value with a unit):
// INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
8932 Expression::Interval(ref iv)
8933 if matches!(
8934 target,
8935 DialectType::Snowflake
8936 | DialectType::PostgreSQL
8937 | DialectType::Redshift
8938 ) && iv.unit.is_some()
8939 && iv.this.as_ref().map_or(false, |t| matches!(t, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) =>
8940 {
8941 Action::SnowflakeIntervalFormat
8942 }
8943 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
8944 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
8945 if let Some(ref sample) = ts.sample {
8946 if !sample.explicit_method {
8947 Action::TablesampleReservoir
8948 } else {
8949 Action::None
8950 }
8951 } else {
8952 Action::None
8953 }
8954 }
8955 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
8956 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
8957 Expression::TableSample(ref ts)
8958 if matches!(target, DialectType::Snowflake)
8959 && !matches!(source, DialectType::Snowflake)
8960 && ts.sample.is_some() =>
8961 {
8962 if let Some(ref sample) = ts.sample {
8963 if !sample.explicit_method {
8964 Action::TablesampleSnowflakeStrip
8965 } else {
8966 Action::None
8967 }
8968 } else {
8969 Action::None
8970 }
8971 }
8972 Expression::Table(ref t)
8973 if matches!(target, DialectType::Snowflake)
8974 && !matches!(source, DialectType::Snowflake)
8975 && t.table_sample.is_some() =>
8976 {
8977 if let Some(ref sample) = t.table_sample {
8978 if !sample.explicit_method {
8979 Action::TablesampleSnowflakeStrip
8980 } else {
8981 Action::None
8982 }
8983 } else {
8984 Action::None
8985 }
8986 }
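// ALTER TABLE RENAME -> EXEC sp_rename for TSQL/Fabric
// NOTE(review): this arm looks unreachable — the earlier
// `Expression::AlterTable(ref at) if !at.actions.is_empty()` arm already matches every
// AlterTable with a non-empty action list and returns AlterTableToSpRename for TSQL/Fabric.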
8988 Expression::AlterTable(ref at)
8989 if matches!(target, DialectType::TSQL | DialectType::Fabric)
8990 && !at.actions.is_empty()
8991 && matches!(
8992 at.actions.first(),
8993 Some(crate::expressions::AlterTableAction::RenameTable(_))
8994 ) =>
8995 {
8996 Action::AlterTableToSpRename
8997 }
// Subscript index: 1-based to 0-based for BigQuery/Hive/Spark/Databricks targets
// (only when the index is a positive integer literal)
8999 Expression::Subscript(ref sub)
9000 if matches!(
9001 target,
9002 DialectType::BigQuery
9003 | DialectType::Hive
9004 | DialectType::Spark
9005 | DialectType::Databricks
9006 ) && matches!(
9007 source,
9008 DialectType::DuckDB
9009 | DialectType::PostgreSQL
9010 | DialectType::Presto
9011 | DialectType::Trino
9012 | DialectType::Redshift
9013 | DialectType::ClickHouse
9014 ) && matches!(&sub.index, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(ref n) if n.parse::<i64>().unwrap_or(0) > 0)) =>
9015 {
9016 Action::ArrayIndexConvert
9017 }
9018 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
9019 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
9020 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
9021 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
9022 Expression::WindowFunction(ref wf) => {
9023 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
9024 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
9025 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
9026 if matches!(target, DialectType::BigQuery)
9027 && !is_row_number
9028 && !wf.over.order_by.is_empty()
9029 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
9030 {
9031 Action::BigQueryNullsOrdering
9032 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
9033 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
9034 } else {
9035 let source_nulls_last = matches!(source, DialectType::DuckDB);
9036 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
9037 matches!(
9038 f.kind,
9039 crate::expressions::WindowFrameKind::Range
9040 | crate::expressions::WindowFrameKind::Groups
9041 )
9042 });
9043 if source_nulls_last
9044 && matches!(target, DialectType::MySQL)
9045 && !wf.over.order_by.is_empty()
9046 && wf.over.order_by.iter().any(|o| !o.desc)
9047 && !has_range_frame
9048 {
9049 Action::MysqlNullsLastRewrite
9050 } else {
9051 // Check for Snowflake window frame handling for FIRST_VALUE/LAST_VALUE/NTH_VALUE
9052 let is_ranking_window_func = matches!(
9053 &wf.this,
9054 Expression::FirstValue(_)
9055 | Expression::LastValue(_)
9056 | Expression::NthValue(_)
9057 );
9058 let has_full_unbounded_frame = wf.over.frame.as_ref().map_or(false, |f| {
9059 matches!(f.kind, crate::expressions::WindowFrameKind::Rows)
9060 && matches!(f.start, crate::expressions::WindowFrameBound::UnboundedPreceding)
9061 && matches!(f.end, Some(crate::expressions::WindowFrameBound::UnboundedFollowing))
9062 && f.exclude.is_none()
9063 });
9064 if is_ranking_window_func && matches!(source, DialectType::Snowflake) {
9065 if has_full_unbounded_frame && matches!(target, DialectType::Snowflake) {
9066 // Strip the default frame for Snowflake target
9067 Action::SnowflakeWindowFrameStrip
9068 } else if !has_full_unbounded_frame && wf.over.frame.is_none() && !matches!(target, DialectType::Snowflake) {
9069 // Add default frame for non-Snowflake target
9070 Action::SnowflakeWindowFrameAdd
9071 } else {
9072 match &wf.this {
9073 Expression::FirstValue(ref vf)
9074 | Expression::LastValue(ref vf)
9075 if vf.ignore_nulls == Some(false) =>
9076 {
9077 match target {
9078 DialectType::SQLite => Action::RespectNullsConvert,
9079 _ => Action::None,
9080 }
9081 }
9082 _ => Action::None,
9083 }
9084 }
9085 } else {
9086 match &wf.this {
9087 Expression::FirstValue(ref vf)
9088 | Expression::LastValue(ref vf)
9089 if vf.ignore_nulls == Some(false) =>
9090 {
9091 // RESPECT NULLS
9092 match target {
9093 DialectType::SQLite | DialectType::PostgreSQL => {
9094 Action::RespectNullsConvert
9095 }
9096 _ => Action::None,
9097 }
9098 }
9099 _ => Action::None,
9100 }
9101 }
9102 }
9103 }
9104 }
9105 // CREATE TABLE a LIKE b -> dialect-specific transformations
9106 Expression::CreateTable(ref ct)
9107 if ct.columns.is_empty()
9108 && ct.constraints.iter().any(|c| {
9109 matches!(c, crate::expressions::TableConstraint::Like { .. })
9110 })
9111 && matches!(
9112 target,
9113 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
9114 ) =>
9115 {
9116 Action::CreateTableLikeToCtas
9117 }
9118 Expression::CreateTable(ref ct)
9119 if ct.columns.is_empty()
9120 && ct.constraints.iter().any(|c| {
9121 matches!(c, crate::expressions::TableConstraint::Like { .. })
9122 })
9123 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
9124 {
9125 Action::CreateTableLikeToSelectInto
9126 }
9127 Expression::CreateTable(ref ct)
9128 if ct.columns.is_empty()
9129 && ct.constraints.iter().any(|c| {
9130 matches!(c, crate::expressions::TableConstraint::Like { .. })
9131 })
9132 && matches!(target, DialectType::ClickHouse) =>
9133 {
9134 Action::CreateTableLikeToAs
9135 }
9136 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
9137 Expression::CreateTable(ref ct)
9138 if matches!(target, DialectType::DuckDB)
9139 && matches!(
9140 source,
9141 DialectType::DuckDB
9142 | DialectType::Spark
9143 | DialectType::Databricks
9144 | DialectType::Hive
9145 ) =>
9146 {
9147 let has_comment = ct.columns.iter().any(|c| {
9148 c.comment.is_some()
9149 || c.constraints.iter().any(|con| {
9150 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
9151 })
9152 });
9153 let has_props = !ct.properties.is_empty();
9154 if has_comment || has_props {
9155 Action::CreateTableStripComment
9156 } else {
9157 Action::None
9158 }
9159 }
9160 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
9161 Expression::Array(_)
9162 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
9163 {
9164 Action::ArrayConcatBracketConvert
9165 }
9166 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
9167 Expression::ArrayFunc(ref arr)
9168 if arr.bracket_notation
9169 && matches!(source, DialectType::BigQuery)
9170 && matches!(target, DialectType::Redshift) =>
9171 {
9172 Action::ArrayConcatBracketConvert
9173 }
9174 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
9175 Expression::BitwiseOrAgg(ref f)
9176 | Expression::BitwiseAndAgg(ref f)
9177 | Expression::BitwiseXorAgg(ref f) => {
9178 if matches!(target, DialectType::DuckDB) {
9179 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
9180 if let Expression::Cast(ref c) = f.this {
9181 match &c.to {
9182 DataType::Float { .. }
9183 | DataType::Double { .. }
9184 | DataType::Decimal { .. } => Action::BitAggFloatCast,
9185 DataType::Custom { ref name }
9186 if name.eq_ignore_ascii_case("REAL") =>
9187 {
9188 Action::BitAggFloatCast
9189 }
9190 _ => Action::None,
9191 }
9192 } else {
9193 Action::None
9194 }
9195 } else if matches!(target, DialectType::Snowflake) {
9196 Action::BitAggSnowflakeRename
9197 } else {
9198 Action::None
9199 }
9200 }
9201 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
9202 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
9203 Action::FilterToIff
9204 }
9205 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
9206 Expression::Avg(ref f)
9207 | Expression::Sum(ref f)
9208 | Expression::Min(ref f)
9209 | Expression::Max(ref f)
9210 | Expression::CountIf(ref f)
9211 | Expression::Stddev(ref f)
9212 | Expression::StddevPop(ref f)
9213 | Expression::StddevSamp(ref f)
9214 | Expression::Variance(ref f)
9215 | Expression::VarPop(ref f)
9216 | Expression::VarSamp(ref f)
9217 | Expression::Median(ref f)
9218 | Expression::Mode(ref f)
9219 | Expression::First(ref f)
9220 | Expression::Last(ref f)
9221 | Expression::ApproxDistinct(ref f)
9222 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
9223 {
9224 Action::AggFilterToIff
9225 }
9226 Expression::Count(ref c)
9227 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
9228 {
9229 Action::AggFilterToIff
9230 }
9231 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
9232 Expression::Count(ref c)
9233 if c.distinct
9234 && matches!(&c.this, Some(Expression::Tuple(_)))
9235 && matches!(
9236 target,
9237 DialectType::Presto
9238 | DialectType::Trino
9239 | DialectType::DuckDB
9240 | DialectType::PostgreSQL
9241 ) =>
9242 {
9243 Action::CountDistinctMultiArg
9244 }
9245 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
9246 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
9247 Action::JsonToGetPath
9248 }
9249 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
9250 Expression::Struct(_)
9251 if matches!(
9252 target,
9253 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
9254 ) && matches!(source, DialectType::DuckDB) =>
9255 {
9256 Action::StructToRow
9257 }
9258 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
9259 Expression::MapFunc(ref m)
9260 if m.curly_brace_syntax
9261 && matches!(
9262 target,
9263 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
9264 )
9265 && matches!(source, DialectType::DuckDB) =>
9266 {
9267 Action::StructToRow
9268 }
9269 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
9270 Expression::ApproxCountDistinct(_)
9271 if matches!(
9272 target,
9273 DialectType::Presto | DialectType::Trino | DialectType::Athena
9274 ) =>
9275 {
9276 Action::ApproxCountDistinctToApproxDistinct
9277 }
9278 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
9279 Expression::ArrayContains(_)
9280 if matches!(
9281 target,
9282 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
9283 ) && !(matches!(source, DialectType::Snowflake) && matches!(target, DialectType::Snowflake)) =>
9284 {
9285 Action::ArrayContainsConvert
9286 }
9287 // ARRAY_CONTAINS -> DuckDB NULL-aware CASE (from Snowflake source with check_null semantics)
9288 Expression::ArrayContains(_)
9289 if matches!(target, DialectType::DuckDB)
9290 && matches!(source, DialectType::Snowflake) =>
9291 {
9292 Action::ArrayContainsDuckDBConvert
9293 }
9294 // ARRAY_EXCEPT -> target-specific conversion
9295 Expression::ArrayExcept(_)
9296 if matches!(
9297 target,
9298 DialectType::DuckDB | DialectType::Snowflake | DialectType::Presto | DialectType::Trino | DialectType::Athena
9299 ) =>
9300 {
9301 Action::ArrayExceptConvert
9302 }
9303 // ARRAY_POSITION -> swap args for Snowflake target (only when source is not Snowflake)
9304 Expression::ArrayPosition(_)
9305 if matches!(target, DialectType::Snowflake)
9306 && !matches!(source, DialectType::Snowflake) =>
9307 {
9308 Action::ArrayPositionSnowflakeSwap
9309 }
9310 // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB from Snowflake source
9311 Expression::ArrayPosition(_)
9312 if matches!(target, DialectType::DuckDB)
9313 && matches!(source, DialectType::Snowflake) =>
9314 {
9315 Action::SnowflakeArrayPositionToDuckDB
9316 }
9317 // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
9318 Expression::ArrayDistinct(_)
9319 if matches!(target, DialectType::ClickHouse) =>
9320 {
9321 Action::ArrayDistinctClickHouse
9322 }
9323 // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
9324 Expression::ArrayDistinct(_)
9325 if matches!(target, DialectType::DuckDB)
9326 && matches!(source, DialectType::Snowflake) =>
9327 {
9328 Action::ArrayDistinctConvert
9329 }
9330 // StrPosition with position -> complex expansion for Presto/DuckDB
9331 // STRPOS doesn't support a position arg in these dialects
9332 Expression::StrPosition(ref sp)
9333 if sp.position.is_some()
9334 && matches!(
9335 target,
9336 DialectType::Presto
9337 | DialectType::Trino
9338 | DialectType::Athena
9339 | DialectType::DuckDB
9340 ) =>
9341 {
9342 Action::StrPositionExpand
9343 }
9344 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
9345 Expression::First(ref f)
9346 if f.ignore_nulls == Some(true)
9347 && matches!(target, DialectType::DuckDB) =>
9348 {
9349 Action::FirstToAnyValue
9350 }
9351 // BEGIN -> START TRANSACTION for Presto/Trino
9352 Expression::Command(ref cmd)
9353 if cmd.this.eq_ignore_ascii_case("BEGIN")
9354 && matches!(
9355 target,
9356 DialectType::Presto | DialectType::Trino | DialectType::Athena
9357 ) =>
9358 {
9359 // Handled inline below
9360 Action::None // We'll handle it directly
9361 }
9362 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
9363 // PostgreSQL # is parsed as BitwiseXor (which is correct).
9364 // a || b (Concat operator) -> CONCAT function for Presto/Trino
9365 Expression::Concat(ref _op)
9366 if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
9367 && matches!(target, DialectType::Presto | DialectType::Trino) =>
9368 {
9369 Action::PipeConcatToConcat
9370 }
9371 _ => Action::None,
9372 }
9373 };
9374
9375 match action {
9376 Action::None => {
9377 // Handle inline transforms that don't need a dedicated action
9378 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
9379 if let Some(rewritten) = Self::rewrite_tsql_interval_arithmetic(&e) {
9380 return Ok(rewritten);
9381 }
9382 }
9383
9384 // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
9385 if let Expression::Between(ref b) = e {
9386 if let Some(sym) = b.symmetric {
9387 let keeps_symmetric =
9388 matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
9389 if !keeps_symmetric {
9390 if sym {
9391 // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
9392 let b = if let Expression::Between(b) = e {
9393 *b
9394 } else {
9395 unreachable!()
9396 };
9397 let between1 = Expression::Between(Box::new(
9398 crate::expressions::Between {
9399 this: b.this.clone(),
9400 low: b.low.clone(),
9401 high: b.high.clone(),
9402 not: b.not,
9403 symmetric: None,
9404 },
9405 ));
9406 let between2 = Expression::Between(Box::new(
9407 crate::expressions::Between {
9408 this: b.this,
9409 low: b.high,
9410 high: b.low,
9411 not: b.not,
9412 symmetric: None,
9413 },
9414 ));
9415 return Ok(Expression::Paren(Box::new(
9416 crate::expressions::Paren {
9417 this: Expression::Or(Box::new(
9418 crate::expressions::BinaryOp::new(
9419 between1, between2,
9420 ),
9421 )),
9422 trailing_comments: vec![],
9423 },
9424 )));
9425 } else {
9426 // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
9427 let b = if let Expression::Between(b) = e {
9428 *b
9429 } else {
9430 unreachable!()
9431 };
9432 return Ok(Expression::Between(Box::new(
9433 crate::expressions::Between {
9434 this: b.this,
9435 low: b.low,
9436 high: b.high,
9437 not: b.not,
9438 symmetric: None,
9439 },
9440 )));
9441 }
9442 }
9443 }
9444 }
9445
9446 // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
9447 if let Expression::ILike(ref _like) = e {
9448 if matches!(target, DialectType::StarRocks | DialectType::Doris) {
9449 let like = if let Expression::ILike(l) = e {
9450 *l
9451 } else {
9452 unreachable!()
9453 };
9454 let lower_left = Expression::Function(Box::new(Function::new(
9455 "LOWER".to_string(),
9456 vec![like.left],
9457 )));
9458 let lower_right = Expression::Function(Box::new(Function::new(
9459 "LOWER".to_string(),
9460 vec![like.right],
9461 )));
9462 return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
9463 left: lower_left,
9464 right: lower_right,
9465 escape: like.escape,
9466 quantifier: like.quantifier,
9467 inferred_type: None,
9468 })));
9469 }
9470 }
9471
9472 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
9473 if let Expression::MethodCall(ref mc) = e {
9474 if matches!(source, DialectType::Oracle)
9475 && mc.method.name.eq_ignore_ascii_case("VALUE")
9476 && mc.args.is_empty()
9477 {
9478 let is_dbms_random = match &mc.this {
9479 Expression::Identifier(id) => {
9480 id.name.eq_ignore_ascii_case("DBMS_RANDOM")
9481 }
9482 Expression::Column(col) => {
9483 col.table.is_none()
9484 && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
9485 }
9486 _ => false,
9487 };
9488 if is_dbms_random {
9489 let func_name = match target {
9490 DialectType::PostgreSQL
9491 | DialectType::Redshift
9492 | DialectType::DuckDB
9493 | DialectType::SQLite => "RANDOM",
9494 DialectType::Oracle => "DBMS_RANDOM.VALUE",
9495 _ => "RAND",
9496 };
9497 return Ok(Expression::Function(Box::new(Function::new(
9498 func_name.to_string(),
9499 vec![],
9500 ))));
9501 }
9502 }
9503 }
9504 // TRIM without explicit position -> add BOTH for ClickHouse
9505 if let Expression::Trim(ref trim) = e {
9506 if matches!(target, DialectType::ClickHouse)
9507 && trim.sql_standard_syntax
9508 && trim.characters.is_some()
9509 && !trim.position_explicit
9510 {
9511 let mut new_trim = (**trim).clone();
9512 new_trim.position_explicit = true;
9513 return Ok(Expression::Trim(Box::new(new_trim)));
9514 }
9515 }
9516 // BEGIN -> START TRANSACTION for Presto/Trino
9517 if let Expression::Transaction(ref txn) = e {
9518 if matches!(
9519 target,
9520 DialectType::Presto | DialectType::Trino | DialectType::Athena
9521 ) {
9522 // Convert BEGIN to START TRANSACTION by setting mark to "START"
9523 let mut txn = txn.clone();
9524 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
9525 "START".to_string(),
9526 ))));
9527 return Ok(Expression::Transaction(Box::new(*txn)));
9528 }
9529 }
9530 // IS TRUE/FALSE -> simplified forms for Presto/Trino
9531 if matches!(
9532 target,
9533 DialectType::Presto | DialectType::Trino | DialectType::Athena
9534 ) {
9535 match &e {
9536 Expression::IsTrue(itf) if !itf.not => {
9537 // x IS TRUE -> x
9538 return Ok(itf.this.clone());
9539 }
9540 Expression::IsTrue(itf) if itf.not => {
9541 // x IS NOT TRUE -> NOT x
9542 return Ok(Expression::Not(Box::new(
9543 crate::expressions::UnaryOp {
9544 this: itf.this.clone(),
9545 inferred_type: None,
9546 },
9547 )));
9548 }
9549 Expression::IsFalse(itf) if !itf.not => {
9550 // x IS FALSE -> NOT x
9551 return Ok(Expression::Not(Box::new(
9552 crate::expressions::UnaryOp {
9553 this: itf.this.clone(),
9554 inferred_type: None,
9555 },
9556 )));
9557 }
9558 Expression::IsFalse(itf) if itf.not => {
9559 // x IS NOT FALSE -> NOT NOT x
9560 let not_x =
9561 Expression::Not(Box::new(crate::expressions::UnaryOp {
9562 this: itf.this.clone(),
9563 inferred_type: None,
9564 }));
9565 return Ok(Expression::Not(Box::new(
9566 crate::expressions::UnaryOp {
9567 this: not_x,
9568 inferred_type: None,
9569 },
9570 )));
9571 }
9572 _ => {}
9573 }
9574 }
9575 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
9576 if matches!(target, DialectType::Redshift) {
9577 if let Expression::IsFalse(ref itf) = e {
9578 if itf.not {
9579 return Ok(Expression::Not(Box::new(
9580 crate::expressions::UnaryOp {
9581 this: Expression::IsFalse(Box::new(
9582 crate::expressions::IsTrueFalse {
9583 this: itf.this.clone(),
9584 not: false,
9585 },
9586 )),
9587 inferred_type: None,
9588 },
9589 )));
9590 }
9591 }
9592 }
9593 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
9594 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
9595 if let Expression::Function(ref f) = e {
9596 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
9597 && matches!(source, DialectType::Snowflake)
9598 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
9599 {
9600 if f.args.len() == 3 {
9601 let mut args = f.args.clone();
9602 args.push(Expression::string("g"));
9603 return Ok(Expression::Function(Box::new(Function::new(
9604 "REGEXP_REPLACE".to_string(),
9605 args,
9606 ))));
9607 } else if f.args.len() == 4 {
9608 // 4th arg might be position, add 'g' as 5th
9609 let mut args = f.args.clone();
9610 args.push(Expression::string("g"));
9611 return Ok(Expression::Function(Box::new(Function::new(
9612 "REGEXP_REPLACE".to_string(),
9613 args,
9614 ))));
9615 }
9616 }
9617 }
9618 Ok(e)
9619 }
9620
9621 Action::GreatestLeastNull => {
9622 let f = if let Expression::Function(f) = e {
9623 *f
9624 } else {
9625 unreachable!("action only triggered for Function expressions")
9626 };
9627 let mut null_checks: Vec<Expression> = f
9628 .args
9629 .iter()
9630 .map(|a| {
9631 Expression::IsNull(Box::new(IsNull {
9632 this: a.clone(),
9633 not: false,
9634 postfix_form: false,
9635 }))
9636 })
9637 .collect();
9638 let condition = if null_checks.len() == 1 {
9639 null_checks.remove(0)
9640 } else {
9641 let first = null_checks.remove(0);
9642 null_checks.into_iter().fold(first, |acc, check| {
9643 Expression::Or(Box::new(BinaryOp::new(acc, check)))
9644 })
9645 };
9646 Ok(Expression::Case(Box::new(Case {
9647 operand: None,
9648 whens: vec![(condition, Expression::Null(Null))],
9649 else_: Some(Expression::Function(Box::new(Function::new(
9650 f.name, f.args,
9651 )))),
9652 comments: Vec::new(),
9653 inferred_type: None,
9654 })))
9655 }
9656
9657 Action::ArrayGenerateRange => {
9658 let f = if let Expression::Function(f) = e {
9659 *f
9660 } else {
9661 unreachable!("action only triggered for Function expressions")
9662 };
9663 let start = f.args[0].clone();
9664 let end = f.args[1].clone();
9665 let step = f.args.get(2).cloned();
9666
9667 // Helper: compute end - 1 for converting exclusive→inclusive end.
9668 // When end is a literal number, simplify to a computed literal.
9669 fn exclusive_to_inclusive_end(end: &Expression) -> Expression {
9670 // Try to simplify literal numbers
9671 match end {
9672 Expression::Literal(lit)
9673 if matches!(lit.as_ref(), Literal::Number(_)) =>
9674 {
9675 let Literal::Number(n) = lit.as_ref() else {
9676 unreachable!()
9677 };
9678 if let Ok(val) = n.parse::<i64>() {
9679 return Expression::number(val - 1);
9680 }
9681 }
9682 Expression::Neg(u) => {
9683 if let Expression::Literal(lit) = &u.this {
9684 if let Literal::Number(n) = lit.as_ref() {
9685 if let Ok(val) = n.parse::<i64>() {
9686 return Expression::number(-val - 1);
9687 }
9688 }
9689 }
9690 }
9691 _ => {}
9692 }
9693 // Non-literal: produce end - 1 expression
9694 Expression::Sub(Box::new(BinaryOp::new(end.clone(), Expression::number(1))))
9695 }
9696
9697 match target {
9698 // Snowflake ARRAY_GENERATE_RANGE and DuckDB RANGE both use exclusive end,
9699 // so no adjustment needed — just rename the function.
9700 DialectType::Snowflake => {
9701 let mut args = vec![start, end];
9702 if let Some(s) = step {
9703 args.push(s);
9704 }
9705 Ok(Expression::Function(Box::new(Function::new(
9706 "ARRAY_GENERATE_RANGE".to_string(),
9707 args,
9708 ))))
9709 }
9710 DialectType::DuckDB => {
9711 let mut args = vec![start, end];
9712 if let Some(s) = step {
9713 args.push(s);
9714 }
9715 Ok(Expression::Function(Box::new(Function::new(
9716 "RANGE".to_string(),
9717 args,
9718 ))))
9719 }
9720 // These dialects use inclusive end, so convert exclusive→inclusive.
9721 // Presto/Trino: simplify literal numbers (3 → 2).
9722 DialectType::Presto | DialectType::Trino => {
9723 let end_inclusive = exclusive_to_inclusive_end(&end);
9724 let mut args = vec![start, end_inclusive];
9725 if let Some(s) = step {
9726 args.push(s);
9727 }
9728 Ok(Expression::Function(Box::new(Function::new(
9729 "SEQUENCE".to_string(),
9730 args,
9731 ))))
9732 }
9733 // PostgreSQL, Redshift, BigQuery: keep as end - 1 expression form.
9734 DialectType::PostgreSQL | DialectType::Redshift => {
9735 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
9736 end.clone(),
9737 Expression::number(1),
9738 )));
9739 let mut args = vec![start, end_minus_1];
9740 if let Some(s) = step {
9741 args.push(s);
9742 }
9743 Ok(Expression::Function(Box::new(Function::new(
9744 "GENERATE_SERIES".to_string(),
9745 args,
9746 ))))
9747 }
9748 DialectType::BigQuery => {
9749 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
9750 end.clone(),
9751 Expression::number(1),
9752 )));
9753 let mut args = vec![start, end_minus_1];
9754 if let Some(s) = step {
9755 args.push(s);
9756 }
9757 Ok(Expression::Function(Box::new(Function::new(
9758 "GENERATE_ARRAY".to_string(),
9759 args,
9760 ))))
9761 }
9762 _ => Ok(Expression::Function(Box::new(Function::new(
9763 f.name, f.args,
9764 )))),
9765 }
9766 }
9767
9768 Action::Div0TypedDivision => {
9769 let if_func = if let Expression::IfFunc(f) = e {
9770 *f
9771 } else {
9772 unreachable!("action only triggered for IfFunc expressions")
9773 };
9774 if let Some(Expression::Div(div)) = if_func.false_value {
9775 let cast_type = if matches!(target, DialectType::SQLite) {
9776 DataType::Float {
9777 precision: None,
9778 scale: None,
9779 real_spelling: true,
9780 }
9781 } else {
9782 DataType::Double {
9783 precision: None,
9784 scale: None,
9785 }
9786 };
9787 let casted_left = Expression::Cast(Box::new(Cast {
9788 this: div.left,
9789 to: cast_type,
9790 trailing_comments: vec![],
9791 double_colon_syntax: false,
9792 format: None,
9793 default: None,
9794 inferred_type: None,
9795 }));
9796 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
9797 condition: if_func.condition,
9798 true_value: if_func.true_value,
9799 false_value: Some(Expression::Div(Box::new(BinaryOp::new(
9800 casted_left,
9801 div.right,
9802 )))),
9803 original_name: if_func.original_name,
9804 inferred_type: None,
9805 })))
9806 } else {
9807 // Not actually a Div, reconstruct
9808 Ok(Expression::IfFunc(Box::new(if_func)))
9809 }
9810 }
9811
9812 Action::ArrayAggCollectList => {
9813 let agg = if let Expression::ArrayAgg(a) = e {
9814 *a
9815 } else {
9816 unreachable!("action only triggered for ArrayAgg expressions")
9817 };
9818 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9819 name: Some("COLLECT_LIST".to_string()),
9820 ..agg
9821 })))
9822 }
9823
9824 Action::ArrayAggToGroupConcat => {
9825 let agg = if let Expression::ArrayAgg(a) = e {
9826 *a
9827 } else {
9828 unreachable!("action only triggered for ArrayAgg expressions")
9829 };
9830 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9831 name: Some("GROUP_CONCAT".to_string()),
9832 ..agg
9833 })))
9834 }
9835
9836 Action::ArrayAggWithinGroupFilter => {
9837 let wg = if let Expression::WithinGroup(w) = e {
9838 *w
9839 } else {
9840 unreachable!("action only triggered for WithinGroup expressions")
9841 };
9842 if let Expression::ArrayAgg(inner_agg) = wg.this {
9843 let col = inner_agg.this.clone();
9844 let filter = Expression::IsNull(Box::new(IsNull {
9845 this: col,
9846 not: true,
9847 postfix_form: false,
9848 }));
9849 // For DuckDB, add explicit NULLS FIRST for DESC ordering
9850 let order_by = if matches!(target, DialectType::DuckDB) {
9851 wg.order_by
9852 .into_iter()
9853 .map(|mut o| {
9854 if o.desc && o.nulls_first.is_none() {
9855 o.nulls_first = Some(true);
9856 }
9857 o
9858 })
9859 .collect()
9860 } else {
9861 wg.order_by
9862 };
9863 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9864 this: inner_agg.this,
9865 distinct: inner_agg.distinct,
9866 filter: Some(filter),
9867 order_by,
9868 name: inner_agg.name,
9869 ignore_nulls: inner_agg.ignore_nulls,
9870 having_max: inner_agg.having_max,
9871 limit: inner_agg.limit,
9872 inferred_type: None,
9873 })))
9874 } else {
9875 Ok(Expression::WithinGroup(Box::new(wg)))
9876 }
9877 }
9878
9879 Action::ArrayAggFilter => {
9880 let agg = if let Expression::ArrayAgg(a) = e {
9881 *a
9882 } else {
9883 unreachable!("action only triggered for ArrayAgg expressions")
9884 };
9885 let col = agg.this.clone();
9886 let filter = Expression::IsNull(Box::new(IsNull {
9887 this: col,
9888 not: true,
9889 postfix_form: false,
9890 }));
9891 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9892 filter: Some(filter),
9893 ..agg
9894 })))
9895 }
9896
9897 Action::ArrayAggNullFilter => {
9898 // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
9899 // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
9900 let agg = if let Expression::ArrayAgg(a) = e {
9901 *a
9902 } else {
9903 unreachable!("action only triggered for ArrayAgg expressions")
9904 };
9905 let col = agg.this.clone();
9906 let not_null = Expression::IsNull(Box::new(IsNull {
9907 this: col,
9908 not: true,
9909 postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
9910 }));
9911 let new_filter = if let Some(existing_filter) = agg.filter {
9912 // AND the NOT IS NULL with existing filter
9913 Expression::And(Box::new(crate::expressions::BinaryOp::new(
9914 existing_filter,
9915 not_null,
9916 )))
9917 } else {
9918 not_null
9919 };
9920 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9921 filter: Some(new_filter),
9922 ..agg
9923 })))
9924 }
9925
9926 Action::BigQueryArraySelectAsStructToSnowflake => {
9927 // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
9928 // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
9929 if let Expression::Function(mut f) = e {
9930 let is_match = f.args.len() == 1
9931 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
9932 if is_match {
9933 let inner_select = match f.args.remove(0) {
9934 Expression::Select(s) => *s,
9935 _ => unreachable!(
9936 "argument already verified to be a Select expression"
9937 ),
9938 };
9939 // Build OBJECT_CONSTRUCT args from SELECT expressions
9940 let mut oc_args = Vec::new();
9941 for expr in &inner_select.expressions {
9942 match expr {
9943 Expression::Alias(a) => {
9944 let key = Expression::Literal(Box::new(Literal::String(
9945 a.alias.name.clone(),
9946 )));
9947 let value = a.this.clone();
9948 oc_args.push(key);
9949 oc_args.push(value);
9950 }
9951 Expression::Column(c) => {
9952 let key = Expression::Literal(Box::new(Literal::String(
9953 c.name.name.clone(),
9954 )));
9955 oc_args.push(key);
9956 oc_args.push(expr.clone());
9957 }
9958 _ => {
9959 oc_args.push(expr.clone());
9960 }
9961 }
9962 }
9963 let object_construct = Expression::Function(Box::new(Function::new(
9964 "OBJECT_CONSTRUCT".to_string(),
9965 oc_args,
9966 )));
9967 let array_agg = Expression::Function(Box::new(Function::new(
9968 "ARRAY_AGG".to_string(),
9969 vec![object_construct],
9970 )));
9971 let mut new_select = crate::expressions::Select::new();
9972 new_select.expressions = vec![array_agg];
9973 new_select.from = inner_select.from.clone();
9974 new_select.where_clause = inner_select.where_clause.clone();
9975 new_select.group_by = inner_select.group_by.clone();
9976 new_select.having = inner_select.having.clone();
9977 new_select.joins = inner_select.joins.clone();
9978 Ok(Expression::Subquery(Box::new(
9979 crate::expressions::Subquery {
9980 this: Expression::Select(Box::new(new_select)),
9981 alias: None,
9982 column_aliases: Vec::new(),
9983 alias_explicit_as: false,
9984 alias_keyword: None,
9985 order_by: None,
9986 limit: None,
9987 offset: None,
9988 distribute_by: None,
9989 sort_by: None,
9990 cluster_by: None,
9991 lateral: false,
9992 modifiers_inside: false,
9993 trailing_comments: Vec::new(),
9994 inferred_type: None,
9995 },
9996 )))
9997 } else {
9998 Ok(Expression::Function(f))
9999 }
10000 } else {
10001 Ok(e)
10002 }
10003 }
10004
10005 Action::BigQueryPercentileContToDuckDB => {
10006 // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
10007 if let Expression::AggregateFunction(mut af) = e {
10008 af.name = "QUANTILE_CONT".to_string();
10009 af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
10010 // Keep only first 2 args
10011 if af.args.len() > 2 {
10012 af.args.truncate(2);
10013 }
10014 Ok(Expression::AggregateFunction(af))
10015 } else {
10016 Ok(e)
10017 }
10018 }
10019
10020 Action::ArrayAggIgnoreNullsDuckDB => {
10021 // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
10022 // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
10023 let mut agg = if let Expression::ArrayAgg(a) = e {
10024 *a
10025 } else {
10026 unreachable!("action only triggered for ArrayAgg expressions")
10027 };
10028 agg.ignore_nulls = None; // Strip IGNORE NULLS
10029 if !agg.order_by.is_empty() {
10030 agg.order_by[0].nulls_first = Some(true);
10031 }
10032 Ok(Expression::ArrayAgg(Box::new(agg)))
10033 }
10034
10035 Action::CountDistinctMultiArg => {
10036 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
10037 if let Expression::Count(c) = e {
10038 if let Some(Expression::Tuple(t)) = c.this {
10039 let args = t.expressions;
10040 // Build CASE expression:
10041 // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
10042 let mut whens = Vec::new();
10043 for arg in &args {
10044 whens.push((
10045 Expression::IsNull(Box::new(IsNull {
10046 this: arg.clone(),
10047 not: false,
10048 postfix_form: false,
10049 })),
10050 Expression::Null(crate::expressions::Null),
10051 ));
10052 }
10053 // Build the tuple for ELSE
10054 let tuple_expr =
10055 Expression::Tuple(Box::new(crate::expressions::Tuple {
10056 expressions: args,
10057 }));
10058 let case_expr = Expression::Case(Box::new(crate::expressions::Case {
10059 operand: None,
10060 whens,
10061 else_: Some(tuple_expr),
10062 comments: Vec::new(),
10063 inferred_type: None,
10064 }));
10065 Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
10066 this: Some(case_expr),
10067 star: false,
10068 distinct: true,
10069 filter: c.filter,
10070 ignore_nulls: c.ignore_nulls,
10071 original_name: c.original_name,
10072 inferred_type: None,
10073 })))
10074 } else {
10075 Ok(Expression::Count(c))
10076 }
10077 } else {
10078 Ok(e)
10079 }
10080 }
10081
10082 Action::CastTimestampToDatetime => {
10083 let c = if let Expression::Cast(c) = e {
10084 *c
10085 } else {
10086 unreachable!("action only triggered for Cast expressions")
10087 };
10088 Ok(Expression::Cast(Box::new(Cast {
10089 to: DataType::Custom {
10090 name: "DATETIME".to_string(),
10091 },
10092 ..c
10093 })))
10094 }
10095
10096 Action::CastTimestampStripTz => {
10097 // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
10098 let c = if let Expression::Cast(c) = e {
10099 *c
10100 } else {
10101 unreachable!("action only triggered for Cast expressions")
10102 };
10103 Ok(Expression::Cast(Box::new(Cast {
10104 to: DataType::Timestamp {
10105 precision: None,
10106 timezone: false,
10107 },
10108 ..c
10109 })))
10110 }
10111
10112 Action::CastTimestamptzToFunc => {
10113 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
10114 let c = if let Expression::Cast(c) = e {
10115 *c
10116 } else {
10117 unreachable!("action only triggered for Cast expressions")
10118 };
10119 Ok(Expression::Function(Box::new(Function::new(
10120 "TIMESTAMP".to_string(),
10121 vec![c.this],
10122 ))))
10123 }
10124
10125 Action::ToDateToCast => {
10126 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
10127 if let Expression::Function(f) = e {
10128 let arg = f.args.into_iter().next().unwrap();
10129 Ok(Expression::Cast(Box::new(Cast {
10130 this: arg,
10131 to: DataType::Date,
10132 double_colon_syntax: false,
10133 trailing_comments: vec![],
10134 format: None,
10135 default: None,
10136 inferred_type: None,
10137 })))
10138 } else {
10139 Ok(e)
10140 }
10141 }
10142 Action::DateTruncWrapCast => {
10143 // Handle both Expression::DateTrunc/TimestampTrunc and
10144 // Expression::Function("DATE_TRUNC", [unit, expr])
10145 match e {
10146 Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
10147 let input_type = match &d.this {
10148 Expression::Cast(c) => Some(c.to.clone()),
10149 _ => None,
10150 };
10151 if let Some(cast_type) = input_type {
10152 let is_time = matches!(cast_type, DataType::Time { .. });
10153 if is_time {
10154 let date_expr = Expression::Cast(Box::new(Cast {
10155 this: Expression::Literal(Box::new(
10156 crate::expressions::Literal::String(
10157 "1970-01-01".to_string(),
10158 ),
10159 )),
10160 to: DataType::Date,
10161 double_colon_syntax: false,
10162 trailing_comments: vec![],
10163 format: None,
10164 default: None,
10165 inferred_type: None,
10166 }));
10167 let add_expr =
10168 Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
10169 let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
10170 this: add_expr,
10171 unit: d.unit,
10172 }));
10173 Ok(Expression::Cast(Box::new(Cast {
10174 this: inner,
10175 to: cast_type,
10176 double_colon_syntax: false,
10177 trailing_comments: vec![],
10178 format: None,
10179 default: None,
10180 inferred_type: None,
10181 })))
10182 } else {
10183 let inner = Expression::DateTrunc(Box::new(*d));
10184 Ok(Expression::Cast(Box::new(Cast {
10185 this: inner,
10186 to: cast_type,
10187 double_colon_syntax: false,
10188 trailing_comments: vec![],
10189 format: None,
10190 default: None,
10191 inferred_type: None,
10192 })))
10193 }
10194 } else {
10195 Ok(Expression::DateTrunc(d))
10196 }
10197 }
10198 Expression::Function(f) if f.args.len() == 2 => {
10199 // Function-based DATE_TRUNC(unit, expr)
10200 let input_type = match &f.args[1] {
10201 Expression::Cast(c) => Some(c.to.clone()),
10202 _ => None,
10203 };
10204 if let Some(cast_type) = input_type {
10205 let is_time = matches!(cast_type, DataType::Time { .. });
10206 if is_time {
10207 let date_expr = Expression::Cast(Box::new(Cast {
10208 this: Expression::Literal(Box::new(
10209 crate::expressions::Literal::String(
10210 "1970-01-01".to_string(),
10211 ),
10212 )),
10213 to: DataType::Date,
10214 double_colon_syntax: false,
10215 trailing_comments: vec![],
10216 format: None,
10217 default: None,
10218 inferred_type: None,
10219 }));
10220 let mut args = f.args;
10221 let unit_arg = args.remove(0);
10222 let time_expr = args.remove(0);
10223 let add_expr = Expression::Add(Box::new(BinaryOp::new(
10224 date_expr, time_expr,
10225 )));
10226 let inner = Expression::Function(Box::new(Function::new(
10227 "DATE_TRUNC".to_string(),
10228 vec![unit_arg, add_expr],
10229 )));
10230 Ok(Expression::Cast(Box::new(Cast {
10231 this: inner,
10232 to: cast_type,
10233 double_colon_syntax: false,
10234 trailing_comments: vec![],
10235 format: None,
10236 default: None,
10237 inferred_type: None,
10238 })))
10239 } else {
10240 // Wrap the function in CAST
10241 Ok(Expression::Cast(Box::new(Cast {
10242 this: Expression::Function(f),
10243 to: cast_type,
10244 double_colon_syntax: false,
10245 trailing_comments: vec![],
10246 format: None,
10247 default: None,
10248 inferred_type: None,
10249 })))
10250 }
10251 } else {
10252 Ok(Expression::Function(f))
10253 }
10254 }
10255 other => Ok(other),
10256 }
10257 }
10258
10259 Action::RegexpReplaceSnowflakeToDuckDB => {
10260 // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
10261 if let Expression::Function(f) = e {
10262 let mut args = f.args;
10263 let subject = args.remove(0);
10264 let pattern = args.remove(0);
10265 let replacement = args.remove(0);
10266 Ok(Expression::Function(Box::new(Function::new(
10267 "REGEXP_REPLACE".to_string(),
10268 vec![
10269 subject,
10270 pattern,
10271 replacement,
10272 Expression::Literal(Box::new(crate::expressions::Literal::String(
10273 "g".to_string(),
10274 ))),
10275 ],
10276 ))))
10277 } else {
10278 Ok(e)
10279 }
10280 }
10281
10282 Action::RegexpReplacePositionSnowflakeToDuckDB => {
10283 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
10284 // pos=1, occ=1 -> REGEXP_REPLACE(s, p, r) (single replace, no 'g')
10285 // pos>1, occ=0 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r, 'g')
10286 // pos>1, occ=1 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r)
10287 // pos=1, occ=0 -> REGEXP_REPLACE(s, p, r, 'g') (replace all)
10288 if let Expression::Function(f) = e {
10289 let mut args = f.args;
10290 let subject = args.remove(0);
10291 let pattern = args.remove(0);
10292 let replacement = args.remove(0);
10293 let position = args.remove(0);
10294 let occurrence = args.remove(0);
10295
10296 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10297 let is_occ_0 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
10298 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10299
10300 if is_pos_1 && is_occ_1 {
10301 // REGEXP_REPLACE(s, p, r) - single replace, no flags
10302 Ok(Expression::Function(Box::new(Function::new(
10303 "REGEXP_REPLACE".to_string(),
10304 vec![subject, pattern, replacement],
10305 ))))
10306 } else if is_pos_1 && is_occ_0 {
10307 // REGEXP_REPLACE(s, p, r, 'g') - global replace
10308 Ok(Expression::Function(Box::new(Function::new(
10309 "REGEXP_REPLACE".to_string(),
10310 vec![
10311 subject,
10312 pattern,
10313 replacement,
10314 Expression::Literal(Box::new(Literal::String("g".to_string()))),
10315 ],
10316 ))))
10317 } else {
10318 // pos>1: SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r[, 'g'])
10319 // Pre-compute pos-1 when position is a numeric literal
10320 let pos_minus_1 = if let Expression::Literal(ref lit) = position {
10321 if let Literal::Number(ref n) = lit.as_ref() {
10322 if let Ok(val) = n.parse::<i64>() {
10323 Expression::number(val - 1)
10324 } else {
10325 Expression::Sub(Box::new(BinaryOp::new(
10326 position.clone(),
10327 Expression::number(1),
10328 )))
10329 }
10330 } else {
10331 position.clone()
10332 }
10333 } else {
10334 Expression::Sub(Box::new(BinaryOp::new(
10335 position.clone(),
10336 Expression::number(1),
10337 )))
10338 };
10339 let prefix = Expression::Function(Box::new(Function::new(
10340 "SUBSTRING".to_string(),
10341 vec![subject.clone(), Expression::number(1), pos_minus_1],
10342 )));
10343 let suffix_subject = Expression::Function(Box::new(Function::new(
10344 "SUBSTRING".to_string(),
10345 vec![subject, position],
10346 )));
10347 let mut replace_args = vec![suffix_subject, pattern, replacement];
10348 if is_occ_0 {
10349 replace_args.push(Expression::Literal(Box::new(Literal::String(
10350 "g".to_string(),
10351 ))));
10352 }
10353 let replace_expr = Expression::Function(Box::new(Function::new(
10354 "REGEXP_REPLACE".to_string(),
10355 replace_args,
10356 )));
10357 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
10358 this: Box::new(prefix),
10359 expression: Box::new(replace_expr),
10360 safe: None,
10361 })))
10362 }
10363 } else {
10364 Ok(e)
10365 }
10366 }
10367
10368 Action::RegexpSubstrSnowflakeToDuckDB => {
10369 // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
10370 if let Expression::Function(f) = e {
10371 let mut args = f.args;
10372 let arg_count = args.len();
10373 match arg_count {
10374 // REGEXP_SUBSTR(s, p) -> REGEXP_EXTRACT(s, p)
10375 0..=2 => Ok(Expression::Function(Box::new(Function::new(
10376 "REGEXP_EXTRACT".to_string(),
10377 args,
10378 )))),
10379 // REGEXP_SUBSTR(s, p, pos) -> REGEXP_EXTRACT(NULLIF(SUBSTRING(s, pos), ''), p)
10380 3 => {
10381 let subject = args.remove(0);
10382 let pattern = args.remove(0);
10383 let position = args.remove(0);
10384 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10385 if is_pos_1 {
10386 Ok(Expression::Function(Box::new(Function::new(
10387 "REGEXP_EXTRACT".to_string(),
10388 vec![subject, pattern],
10389 ))))
10390 } else {
10391 let substring_expr =
10392 Expression::Function(Box::new(Function::new(
10393 "SUBSTRING".to_string(),
10394 vec![subject, position],
10395 )));
10396 let nullif_expr =
10397 Expression::Function(Box::new(Function::new(
10398 "NULLIF".to_string(),
10399 vec![
10400 substring_expr,
10401 Expression::Literal(Box::new(Literal::String(
10402 String::new(),
10403 ))),
10404 ],
10405 )));
10406 Ok(Expression::Function(Box::new(Function::new(
10407 "REGEXP_EXTRACT".to_string(),
10408 vec![nullif_expr, pattern],
10409 ))))
10410 }
10411 }
10412 // REGEXP_SUBSTR(s, p, pos, occ) -> depends on pos and occ
10413 4 => {
10414 let subject = args.remove(0);
10415 let pattern = args.remove(0);
10416 let position = args.remove(0);
10417 let occurrence = args.remove(0);
10418 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10419 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10420
10421 let effective_subject = if is_pos_1 {
10422 subject
10423 } else {
10424 let substring_expr =
10425 Expression::Function(Box::new(Function::new(
10426 "SUBSTRING".to_string(),
10427 vec![subject, position],
10428 )));
10429 Expression::Function(Box::new(Function::new(
10430 "NULLIF".to_string(),
10431 vec![
10432 substring_expr,
10433 Expression::Literal(Box::new(Literal::String(
10434 String::new(),
10435 ))),
10436 ],
10437 )))
10438 };
10439
10440 if is_occ_1 {
10441 Ok(Expression::Function(Box::new(Function::new(
10442 "REGEXP_EXTRACT".to_string(),
10443 vec![effective_subject, pattern],
10444 ))))
10445 } else {
10446 // ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, p), occ)
10447 let extract_all =
10448 Expression::Function(Box::new(Function::new(
10449 "REGEXP_EXTRACT_ALL".to_string(),
10450 vec![effective_subject, pattern],
10451 )));
10452 Ok(Expression::Function(Box::new(Function::new(
10453 "ARRAY_EXTRACT".to_string(),
10454 vec![extract_all, occurrence],
10455 ))))
10456 }
10457 }
10458 // REGEXP_SUBSTR(s, p, 1, 1, 'e') -> REGEXP_EXTRACT(s, p)
10459 5 => {
10460 let subject = args.remove(0);
10461 let pattern = args.remove(0);
10462 let _position = args.remove(0);
10463 let _occurrence = args.remove(0);
10464 let _flags = args.remove(0);
10465 // Strip 'e' flag, convert to REGEXP_EXTRACT
10466 Ok(Expression::Function(Box::new(Function::new(
10467 "REGEXP_EXTRACT".to_string(),
10468 vec![subject, pattern],
10469 ))))
10470 }
10471 // REGEXP_SUBSTR(s, p, 1, 1, 'e', group) -> REGEXP_EXTRACT(s, p[, group])
10472 _ => {
10473 let subject = args.remove(0);
10474 let pattern = args.remove(0);
10475 let _position = args.remove(0);
10476 let _occurrence = args.remove(0);
10477 let _flags = args.remove(0);
10478 let group = args.remove(0);
10479 let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
10480 if is_group_0 {
10481 // Strip group=0 (default)
10482 Ok(Expression::Function(Box::new(Function::new(
10483 "REGEXP_EXTRACT".to_string(),
10484 vec![subject, pattern],
10485 ))))
10486 } else {
10487 Ok(Expression::Function(Box::new(Function::new(
10488 "REGEXP_EXTRACT".to_string(),
10489 vec![subject, pattern, group],
10490 ))))
10491 }
10492 }
10493 }
10494 } else {
10495 Ok(e)
10496 }
10497 }
10498
10499 Action::RegexpSubstrSnowflakeIdentity => {
10500 // Snowflake→Snowflake: REGEXP_SUBSTR/REGEXP_SUBSTR_ALL with 6 args
10501 // Strip trailing group=0
10502 if let Expression::Function(f) = e {
10503 let func_name = f.name.clone();
10504 let mut args = f.args;
10505 if args.len() == 6 {
10506 let is_group_0 = matches!(&args[5], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
10507 if is_group_0 {
10508 args.truncate(5);
10509 }
10510 }
10511 Ok(Expression::Function(Box::new(Function::new(
10512 func_name, args,
10513 ))))
10514 } else {
10515 Ok(e)
10516 }
10517 }
10518
10519 Action::RegexpSubstrAllSnowflakeToDuckDB => {
10520 // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
10521 if let Expression::Function(f) = e {
10522 let mut args = f.args;
10523 let arg_count = args.len();
10524 match arg_count {
10525 // REGEXP_SUBSTR_ALL(s, p) -> REGEXP_EXTRACT_ALL(s, p)
10526 0..=2 => Ok(Expression::Function(Box::new(Function::new(
10527 "REGEXP_EXTRACT_ALL".to_string(),
10528 args,
10529 )))),
10530 // REGEXP_SUBSTR_ALL(s, p, pos) -> REGEXP_EXTRACT_ALL(SUBSTRING(s, pos), p)
10531 3 => {
10532 let subject = args.remove(0);
10533 let pattern = args.remove(0);
10534 let position = args.remove(0);
10535 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10536 if is_pos_1 {
10537 Ok(Expression::Function(Box::new(Function::new(
10538 "REGEXP_EXTRACT_ALL".to_string(),
10539 vec![subject, pattern],
10540 ))))
10541 } else {
10542 let substring_expr =
10543 Expression::Function(Box::new(Function::new(
10544 "SUBSTRING".to_string(),
10545 vec![subject, position],
10546 )));
10547 Ok(Expression::Function(Box::new(Function::new(
10548 "REGEXP_EXTRACT_ALL".to_string(),
10549 vec![substring_expr, pattern],
10550 ))))
10551 }
10552 }
10553 // REGEXP_SUBSTR_ALL(s, p, 1, occ) -> REGEXP_EXTRACT_ALL(s, p)[occ:]
10554 4 => {
10555 let subject = args.remove(0);
10556 let pattern = args.remove(0);
10557 let position = args.remove(0);
10558 let occurrence = args.remove(0);
10559 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10560 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
10561
10562 let effective_subject = if is_pos_1 {
10563 subject
10564 } else {
10565 Expression::Function(Box::new(Function::new(
10566 "SUBSTRING".to_string(),
10567 vec![subject, position],
10568 )))
10569 };
10570
10571 if is_occ_1 {
10572 Ok(Expression::Function(Box::new(Function::new(
10573 "REGEXP_EXTRACT_ALL".to_string(),
10574 vec![effective_subject, pattern],
10575 ))))
10576 } else {
10577 // REGEXP_EXTRACT_ALL(s, p)[occ:]
10578 let extract_all =
10579 Expression::Function(Box::new(Function::new(
10580 "REGEXP_EXTRACT_ALL".to_string(),
10581 vec![effective_subject, pattern],
10582 )));
10583 Ok(Expression::ArraySlice(Box::new(
10584 crate::expressions::ArraySlice {
10585 this: extract_all,
10586 start: Some(occurrence),
10587 end: None,
10588 },
10589 )))
10590 }
10591 }
10592 // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e') -> REGEXP_EXTRACT_ALL(s, p)
10593 5 => {
10594 let subject = args.remove(0);
10595 let pattern = args.remove(0);
10596 let _position = args.remove(0);
10597 let _occurrence = args.remove(0);
10598 let _flags = args.remove(0);
10599 Ok(Expression::Function(Box::new(Function::new(
10600 "REGEXP_EXTRACT_ALL".to_string(),
10601 vec![subject, pattern],
10602 ))))
10603 }
10604 // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e', 0) -> REGEXP_EXTRACT_ALL(s, p)
10605 _ => {
10606 let subject = args.remove(0);
10607 let pattern = args.remove(0);
10608 let _position = args.remove(0);
10609 let _occurrence = args.remove(0);
10610 let _flags = args.remove(0);
10611 let group = args.remove(0);
10612 let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
10613 if is_group_0 {
10614 Ok(Expression::Function(Box::new(Function::new(
10615 "REGEXP_EXTRACT_ALL".to_string(),
10616 vec![subject, pattern],
10617 ))))
10618 } else {
10619 Ok(Expression::Function(Box::new(Function::new(
10620 "REGEXP_EXTRACT_ALL".to_string(),
10621 vec![subject, pattern, group],
10622 ))))
10623 }
10624 }
10625 }
10626 } else {
10627 Ok(e)
10628 }
10629 }
10630
10631 Action::RegexpCountSnowflakeToDuckDB => {
10632 // Snowflake REGEXP_COUNT(s, p[, pos[, flags]]) ->
10633 // DuckDB: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
10634 if let Expression::Function(f) = e {
10635 let mut args = f.args;
10636 let arg_count = args.len();
10637 let subject = args.remove(0);
10638 let pattern = args.remove(0);
10639
10640 // Handle position arg
10641 let effective_subject = if arg_count >= 3 {
10642 let position = args.remove(0);
10643 Expression::Function(Box::new(Function::new(
10644 "SUBSTRING".to_string(),
10645 vec![subject, position],
10646 )))
10647 } else {
10648 subject
10649 };
10650
10651 // Handle flags arg -> embed as (?flags) prefix in pattern
10652 let effective_pattern = if arg_count >= 4 {
10653 let flags = args.remove(0);
10654 match &flags {
10655 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(f_str) if !f_str.is_empty()) =>
10656 {
10657 let Literal::String(f_str) = lit.as_ref() else {
10658 unreachable!()
10659 };
10660 // Always use concatenation: '(?flags)' || pattern
10661 let prefix = Expression::Literal(Box::new(Literal::String(
10662 format!("(?{})", f_str),
10663 )));
10664 Expression::DPipe(Box::new(crate::expressions::DPipe {
10665 this: Box::new(prefix),
10666 expression: Box::new(pattern.clone()),
10667 safe: None,
10668 }))
10669 }
10670 _ => pattern.clone(),
10671 }
10672 } else {
10673 pattern.clone()
10674 };
10675
10676 // Build: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
10677 let extract_all = Expression::Function(Box::new(Function::new(
10678 "REGEXP_EXTRACT_ALL".to_string(),
10679 vec![effective_subject, effective_pattern.clone()],
10680 )));
10681 let length_expr =
10682 Expression::Length(Box::new(crate::expressions::UnaryFunc {
10683 this: extract_all,
10684 original_name: None,
10685 inferred_type: None,
10686 }));
10687 let condition = Expression::Eq(Box::new(BinaryOp::new(
10688 effective_pattern,
10689 Expression::Literal(Box::new(Literal::String(String::new()))),
10690 )));
10691 Ok(Expression::Case(Box::new(Case {
10692 operand: None,
10693 whens: vec![(condition, Expression::number(0))],
10694 else_: Some(length_expr),
10695 comments: vec![],
10696 inferred_type: None,
10697 })))
10698 } else {
10699 Ok(e)
10700 }
10701 }
10702
10703 Action::RegexpInstrSnowflakeToDuckDB => {
10704 // Snowflake REGEXP_INSTR(s, p[, pos[, occ[, option[, flags[, group]]]]]) ->
10705 // DuckDB: CASE WHEN s IS NULL OR p IS NULL [OR ...] THEN NULL
10706 // WHEN p = '' THEN 0
10707 // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
10708 // ELSE 1 + COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(eff_s, eff_p)[1:occ], x -> LENGTH(x))), 0)
10709 // + COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(eff_s, eff_p)[1:occ - 1], x -> LENGTH(x))), 0)
10710 // + pos_offset
10711 // END
10712 if let Expression::Function(f) = e {
10713 let mut args = f.args;
10714 let subject = args.remove(0);
10715 let pattern = if !args.is_empty() {
10716 args.remove(0)
10717 } else {
10718 Expression::Literal(Box::new(Literal::String(String::new())))
10719 };
10720
10721 // Collect all original args for NULL checks
10722 let position = if !args.is_empty() {
10723 Some(args.remove(0))
10724 } else {
10725 None
10726 };
10727 let occurrence = if !args.is_empty() {
10728 Some(args.remove(0))
10729 } else {
10730 None
10731 };
10732 let option = if !args.is_empty() {
10733 Some(args.remove(0))
10734 } else {
10735 None
10736 };
10737 let flags = if !args.is_empty() {
10738 Some(args.remove(0))
10739 } else {
10740 None
10741 };
10742 let _group = if !args.is_empty() {
10743 Some(args.remove(0))
10744 } else {
10745 None
10746 };
10747
10748 let is_pos_1 = position.as_ref().map_or(true, |p| matches!(p, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1")));
10749 let occurrence_expr = occurrence.clone().unwrap_or(Expression::number(1));
10750
10751 // Build NULL check: subject IS NULL OR pattern IS NULL [OR pos IS NULL ...]
10752 let mut null_checks: Vec<Expression> = vec![
10753 Expression::Is(Box::new(BinaryOp::new(
10754 subject.clone(),
10755 Expression::Null(Null),
10756 ))),
10757 Expression::Is(Box::new(BinaryOp::new(
10758 pattern.clone(),
10759 Expression::Null(Null),
10760 ))),
10761 ];
10762 // Add NULL checks for all provided optional args
10763 for opt_arg in [&position, &occurrence, &option, &flags].iter() {
10764 if let Some(arg) = opt_arg {
10765 null_checks.push(Expression::Is(Box::new(BinaryOp::new(
10766 (*arg).clone(),
10767 Expression::Null(Null),
10768 ))));
10769 }
10770 }
10771 // Chain with OR
10772 let null_condition = null_checks
10773 .into_iter()
10774 .reduce(|a, b| Expression::Or(Box::new(BinaryOp::new(a, b))))
10775 .unwrap();
10776
10777 // Effective subject (apply position offset)
10778 let effective_subject = if is_pos_1 {
10779 subject.clone()
10780 } else {
10781 let pos = position.clone().unwrap_or(Expression::number(1));
10782 Expression::Function(Box::new(Function::new(
10783 "SUBSTRING".to_string(),
10784 vec![subject.clone(), pos],
10785 )))
10786 };
10787
10788 // Effective pattern (apply flags if present)
10789 let effective_pattern = if let Some(ref fl) = flags {
10790 if let Expression::Literal(lit) = fl {
10791 if let Literal::String(f_str) = lit.as_ref() {
10792 if !f_str.is_empty() {
10793 let prefix = Expression::Literal(Box::new(
10794 Literal::String(format!("(?{})", f_str)),
10795 ));
10796 Expression::DPipe(Box::new(crate::expressions::DPipe {
10797 this: Box::new(prefix),
10798 expression: Box::new(pattern.clone()),
10799 safe: None,
10800 }))
10801 } else {
10802 pattern.clone()
10803 }
10804 } else {
10805 fl.clone()
10806 }
10807 } else {
10808 pattern.clone()
10809 }
10810 } else {
10811 pattern.clone()
10812 };
10813
10814 // WHEN pattern = '' THEN 0
10815 let empty_pattern_check = Expression::Eq(Box::new(BinaryOp::new(
10816 effective_pattern.clone(),
10817 Expression::Literal(Box::new(Literal::String(String::new()))),
10818 )));
10819
10820 // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
10821 let match_count_check = Expression::Lt(Box::new(BinaryOp::new(
10822 Expression::Length(Box::new(crate::expressions::UnaryFunc {
10823 this: Expression::Function(Box::new(Function::new(
10824 "REGEXP_EXTRACT_ALL".to_string(),
10825 vec![effective_subject.clone(), effective_pattern.clone()],
10826 ))),
10827 original_name: None,
10828 inferred_type: None,
10829 })),
10830 occurrence_expr.clone(),
10831 )));
10832
10833 // Helper: build LENGTH lambda for LIST_TRANSFORM
10834 let make_len_lambda = || {
10835 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
10836 parameters: vec![crate::expressions::Identifier::new("x")],
10837 body: Expression::Length(Box::new(crate::expressions::UnaryFunc {
10838 this: Expression::Identifier(
10839 crate::expressions::Identifier::new("x"),
10840 ),
10841 original_name: None,
10842 inferred_type: None,
10843 })),
10844 colon: false,
10845 parameter_types: vec![],
10846 }))
10847 };
10848
10849 // COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(s, p)[1:occ], x -> LENGTH(x))), 0)
10850 let split_sliced =
10851 Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
10852 this: Expression::Function(Box::new(Function::new(
10853 "STRING_SPLIT_REGEX".to_string(),
10854 vec![effective_subject.clone(), effective_pattern.clone()],
10855 ))),
10856 start: Some(Expression::number(1)),
10857 end: Some(occurrence_expr.clone()),
10858 }));
10859 let split_sum = Expression::Function(Box::new(Function::new(
10860 "COALESCE".to_string(),
10861 vec![
10862 Expression::Function(Box::new(Function::new(
10863 "LIST_SUM".to_string(),
10864 vec![Expression::Function(Box::new(Function::new(
10865 "LIST_TRANSFORM".to_string(),
10866 vec![split_sliced, make_len_lambda()],
10867 )))],
10868 ))),
10869 Expression::number(0),
10870 ],
10871 )));
10872
10873 // COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(s, p)[1:occ - 1], x -> LENGTH(x))), 0)
10874 let extract_sliced =
10875 Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
10876 this: Expression::Function(Box::new(Function::new(
10877 "REGEXP_EXTRACT_ALL".to_string(),
10878 vec![effective_subject.clone(), effective_pattern.clone()],
10879 ))),
10880 start: Some(Expression::number(1)),
10881 end: Some(Expression::Sub(Box::new(BinaryOp::new(
10882 occurrence_expr.clone(),
10883 Expression::number(1),
10884 )))),
10885 }));
10886 let extract_sum = Expression::Function(Box::new(Function::new(
10887 "COALESCE".to_string(),
10888 vec![
10889 Expression::Function(Box::new(Function::new(
10890 "LIST_SUM".to_string(),
10891 vec![Expression::Function(Box::new(Function::new(
10892 "LIST_TRANSFORM".to_string(),
10893 vec![extract_sliced, make_len_lambda()],
10894 )))],
10895 ))),
10896 Expression::number(0),
10897 ],
10898 )));
10899
10900 // Position offset: pos - 1 when pos > 1, else 0
10901 let pos_offset: Expression = if !is_pos_1 {
10902 let pos = position.clone().unwrap_or(Expression::number(1));
10903 Expression::Sub(Box::new(BinaryOp::new(pos, Expression::number(1))))
10904 } else {
10905 Expression::number(0)
10906 };
10907
10908 // ELSE: 1 + split_sum + extract_sum + pos_offset
10909 let else_expr = Expression::Add(Box::new(BinaryOp::new(
10910 Expression::Add(Box::new(BinaryOp::new(
10911 Expression::Add(Box::new(BinaryOp::new(
10912 Expression::number(1),
10913 split_sum,
10914 ))),
10915 extract_sum,
10916 ))),
10917 pos_offset,
10918 )));
10919
10920 Ok(Expression::Case(Box::new(Case {
10921 operand: None,
10922 whens: vec![
10923 (null_condition, Expression::Null(Null)),
10924 (empty_pattern_check, Expression::number(0)),
10925 (match_count_check, Expression::number(0)),
10926 ],
10927 else_: Some(else_expr),
10928 comments: vec![],
10929 inferred_type: None,
10930 })))
10931 } else {
10932 Ok(e)
10933 }
10934 }
10935
10936 Action::RlikeSnowflakeToDuckDB => {
10937 // Snowflake RLIKE(a, b[, flags]) -> DuckDB REGEXP_FULL_MATCH(a, b[, flags])
10938 // Both do full-string matching, so no anchoring needed
10939 let (subject, pattern, flags) = match e {
10940 Expression::RegexpLike(ref rl) => {
10941 (rl.this.clone(), rl.pattern.clone(), rl.flags.clone())
10942 }
10943 Expression::Function(ref f) if f.args.len() >= 2 => {
10944 let s = f.args[0].clone();
10945 let p = f.args[1].clone();
10946 let fl = f.args.get(2).cloned();
10947 (s, p, fl)
10948 }
10949 _ => return Ok(e),
10950 };
10951
10952 let mut result_args = vec![subject, pattern];
10953 if let Some(fl) = flags {
10954 result_args.push(fl);
10955 }
10956 Ok(Expression::Function(Box::new(Function::new(
10957 "REGEXP_FULL_MATCH".to_string(),
10958 result_args,
10959 ))))
10960 }
10961
10962 Action::RegexpExtractAllToSnowflake => {
10963 // BigQuery REGEXP_EXTRACT_ALL(s, p) -> Snowflake REGEXP_SUBSTR_ALL(s, p)
10964 // With capture group: REGEXP_SUBSTR_ALL(s, p, 1, 1, 'c', 1)
10965 if let Expression::Function(f) = e {
10966 let mut args = f.args;
10967 if args.len() >= 2 {
10968 let str_expr = args.remove(0);
10969 let pattern = args.remove(0);
10970
10971 let has_groups = match &pattern {
10972 Expression::Literal(lit)
10973 if matches!(lit.as_ref(), Literal::String(_)) =>
10974 {
10975 let Literal::String(s) = lit.as_ref() else {
10976 unreachable!()
10977 };
10978 s.contains('(') && s.contains(')')
10979 }
10980 _ => false,
10981 };
10982
10983 if has_groups {
10984 Ok(Expression::Function(Box::new(Function::new(
10985 "REGEXP_SUBSTR_ALL".to_string(),
10986 vec![
10987 str_expr,
10988 pattern,
10989 Expression::number(1),
10990 Expression::number(1),
10991 Expression::Literal(Box::new(Literal::String(
10992 "c".to_string(),
10993 ))),
10994 Expression::number(1),
10995 ],
10996 ))))
10997 } else {
10998 Ok(Expression::Function(Box::new(Function::new(
10999 "REGEXP_SUBSTR_ALL".to_string(),
11000 vec![str_expr, pattern],
11001 ))))
11002 }
11003 } else {
11004 Ok(Expression::Function(Box::new(Function::new(
11005 "REGEXP_SUBSTR_ALL".to_string(),
11006 args,
11007 ))))
11008 }
11009 } else {
11010 Ok(e)
11011 }
11012 }
11013
11014 Action::SetToVariable => {
11015 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
11016 if let Expression::SetStatement(mut s) = e {
11017 for item in &mut s.items {
11018 if item.kind.is_none() {
11019 // Check if name already has VARIABLE prefix (from DuckDB source parsing)
11020 let already_variable = match &item.name {
11021 Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
11022 _ => false,
11023 };
11024 if already_variable {
11025 // Extract the actual name and set kind
11026 if let Expression::Identifier(ref mut id) = item.name {
11027 let actual_name = id.name["VARIABLE ".len()..].to_string();
11028 id.name = actual_name;
11029 }
11030 }
11031 item.kind = Some("VARIABLE".to_string());
11032 }
11033 }
11034 Ok(Expression::SetStatement(s))
11035 } else {
11036 Ok(e)
11037 }
11038 }
11039
11040 Action::ConvertTimezoneToExpr => {
11041 // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
11042 // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
11043 if let Expression::Function(f) = e {
11044 if f.args.len() == 2 {
11045 let mut args = f.args;
11046 let target_tz = args.remove(0);
11047 let timestamp = args.remove(0);
11048 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
11049 source_tz: None,
11050 target_tz: Some(Box::new(target_tz)),
11051 timestamp: Some(Box::new(timestamp)),
11052 options: vec![],
11053 })))
11054 } else if f.args.len() == 3 {
11055 let mut args = f.args;
11056 let source_tz = args.remove(0);
11057 let target_tz = args.remove(0);
11058 let timestamp = args.remove(0);
11059 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
11060 source_tz: Some(Box::new(source_tz)),
11061 target_tz: Some(Box::new(target_tz)),
11062 timestamp: Some(Box::new(timestamp)),
11063 options: vec![],
11064 })))
11065 } else {
11066 Ok(Expression::Function(f))
11067 }
11068 } else {
11069 Ok(e)
11070 }
11071 }
11072
11073 Action::BigQueryCastType => {
11074 // Convert BigQuery types to standard SQL types
11075 if let Expression::DataType(dt) = e {
11076 match dt {
11077 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
11078 Ok(Expression::DataType(DataType::BigInt { length: None }))
11079 }
11080 DataType::Custom { ref name }
11081 if name.eq_ignore_ascii_case("FLOAT64") =>
11082 {
11083 Ok(Expression::DataType(DataType::Double {
11084 precision: None,
11085 scale: None,
11086 }))
11087 }
11088 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
11089 Ok(Expression::DataType(DataType::Boolean))
11090 }
11091 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
11092 Ok(Expression::DataType(DataType::VarBinary { length: None }))
11093 }
11094 DataType::Custom { ref name }
11095 if name.eq_ignore_ascii_case("NUMERIC") =>
11096 {
11097 // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
11098 // default precision (18, 3) being added to bare DECIMAL
11099 if matches!(target, DialectType::DuckDB) {
11100 Ok(Expression::DataType(DataType::Custom {
11101 name: "DECIMAL".to_string(),
11102 }))
11103 } else {
11104 Ok(Expression::DataType(DataType::Decimal {
11105 precision: None,
11106 scale: None,
11107 }))
11108 }
11109 }
11110 DataType::Custom { ref name }
11111 if name.eq_ignore_ascii_case("STRING") =>
11112 {
11113 Ok(Expression::DataType(DataType::String { length: None }))
11114 }
11115 DataType::Custom { ref name }
11116 if name.eq_ignore_ascii_case("DATETIME") =>
11117 {
11118 Ok(Expression::DataType(DataType::Timestamp {
11119 precision: None,
11120 timezone: false,
11121 }))
11122 }
11123 _ => Ok(Expression::DataType(dt)),
11124 }
11125 } else {
11126 Ok(e)
11127 }
11128 }
11129
11130 Action::BigQuerySafeDivide => {
11131 // Convert SafeDivide expression to IF/CASE form for most targets
11132 if let Expression::SafeDivide(sd) = e {
11133 let x = *sd.this;
11134 let y = *sd.expression;
11135 // Wrap x and y in parens if they're complex expressions
11136 let y_ref = match &y {
11137 Expression::Column(_)
11138 | Expression::Literal(_)
11139 | Expression::Identifier(_) => y.clone(),
11140 _ => Expression::Paren(Box::new(Paren {
11141 this: y.clone(),
11142 trailing_comments: vec![],
11143 })),
11144 };
11145 let x_ref = match &x {
11146 Expression::Column(_)
11147 | Expression::Literal(_)
11148 | Expression::Identifier(_) => x.clone(),
11149 _ => Expression::Paren(Box::new(Paren {
11150 this: x.clone(),
11151 trailing_comments: vec![],
11152 })),
11153 };
11154 let condition = Expression::Neq(Box::new(BinaryOp::new(
11155 y_ref.clone(),
11156 Expression::number(0),
11157 )));
11158 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
11159
11160 if matches!(target, DialectType::Spark | DialectType::Databricks) {
11161 Ok(Expression::Function(Box::new(Function::new(
11162 "TRY_DIVIDE".to_string(),
11163 vec![x, y],
11164 ))))
11165 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
11166 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
11167 let cast_x = Expression::Cast(Box::new(Cast {
11168 this: match &x {
11169 Expression::Column(_)
11170 | Expression::Literal(_)
11171 | Expression::Identifier(_) => x,
11172 _ => Expression::Paren(Box::new(Paren {
11173 this: x,
11174 trailing_comments: vec![],
11175 })),
11176 },
11177 to: DataType::Double {
11178 precision: None,
11179 scale: None,
11180 },
11181 trailing_comments: vec![],
11182 double_colon_syntax: false,
11183 format: None,
11184 default: None,
11185 inferred_type: None,
11186 }));
11187 let cast_div = Expression::Div(Box::new(BinaryOp::new(
11188 cast_x,
11189 match &y {
11190 Expression::Column(_)
11191 | Expression::Literal(_)
11192 | Expression::Identifier(_) => y,
11193 _ => Expression::Paren(Box::new(Paren {
11194 this: y,
11195 trailing_comments: vec![],
11196 })),
11197 },
11198 )));
11199 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
11200 condition,
11201 true_value: cast_div,
11202 false_value: Some(Expression::Null(Null)),
11203 original_name: None,
11204 inferred_type: None,
11205 })))
11206 } else if matches!(target, DialectType::PostgreSQL) {
11207 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
11208 let cast_x = Expression::Cast(Box::new(Cast {
11209 this: match &x {
11210 Expression::Column(_)
11211 | Expression::Literal(_)
11212 | Expression::Identifier(_) => x,
11213 _ => Expression::Paren(Box::new(Paren {
11214 this: x,
11215 trailing_comments: vec![],
11216 })),
11217 },
11218 to: DataType::Custom {
11219 name: "DOUBLE PRECISION".to_string(),
11220 },
11221 trailing_comments: vec![],
11222 double_colon_syntax: false,
11223 format: None,
11224 default: None,
11225 inferred_type: None,
11226 }));
11227 let y_paren = match &y {
11228 Expression::Column(_)
11229 | Expression::Literal(_)
11230 | Expression::Identifier(_) => y,
11231 _ => Expression::Paren(Box::new(Paren {
11232 this: y,
11233 trailing_comments: vec![],
11234 })),
11235 };
11236 let cast_div =
11237 Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
11238 Ok(Expression::Case(Box::new(Case {
11239 operand: None,
11240 whens: vec![(condition, cast_div)],
11241 else_: Some(Expression::Null(Null)),
11242 comments: Vec::new(),
11243 inferred_type: None,
11244 })))
11245 } else if matches!(target, DialectType::DuckDB) {
11246 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
11247 Ok(Expression::Case(Box::new(Case {
11248 operand: None,
11249 whens: vec![(condition, div_expr)],
11250 else_: Some(Expression::Null(Null)),
11251 comments: Vec::new(),
11252 inferred_type: None,
11253 })))
11254 } else if matches!(target, DialectType::Snowflake) {
11255 // Snowflake: IFF(y <> 0, x / y, NULL)
11256 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
11257 condition,
11258 true_value: div_expr,
11259 false_value: Some(Expression::Null(Null)),
11260 original_name: Some("IFF".to_string()),
11261 inferred_type: None,
11262 })))
11263 } else {
11264 // All others: IF(y <> 0, x / y, NULL)
11265 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
11266 condition,
11267 true_value: div_expr,
11268 false_value: Some(Expression::Null(Null)),
11269 original_name: None,
11270 inferred_type: None,
11271 })))
11272 }
11273 } else {
11274 Ok(e)
11275 }
11276 }
11277
11278 Action::BigQueryLastDayStripUnit => {
11279 if let Expression::LastDay(mut ld) = e {
11280 ld.unit = None; // Strip the unit (MONTH is default)
11281 match target {
11282 DialectType::PostgreSQL => {
11283 // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
11284 let date_trunc = Expression::Function(Box::new(Function::new(
11285 "DATE_TRUNC".to_string(),
11286 vec![
11287 Expression::Literal(Box::new(
11288 crate::expressions::Literal::String(
11289 "MONTH".to_string(),
11290 ),
11291 )),
11292 ld.this.clone(),
11293 ],
11294 )));
11295 let plus_month =
11296 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
11297 date_trunc,
11298 Expression::Interval(Box::new(
11299 crate::expressions::Interval {
11300 this: Some(Expression::Literal(Box::new(
11301 crate::expressions::Literal::String(
11302 "1 MONTH".to_string(),
11303 ),
11304 ))),
11305 unit: None,
11306 },
11307 )),
11308 )));
11309 let minus_day =
11310 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
11311 plus_month,
11312 Expression::Interval(Box::new(
11313 crate::expressions::Interval {
11314 this: Some(Expression::Literal(Box::new(
11315 crate::expressions::Literal::String(
11316 "1 DAY".to_string(),
11317 ),
11318 ))),
11319 unit: None,
11320 },
11321 )),
11322 )));
11323 Ok(Expression::Cast(Box::new(Cast {
11324 this: minus_day,
11325 to: DataType::Date,
11326 trailing_comments: vec![],
11327 double_colon_syntax: false,
11328 format: None,
11329 default: None,
11330 inferred_type: None,
11331 })))
11332 }
11333 DialectType::Presto => {
11334 // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
11335 Ok(Expression::Function(Box::new(Function::new(
11336 "LAST_DAY_OF_MONTH".to_string(),
11337 vec![ld.this],
11338 ))))
11339 }
11340 DialectType::ClickHouse => {
11341 // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
11342 // Need to wrap the DATE type in Nullable
11343 let nullable_date = match ld.this {
11344 Expression::Cast(mut c) => {
11345 c.to = DataType::Nullable {
11346 inner: Box::new(DataType::Date),
11347 };
11348 Expression::Cast(c)
11349 }
11350 other => other,
11351 };
11352 ld.this = nullable_date;
11353 Ok(Expression::LastDay(ld))
11354 }
11355 _ => Ok(Expression::LastDay(ld)),
11356 }
11357 } else {
11358 Ok(e)
11359 }
11360 }
11361
11362 Action::BigQueryCastFormat => {
11363 // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
11364 // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
11365 // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
11366 let (this, to, format_expr, is_safe) = match e {
11367 Expression::Cast(ref c) if c.format.is_some() => (
11368 c.this.clone(),
11369 c.to.clone(),
11370 c.format.as_ref().unwrap().as_ref().clone(),
11371 false,
11372 ),
11373 Expression::SafeCast(ref c) if c.format.is_some() => (
11374 c.this.clone(),
11375 c.to.clone(),
11376 c.format.as_ref().unwrap().as_ref().clone(),
11377 true,
11378 ),
11379 _ => return Ok(e),
11380 };
11381 // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
11382 if matches!(target, DialectType::BigQuery) {
11383 match &to {
11384 DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
11385 // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
11386 return Ok(e);
11387 }
11388 _ => {}
11389 }
11390 }
11391 // Extract timezone from format if AT TIME ZONE is present
11392 let (actual_format_expr, timezone) = match &format_expr {
11393 Expression::AtTimeZone(ref atz) => {
11394 (atz.this.clone(), Some(atz.zone.clone()))
11395 }
11396 _ => (format_expr.clone(), None),
11397 };
11398 let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
11399 match target {
11400 DialectType::BigQuery => {
11401 // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
11402 // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
11403 let func_name = match &to {
11404 DataType::Date => "PARSE_DATE",
11405 DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
11406 DataType::Time { .. } => "PARSE_TIMESTAMP",
11407 _ => "PARSE_TIMESTAMP",
11408 };
11409 let mut func_args = vec![strftime_fmt, this];
11410 if let Some(tz) = timezone {
11411 func_args.push(tz);
11412 }
11413 Ok(Expression::Function(Box::new(Function::new(
11414 func_name.to_string(),
11415 func_args,
11416 ))))
11417 }
11418 DialectType::DuckDB => {
11419 // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
11420 // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
11421 let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
11422 let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
11423 let parse_call = Expression::Function(Box::new(Function::new(
11424 parse_fn_name.to_string(),
11425 vec![this, duck_fmt],
11426 )));
11427 Ok(Expression::Cast(Box::new(Cast {
11428 this: parse_call,
11429 to,
11430 trailing_comments: vec![],
11431 double_colon_syntax: false,
11432 format: None,
11433 default: None,
11434 inferred_type: None,
11435 })))
11436 }
11437 _ => Ok(e),
11438 }
11439 }
11440
Action::BigQueryFunctionNormalize => {
    // Delegates to `Self::normalize_bigquery_function`, passing the expression
    // together with the source and target dialects; the helper's result is
    // the result of this arm.
    Self::normalize_bigquery_function(e, source, target)
}
11444
Action::BigQueryToHexBare => {
    // Not used anymore - handled directly in normalize_bigquery_function.
    // The arm is a no-op: the expression is returned unchanged.
    Ok(e)
}
11449
11450 Action::BigQueryToHexLower => {
11451 if let Expression::Lower(uf) = e {
11452 match uf.this {
11453 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
11454 Expression::Function(f)
11455 if matches!(target, DialectType::BigQuery)
11456 && f.name == "TO_HEX" =>
11457 {
11458 Ok(Expression::Function(f))
11459 }
11460 // LOWER(LOWER(HEX/TO_HEX(x))) patterns
11461 Expression::Lower(inner_uf) => {
11462 if matches!(target, DialectType::BigQuery) {
11463 // BQ->BQ: extract TO_HEX
11464 if let Expression::Function(f) = inner_uf.this {
11465 Ok(Expression::Function(Box::new(Function::new(
11466 "TO_HEX".to_string(),
11467 f.args,
11468 ))))
11469 } else {
11470 Ok(Expression::Lower(inner_uf))
11471 }
11472 } else {
11473 // Flatten: LOWER(LOWER(x)) -> LOWER(x)
11474 Ok(Expression::Lower(inner_uf))
11475 }
11476 }
11477 other => {
11478 Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
11479 this: other,
11480 original_name: None,
11481 inferred_type: None,
11482 })))
11483 }
11484 }
11485 } else {
11486 Ok(e)
11487 }
11488 }
11489
11490 Action::BigQueryToHexUpper => {
11491 // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
11492 // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
11493 if let Expression::Upper(uf) = e {
11494 if let Expression::Lower(inner_uf) = uf.this {
11495 // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
11496 if matches!(target, DialectType::BigQuery) {
11497 // Restore TO_HEX name in inner function
11498 if let Expression::Function(f) = inner_uf.this {
11499 let restored = Expression::Function(Box::new(Function::new(
11500 "TO_HEX".to_string(),
11501 f.args,
11502 )));
11503 Ok(Expression::Upper(Box::new(
11504 crate::expressions::UnaryFunc::new(restored),
11505 )))
11506 } else {
11507 Ok(Expression::Upper(inner_uf))
11508 }
11509 } else {
11510 // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
11511 Ok(inner_uf.this)
11512 }
11513 } else {
11514 Ok(Expression::Upper(uf))
11515 }
11516 } else {
11517 Ok(e)
11518 }
11519 }
11520
11521 Action::BigQueryAnyValueHaving => {
11522 // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
11523 // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
11524 if let Expression::AnyValue(agg) = e {
11525 if let Some((having_expr, is_max)) = agg.having_max {
11526 let func_name = if is_max {
11527 "ARG_MAX_NULL"
11528 } else {
11529 "ARG_MIN_NULL"
11530 };
11531 Ok(Expression::Function(Box::new(Function::new(
11532 func_name.to_string(),
11533 vec![agg.this, *having_expr],
11534 ))))
11535 } else {
11536 Ok(Expression::AnyValue(agg))
11537 }
11538 } else {
11539 Ok(e)
11540 }
11541 }
11542
11543 Action::BigQueryApproxQuantiles => {
11544 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
11545 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
11546 if let Expression::AggregateFunction(agg) = e {
11547 if agg.args.len() >= 2 {
11548 let x_expr = agg.args[0].clone();
11549 let n_expr = &agg.args[1];
11550
11551 // Extract the numeric value from n_expr
11552 let n = match n_expr {
11553 Expression::Literal(lit)
11554 if matches!(
11555 lit.as_ref(),
11556 crate::expressions::Literal::Number(_)
11557 ) =>
11558 {
11559 let crate::expressions::Literal::Number(s) = lit.as_ref()
11560 else {
11561 unreachable!()
11562 };
11563 s.parse::<usize>().unwrap_or(2)
11564 }
11565 _ => 2,
11566 };
11567
11568 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
11569 let mut quantiles = Vec::new();
11570 for i in 0..=n {
11571 let q = i as f64 / n as f64;
11572 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
11573 if q == 0.0 {
11574 quantiles.push(Expression::number(0));
11575 } else if q == 1.0 {
11576 quantiles.push(Expression::number(1));
11577 } else {
11578 quantiles.push(Expression::Literal(Box::new(
11579 crate::expressions::Literal::Number(format!("{}", q)),
11580 )));
11581 }
11582 }
11583
11584 let array_expr =
11585 Expression::Array(Box::new(crate::expressions::Array {
11586 expressions: quantiles,
11587 }));
11588
11589 // Preserve DISTINCT modifier
11590 let mut new_func = Function::new(
11591 "APPROX_QUANTILE".to_string(),
11592 vec![x_expr, array_expr],
11593 );
11594 new_func.distinct = agg.distinct;
11595 Ok(Expression::Function(Box::new(new_func)))
11596 } else {
11597 Ok(Expression::AggregateFunction(agg))
11598 }
11599 } else {
11600 Ok(e)
11601 }
11602 }
11603
11604 Action::GenericFunctionNormalize => {
11605 // Helper closure to convert ARBITRARY to target-specific function
11606 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
11607 let name = match target {
11608 DialectType::ClickHouse => "any",
11609 DialectType::TSQL | DialectType::SQLite => "MAX",
11610 DialectType::Hive => "FIRST",
11611 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
11612 "ARBITRARY"
11613 }
11614 _ => "ANY_VALUE",
11615 };
11616 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
11617 }
11618
11619 if let Expression::Function(f) = e {
11620 let name = f.name.to_ascii_uppercase();
11621 match name.as_str() {
11622 "ARBITRARY" if f.args.len() == 1 => {
11623 let arg = f.args.into_iter().next().unwrap();
11624 Ok(convert_arbitrary(arg, target))
11625 }
11626 "TO_NUMBER" if f.args.len() == 1 => {
11627 let arg = f.args.into_iter().next().unwrap();
11628 match target {
11629 DialectType::Oracle | DialectType::Snowflake => {
11630 Ok(Expression::Function(Box::new(Function::new(
11631 "TO_NUMBER".to_string(),
11632 vec![arg],
11633 ))))
11634 }
11635 _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
11636 this: arg,
11637 to: crate::expressions::DataType::Double {
11638 precision: None,
11639 scale: None,
11640 },
11641 double_colon_syntax: false,
11642 trailing_comments: Vec::new(),
11643 format: None,
11644 default: None,
11645 inferred_type: None,
11646 }))),
11647 }
11648 }
11649 "AGGREGATE" if f.args.len() >= 3 => match target {
11650 DialectType::DuckDB
11651 | DialectType::Hive
11652 | DialectType::Presto
11653 | DialectType::Trino => Ok(Expression::Function(Box::new(
11654 Function::new("REDUCE".to_string(), f.args),
11655 ))),
11656 _ => Ok(Expression::Function(f)),
11657 },
11658 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep as-is for DuckDB
11659 "REGEXP_MATCHES" if f.args.len() >= 2 => {
11660 if matches!(target, DialectType::DuckDB) {
11661 Ok(Expression::Function(f))
11662 } else {
11663 let mut args = f.args;
11664 let this = args.remove(0);
11665 let pattern = args.remove(0);
11666 let flags = if args.is_empty() {
11667 None
11668 } else {
11669 Some(args.remove(0))
11670 };
11671 Ok(Expression::RegexpLike(Box::new(
11672 crate::expressions::RegexpFunc {
11673 this,
11674 pattern,
11675 flags,
11676 },
11677 )))
11678 }
11679 }
11680 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
11681 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
11682 if matches!(target, DialectType::DuckDB) {
11683 Ok(Expression::Function(f))
11684 } else {
11685 let mut args = f.args;
11686 let this = args.remove(0);
11687 let pattern = args.remove(0);
11688 let flags = if args.is_empty() {
11689 None
11690 } else {
11691 Some(args.remove(0))
11692 };
11693 Ok(Expression::RegexpLike(Box::new(
11694 crate::expressions::RegexpFunc {
11695 this,
11696 pattern,
11697 flags,
11698 },
11699 )))
11700 }
11701 }
11702 // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
11703 "STRUCT_EXTRACT" if f.args.len() == 2 => {
11704 let mut args = f.args;
11705 let this = args.remove(0);
11706 let field_expr = args.remove(0);
11707 // Extract string literal to get field name
11708 let field_name = match &field_expr {
11709 Expression::Literal(lit)
11710 if matches!(
11711 lit.as_ref(),
11712 crate::expressions::Literal::String(_)
11713 ) =>
11714 {
11715 let crate::expressions::Literal::String(s) = lit.as_ref()
11716 else {
11717 unreachable!()
11718 };
11719 s.clone()
11720 }
11721 Expression::Identifier(id) => id.name.clone(),
11722 _ => {
11723 return Ok(Expression::Function(Box::new(Function::new(
11724 "STRUCT_EXTRACT".to_string(),
11725 vec![this, field_expr],
11726 ))))
11727 }
11728 };
11729 Ok(Expression::StructExtract(Box::new(
11730 crate::expressions::StructExtractFunc {
11731 this,
11732 field: crate::expressions::Identifier::new(field_name),
11733 },
11734 )))
11735 }
11736 // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
11737 "LIST_FILTER" if f.args.len() == 2 => {
11738 let name = match target {
11739 DialectType::DuckDB => "LIST_FILTER",
11740 _ => "FILTER",
11741 };
11742 Ok(Expression::Function(Box::new(Function::new(
11743 name.to_string(),
11744 f.args,
11745 ))))
11746 }
11747 // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
11748 "LIST_TRANSFORM" if f.args.len() == 2 => {
11749 let name = match target {
11750 DialectType::DuckDB => "LIST_TRANSFORM",
11751 _ => "TRANSFORM",
11752 };
11753 Ok(Expression::Function(Box::new(Function::new(
11754 name.to_string(),
11755 f.args,
11756 ))))
11757 }
11758 // LIST_SORT(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, SORT_ARRAY(x) for others
11759 "LIST_SORT" if f.args.len() >= 1 => {
11760 let name = match target {
11761 DialectType::DuckDB => "LIST_SORT",
11762 DialectType::Presto | DialectType::Trino => "ARRAY_SORT",
11763 _ => "SORT_ARRAY",
11764 };
11765 Ok(Expression::Function(Box::new(Function::new(
11766 name.to_string(),
11767 f.args,
11768 ))))
11769 }
11770 // LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
11771 "LIST_REVERSE_SORT" if f.args.len() >= 1 => {
11772 match target {
11773 DialectType::DuckDB => Ok(Expression::Function(Box::new(
11774 Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
11775 ))),
11776 DialectType::Spark
11777 | DialectType::Databricks
11778 | DialectType::Hive => {
11779 let mut args = f.args;
11780 args.push(Expression::Identifier(
11781 crate::expressions::Identifier::new("FALSE"),
11782 ));
11783 Ok(Expression::Function(Box::new(Function::new(
11784 "SORT_ARRAY".to_string(),
11785 args,
11786 ))))
11787 }
11788 DialectType::Presto
11789 | DialectType::Trino
11790 | DialectType::Athena => {
11791 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
11792 let arr = f.args.into_iter().next().unwrap();
11793 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
11794 parameters: vec![
11795 crate::expressions::Identifier::new("a"),
11796 crate::expressions::Identifier::new("b"),
11797 ],
11798 body: Expression::Case(Box::new(Case {
11799 operand: None,
11800 whens: vec![
11801 (
11802 Expression::Lt(Box::new(BinaryOp::new(
11803 Expression::Identifier(crate::expressions::Identifier::new("a")),
11804 Expression::Identifier(crate::expressions::Identifier::new("b")),
11805 ))),
11806 Expression::number(1),
11807 ),
11808 (
11809 Expression::Gt(Box::new(BinaryOp::new(
11810 Expression::Identifier(crate::expressions::Identifier::new("a")),
11811 Expression::Identifier(crate::expressions::Identifier::new("b")),
11812 ))),
11813 Expression::Literal(Box::new(Literal::Number("-1".to_string()))),
11814 ),
11815 ],
11816 else_: Some(Expression::number(0)),
11817 comments: Vec::new(),
11818 inferred_type: None,
11819 })),
11820 colon: false,
11821 parameter_types: Vec::new(),
11822 }));
11823 Ok(Expression::Function(Box::new(Function::new(
11824 "ARRAY_SORT".to_string(),
11825 vec![arr, lambda],
11826 ))))
11827 }
11828 _ => Ok(Expression::Function(Box::new(Function::new(
11829 "LIST_REVERSE_SORT".to_string(),
11830 f.args,
11831 )))),
11832 }
11833 }
11834 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
11835 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
11836 let mut args = f.args;
11837 args.push(Expression::string(","));
11838 let name = match target {
11839 DialectType::DuckDB => "STR_SPLIT",
11840 DialectType::Presto | DialectType::Trino => "SPLIT",
11841 DialectType::Spark
11842 | DialectType::Databricks
11843 | DialectType::Hive => "SPLIT",
11844 DialectType::PostgreSQL => "STRING_TO_ARRAY",
11845 DialectType::Redshift => "SPLIT_TO_ARRAY",
11846 _ => "SPLIT",
11847 };
11848 Ok(Expression::Function(Box::new(Function::new(
11849 name.to_string(),
11850 args,
11851 ))))
11852 }
11853 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
11854 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
11855 let name = match target {
11856 DialectType::DuckDB => "STR_SPLIT",
11857 DialectType::Presto | DialectType::Trino => "SPLIT",
11858 DialectType::Spark
11859 | DialectType::Databricks
11860 | DialectType::Hive => "SPLIT",
11861 DialectType::PostgreSQL => "STRING_TO_ARRAY",
11862 DialectType::Redshift => "SPLIT_TO_ARRAY",
11863 _ => "SPLIT",
11864 };
11865 Ok(Expression::Function(Box::new(Function::new(
11866 name.to_string(),
11867 f.args,
11868 ))))
11869 }
11870 // STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
11871 "STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
11872 let name = match target {
11873 DialectType::DuckDB => "STR_SPLIT",
11874 DialectType::Presto | DialectType::Trino => "SPLIT",
11875 DialectType::Spark
11876 | DialectType::Databricks
11877 | DialectType::Hive => "SPLIT",
11878 DialectType::Doris | DialectType::StarRocks => {
11879 "SPLIT_BY_STRING"
11880 }
11881 DialectType::PostgreSQL | DialectType::Redshift => {
11882 "STRING_TO_ARRAY"
11883 }
11884 _ => "SPLIT",
11885 };
11886 // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
11887 if matches!(
11888 target,
11889 DialectType::Spark
11890 | DialectType::Databricks
11891 | DialectType::Hive
11892 ) {
11893 let mut args = f.args;
11894 let x = args.remove(0);
11895 let sep = args.remove(0);
11896 // Wrap separator in CONCAT('\\Q', sep, '\\E')
11897 let escaped_sep =
11898 Expression::Function(Box::new(Function::new(
11899 "CONCAT".to_string(),
11900 vec![
11901 Expression::string("\\Q"),
11902 sep,
11903 Expression::string("\\E"),
11904 ],
11905 )));
11906 Ok(Expression::Function(Box::new(Function::new(
11907 name.to_string(),
11908 vec![x, escaped_sep],
11909 ))))
11910 } else {
11911 Ok(Expression::Function(Box::new(Function::new(
11912 name.to_string(),
11913 f.args,
11914 ))))
11915 }
11916 }
11917 // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
11918 "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
11919 let name = match target {
11920 DialectType::DuckDB => "STR_SPLIT_REGEX",
11921 DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
11922 DialectType::Spark
11923 | DialectType::Databricks
11924 | DialectType::Hive => "SPLIT",
11925 _ => "REGEXP_SPLIT",
11926 };
11927 Ok(Expression::Function(Box::new(Function::new(
11928 name.to_string(),
11929 f.args,
11930 ))))
11931 }
11932 // SPLIT(str, delim) from Snowflake -> DuckDB with CASE wrapper
11933 "SPLIT"
11934 if f.args.len() == 2
11935 && matches!(source, DialectType::Snowflake)
11936 && matches!(target, DialectType::DuckDB) =>
11937 {
11938 let mut args = f.args;
11939 let str_arg = args.remove(0);
11940 let delim_arg = args.remove(0);
11941
11942 // STR_SPLIT(str, delim) as the base
11943 let base_func = Expression::Function(Box::new(Function::new(
11944 "STR_SPLIT".to_string(),
11945 vec![str_arg.clone(), delim_arg.clone()],
11946 )));
11947
11948 // [str] - array with single element
11949 let array_with_input =
11950 Expression::Array(Box::new(crate::expressions::Array {
11951 expressions: vec![str_arg],
11952 }));
11953
11954 // CASE
11955 // WHEN delim IS NULL THEN NULL
11956 // WHEN delim = '' THEN [str]
11957 // ELSE STR_SPLIT(str, delim)
11958 // END
11959 Ok(Expression::Case(Box::new(Case {
11960 operand: None,
11961 whens: vec![
11962 (
11963 Expression::Is(Box::new(BinaryOp {
11964 left: delim_arg.clone(),
11965 right: Expression::Null(Null),
11966 left_comments: vec![],
11967 operator_comments: vec![],
11968 trailing_comments: vec![],
11969 inferred_type: None,
11970 })),
11971 Expression::Null(Null),
11972 ),
11973 (
11974 Expression::Eq(Box::new(BinaryOp {
11975 left: delim_arg,
11976 right: Expression::string(""),
11977 left_comments: vec![],
11978 operator_comments: vec![],
11979 trailing_comments: vec![],
11980 inferred_type: None,
11981 })),
11982 array_with_input,
11983 ),
11984 ],
11985 else_: Some(base_func),
11986 comments: vec![],
11987 inferred_type: None,
11988 })))
11989 }
11990 // SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark
11991 "SPLIT"
11992 if f.args.len() == 2
11993 && matches!(
11994 source,
11995 DialectType::Presto
11996 | DialectType::Trino
11997 | DialectType::Athena
11998 | DialectType::StarRocks
11999 | DialectType::Doris
12000 )
12001 && matches!(
12002 target,
12003 DialectType::Spark
12004 | DialectType::Databricks
12005 | DialectType::Hive
12006 ) =>
12007 {
12008 // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex
12009 let mut args = f.args;
12010 let x = args.remove(0);
12011 let sep = args.remove(0);
12012 let escaped_sep = Expression::Function(Box::new(Function::new(
12013 "CONCAT".to_string(),
12014 vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
12015 )));
12016 Ok(Expression::Function(Box::new(Function::new(
12017 "SPLIT".to_string(),
12018 vec![x, escaped_sep],
12019 ))))
12020 }
12021 // SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
12022 // For ClickHouse target, preserve original name to maintain camelCase
12023 "SUBSTRINGINDEX" => {
12024 let name = if matches!(target, DialectType::ClickHouse) {
12025 f.name.clone()
12026 } else {
12027 "SUBSTRING_INDEX".to_string()
12028 };
12029 Ok(Expression::Function(Box::new(Function::new(name, f.args))))
12030 }
12031 // ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
12032 "ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
12033 // DuckDB source CARDINALITY -> DuckDB target: keep as CARDINALITY (used for maps)
12034 if name == "CARDINALITY"
12035 && matches!(source, DialectType::DuckDB)
12036 && matches!(target, DialectType::DuckDB)
12037 {
12038 return Ok(Expression::Function(f));
12039 }
12040 // Get the array argument (first arg, drop dimension args)
12041 let mut args = f.args;
12042 let arr = if args.is_empty() {
12043 return Ok(Expression::Function(Box::new(Function::new(
12044 name.to_string(),
12045 args,
12046 ))));
12047 } else {
12048 args.remove(0)
12049 };
12050 let name =
12051 match target {
12052 DialectType::Spark
12053 | DialectType::Databricks
12054 | DialectType::Hive => "SIZE",
12055 DialectType::Presto | DialectType::Trino => "CARDINALITY",
12056 DialectType::BigQuery => "ARRAY_LENGTH",
12057 DialectType::DuckDB => {
12058 // DuckDB: use ARRAY_LENGTH with all args
12059 let mut all_args = vec![arr];
12060 all_args.extend(args);
12061 return Ok(Expression::Function(Box::new(
12062 Function::new("ARRAY_LENGTH".to_string(), all_args),
12063 )));
12064 }
12065 DialectType::PostgreSQL | DialectType::Redshift => {
12066 // Keep ARRAY_LENGTH with dimension arg
12067 let mut all_args = vec![arr];
12068 all_args.extend(args);
12069 return Ok(Expression::Function(Box::new(
12070 Function::new("ARRAY_LENGTH".to_string(), all_args),
12071 )));
12072 }
12073 DialectType::ClickHouse => "LENGTH",
12074 _ => "ARRAY_LENGTH",
12075 };
12076 Ok(Expression::Function(Box::new(Function::new(
12077 name.to_string(),
12078 vec![arr],
12079 ))))
12080 }
12081 // TO_VARIANT(x) -> CAST(x AS VARIANT) for DuckDB
12082 "TO_VARIANT" if f.args.len() == 1 => match target {
12083 DialectType::DuckDB => {
12084 let arg = f.args.into_iter().next().unwrap();
12085 Ok(Expression::Cast(Box::new(Cast {
12086 this: arg,
12087 to: DataType::Custom {
12088 name: "VARIANT".to_string(),
12089 },
12090 double_colon_syntax: false,
12091 trailing_comments: Vec::new(),
12092 format: None,
12093 default: None,
12094 inferred_type: None,
12095 })))
12096 }
12097 _ => Ok(Expression::Function(f)),
12098 },
12099 // JSON_GROUP_ARRAY(x) -> JSON_AGG(x) for PostgreSQL
12100 "JSON_GROUP_ARRAY" if f.args.len() == 1 => match target {
12101 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
12102 Function::new("JSON_AGG".to_string(), f.args),
12103 ))),
12104 _ => Ok(Expression::Function(f)),
12105 },
12106 // JSON_GROUP_OBJECT(key, value) -> JSON_OBJECT_AGG(key, value) for PostgreSQL
12107 "JSON_GROUP_OBJECT" if f.args.len() == 2 => match target {
12108 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
12109 Function::new("JSON_OBJECT_AGG".to_string(), f.args),
12110 ))),
12111 _ => Ok(Expression::Function(f)),
12112 },
12113 // UNICODE(x) -> target-specific codepoint function
12114 "UNICODE" if f.args.len() == 1 => {
12115 match target {
12116 DialectType::SQLite | DialectType::DuckDB => {
12117 Ok(Expression::Function(Box::new(Function::new(
12118 "UNICODE".to_string(),
12119 f.args,
12120 ))))
12121 }
12122 DialectType::Oracle => {
12123 // ASCII(UNISTR(x))
12124 let inner = Expression::Function(Box::new(Function::new(
12125 "UNISTR".to_string(),
12126 f.args,
12127 )));
12128 Ok(Expression::Function(Box::new(Function::new(
12129 "ASCII".to_string(),
12130 vec![inner],
12131 ))))
12132 }
12133 DialectType::MySQL => {
12134 // ORD(CONVERT(x USING utf32))
12135 let arg = f.args.into_iter().next().unwrap();
12136 let convert_expr = Expression::ConvertToCharset(Box::new(
12137 crate::expressions::ConvertToCharset {
12138 this: Box::new(arg),
12139 dest: Some(Box::new(Expression::Identifier(
12140 crate::expressions::Identifier::new("utf32"),
12141 ))),
12142 source: None,
12143 },
12144 ));
12145 Ok(Expression::Function(Box::new(Function::new(
12146 "ORD".to_string(),
12147 vec![convert_expr],
12148 ))))
12149 }
12150 _ => Ok(Expression::Function(Box::new(Function::new(
12151 "ASCII".to_string(),
12152 f.args,
12153 )))),
12154 }
12155 }
12156 // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
12157 "XOR" if f.args.len() >= 2 => {
12158 match target {
12159 DialectType::ClickHouse => {
12160 // ClickHouse: keep as xor() function with lowercase name
12161 Ok(Expression::Function(Box::new(Function::new(
12162 "xor".to_string(),
12163 f.args,
12164 ))))
12165 }
12166 DialectType::Presto | DialectType::Trino => {
12167 if f.args.len() == 2 {
12168 Ok(Expression::Function(Box::new(Function::new(
12169 "BITWISE_XOR".to_string(),
12170 f.args,
12171 ))))
12172 } else {
12173 // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
12174 let mut args = f.args;
12175 let first = args.remove(0);
12176 let second = args.remove(0);
12177 let mut result =
12178 Expression::Function(Box::new(Function::new(
12179 "BITWISE_XOR".to_string(),
12180 vec![first, second],
12181 )));
12182 for arg in args {
12183 result =
12184 Expression::Function(Box::new(Function::new(
12185 "BITWISE_XOR".to_string(),
12186 vec![result, arg],
12187 )));
12188 }
12189 Ok(result)
12190 }
12191 }
12192 DialectType::MySQL
12193 | DialectType::SingleStore
12194 | DialectType::Doris
12195 | DialectType::StarRocks => {
12196 // Convert XOR(a, b, c) -> Expression::Xor with expressions list
12197 let args = f.args;
12198 Ok(Expression::Xor(Box::new(crate::expressions::Xor {
12199 this: None,
12200 expression: None,
12201 expressions: args,
12202 })))
12203 }
12204 DialectType::PostgreSQL | DialectType::Redshift => {
12205 // PostgreSQL: a # b (hash operator for XOR)
12206 let mut args = f.args;
12207 let first = args.remove(0);
12208 let second = args.remove(0);
12209 let mut result = Expression::BitwiseXor(Box::new(
12210 BinaryOp::new(first, second),
12211 ));
12212 for arg in args {
12213 result = Expression::BitwiseXor(Box::new(
12214 BinaryOp::new(result, arg),
12215 ));
12216 }
12217 Ok(result)
12218 }
12219 DialectType::DuckDB => {
12220 // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
12221 Ok(Expression::Function(Box::new(Function::new(
12222 "XOR".to_string(),
12223 f.args,
12224 ))))
12225 }
12226 DialectType::BigQuery => {
12227 // BigQuery: a ^ b (caret operator for XOR)
12228 let mut args = f.args;
12229 let first = args.remove(0);
12230 let second = args.remove(0);
12231 let mut result = Expression::BitwiseXor(Box::new(
12232 BinaryOp::new(first, second),
12233 ));
12234 for arg in args {
12235 result = Expression::BitwiseXor(Box::new(
12236 BinaryOp::new(result, arg),
12237 ));
12238 }
12239 Ok(result)
12240 }
12241 _ => Ok(Expression::Function(Box::new(Function::new(
12242 "XOR".to_string(),
12243 f.args,
12244 )))),
12245 }
12246 }
12247 // ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
12248 "ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
12249 match target {
12250 DialectType::Spark
12251 | DialectType::Databricks
12252 | DialectType::Hive => {
12253 let mut args = f.args;
12254 args.push(Expression::Identifier(
12255 crate::expressions::Identifier::new("FALSE"),
12256 ));
12257 Ok(Expression::Function(Box::new(Function::new(
12258 "SORT_ARRAY".to_string(),
12259 args,
12260 ))))
12261 }
12262 DialectType::Presto
12263 | DialectType::Trino
12264 | DialectType::Athena => {
12265 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
12266 let arr = f.args.into_iter().next().unwrap();
12267 let lambda = Expression::Lambda(Box::new(
12268 crate::expressions::LambdaExpr {
12269 parameters: vec![
12270 Identifier::new("a"),
12271 Identifier::new("b"),
12272 ],
12273 colon: false,
12274 parameter_types: Vec::new(),
12275 body: Expression::Case(Box::new(Case {
12276 operand: None,
12277 whens: vec![
12278 (
12279 Expression::Lt(Box::new(
12280 BinaryOp::new(
12281 Expression::Identifier(
12282 Identifier::new("a"),
12283 ),
12284 Expression::Identifier(
12285 Identifier::new("b"),
12286 ),
12287 ),
12288 )),
12289 Expression::number(1),
12290 ),
12291 (
12292 Expression::Gt(Box::new(
12293 BinaryOp::new(
12294 Expression::Identifier(
12295 Identifier::new("a"),
12296 ),
12297 Expression::Identifier(
12298 Identifier::new("b"),
12299 ),
12300 ),
12301 )),
12302 Expression::Neg(Box::new(
12303 crate::expressions::UnaryOp {
12304 this: Expression::number(1),
12305 inferred_type: None,
12306 },
12307 )),
12308 ),
12309 ],
12310 else_: Some(Expression::number(0)),
12311 comments: Vec::new(),
12312 inferred_type: None,
12313 })),
12314 },
12315 ));
12316 Ok(Expression::Function(Box::new(Function::new(
12317 "ARRAY_SORT".to_string(),
12318 vec![arr, lambda],
12319 ))))
12320 }
12321 _ => Ok(Expression::Function(Box::new(Function::new(
12322 "ARRAY_REVERSE_SORT".to_string(),
12323 f.args,
12324 )))),
12325 }
12326 }
12327 // ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
12328 "ENCODE" if f.args.len() == 1 => match target {
12329 DialectType::Spark
12330 | DialectType::Databricks
12331 | DialectType::Hive => {
12332 let mut args = f.args;
12333 args.push(Expression::string("utf-8"));
12334 Ok(Expression::Function(Box::new(Function::new(
12335 "ENCODE".to_string(),
12336 args,
12337 ))))
12338 }
12339 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
12340 Ok(Expression::Function(Box::new(Function::new(
12341 "TO_UTF8".to_string(),
12342 f.args,
12343 ))))
12344 }
12345 _ => Ok(Expression::Function(Box::new(Function::new(
12346 "ENCODE".to_string(),
12347 f.args,
12348 )))),
12349 },
12350 // DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
12351 "DECODE" if f.args.len() == 1 => match target {
12352 DialectType::Spark
12353 | DialectType::Databricks
12354 | DialectType::Hive => {
12355 let mut args = f.args;
12356 args.push(Expression::string("utf-8"));
12357 Ok(Expression::Function(Box::new(Function::new(
12358 "DECODE".to_string(),
12359 args,
12360 ))))
12361 }
12362 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
12363 Ok(Expression::Function(Box::new(Function::new(
12364 "FROM_UTF8".to_string(),
12365 f.args,
12366 ))))
12367 }
12368 _ => Ok(Expression::Function(Box::new(Function::new(
12369 "DECODE".to_string(),
12370 f.args,
12371 )))),
12372 },
// QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Databricks/Hive, APPROX_PERCENTILE(x, p) for Presto/Trino, PERCENTILE_CONT(x, p) for BigQuery
12374 "QUANTILE" if f.args.len() == 2 => {
12375 let name = match target {
12376 DialectType::Spark
12377 | DialectType::Databricks
12378 | DialectType::Hive => "PERCENTILE",
12379 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
12380 DialectType::BigQuery => "PERCENTILE_CONT",
12381 _ => "QUANTILE",
12382 };
12383 Ok(Expression::Function(Box::new(Function::new(
12384 name.to_string(),
12385 f.args,
12386 ))))
12387 }
// QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Redshift/Snowflake
12389 "QUANTILE_CONT" if f.args.len() == 2 => {
12390 let mut args = f.args;
12391 let column = args.remove(0);
12392 let quantile = args.remove(0);
12393 match target {
12394 DialectType::DuckDB => {
12395 Ok(Expression::Function(Box::new(Function::new(
12396 "QUANTILE_CONT".to_string(),
12397 vec![column, quantile],
12398 ))))
12399 }
12400 DialectType::PostgreSQL
12401 | DialectType::Redshift
12402 | DialectType::Snowflake => {
12403 // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
12404 let inner = Expression::PercentileCont(Box::new(
12405 crate::expressions::PercentileFunc {
12406 this: column.clone(),
12407 percentile: quantile,
12408 order_by: None,
12409 filter: None,
12410 },
12411 ));
12412 Ok(Expression::WithinGroup(Box::new(
12413 crate::expressions::WithinGroup {
12414 this: inner,
12415 order_by: vec![crate::expressions::Ordered {
12416 this: column,
12417 desc: false,
12418 nulls_first: None,
12419 explicit_asc: false,
12420 with_fill: None,
12421 }],
12422 },
12423 )))
12424 }
12425 _ => Ok(Expression::Function(Box::new(Function::new(
12426 "QUANTILE_CONT".to_string(),
12427 vec![column, quantile],
12428 )))),
12429 }
12430 }
// QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Redshift/Snowflake
12432 "QUANTILE_DISC" if f.args.len() == 2 => {
12433 let mut args = f.args;
12434 let column = args.remove(0);
12435 let quantile = args.remove(0);
12436 match target {
12437 DialectType::DuckDB => {
12438 Ok(Expression::Function(Box::new(Function::new(
12439 "QUANTILE_DISC".to_string(),
12440 vec![column, quantile],
12441 ))))
12442 }
12443 DialectType::PostgreSQL
12444 | DialectType::Redshift
12445 | DialectType::Snowflake => {
12446 // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
12447 let inner = Expression::PercentileDisc(Box::new(
12448 crate::expressions::PercentileFunc {
12449 this: column.clone(),
12450 percentile: quantile,
12451 order_by: None,
12452 filter: None,
12453 },
12454 ));
12455 Ok(Expression::WithinGroup(Box::new(
12456 crate::expressions::WithinGroup {
12457 this: inner,
12458 order_by: vec![crate::expressions::Ordered {
12459 this: column,
12460 desc: false,
12461 nulls_first: None,
12462 explicit_asc: false,
12463 with_fill: None,
12464 }],
12465 },
12466 )))
12467 }
12468 _ => Ok(Expression::Function(Box::new(Function::new(
12469 "QUANTILE_DISC".to_string(),
12470 vec![column, quantile],
12471 )))),
12472 }
12473 }
12474 // PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
12475 "PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
12476 let name = match target {
12477 DialectType::Presto
12478 | DialectType::Trino
12479 | DialectType::Athena => "APPROX_PERCENTILE",
12480 DialectType::Spark
12481 | DialectType::Databricks
12482 | DialectType::Hive => "PERCENTILE_APPROX",
12483 DialectType::DuckDB => "APPROX_QUANTILE",
12484 DialectType::PostgreSQL | DialectType::Redshift => {
12485 "PERCENTILE_CONT"
12486 }
12487 _ => &f.name,
12488 };
12489 Ok(Expression::Function(Box::new(Function::new(
12490 name.to_string(),
12491 f.args,
12492 ))))
12493 }
// EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Databricks/Hive, TO_UNIXTIME(x) for Presto/Trino
12495 "EPOCH" if f.args.len() == 1 => {
12496 let name = match target {
12497 DialectType::Spark
12498 | DialectType::Databricks
12499 | DialectType::Hive => "UNIX_TIMESTAMP",
12500 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
12501 _ => "EPOCH",
12502 };
12503 Ok(Expression::Function(Box::new(Function::new(
12504 name.to_string(),
12505 f.args,
12506 ))))
12507 }
12508 // EPOCH_MS(x) -> target-specific epoch milliseconds conversion
12509 "EPOCH_MS" if f.args.len() == 1 => {
12510 match target {
12511 DialectType::Spark | DialectType::Databricks => {
12512 Ok(Expression::Function(Box::new(Function::new(
12513 "TIMESTAMP_MILLIS".to_string(),
12514 f.args,
12515 ))))
12516 }
12517 DialectType::Hive => {
12518 // Hive: FROM_UNIXTIME(x / 1000)
12519 let arg = f.args.into_iter().next().unwrap();
12520 let div_expr = Expression::Div(Box::new(
12521 crate::expressions::BinaryOp::new(
12522 arg,
12523 Expression::number(1000),
12524 ),
12525 ));
12526 Ok(Expression::Function(Box::new(Function::new(
12527 "FROM_UNIXTIME".to_string(),
12528 vec![div_expr],
12529 ))))
12530 }
12531 DialectType::Presto | DialectType::Trino => {
12532 Ok(Expression::Function(Box::new(Function::new(
12533 "FROM_UNIXTIME".to_string(),
12534 vec![Expression::Div(Box::new(
12535 crate::expressions::BinaryOp::new(
12536 f.args.into_iter().next().unwrap(),
12537 Expression::number(1000),
12538 ),
12539 ))],
12540 ))))
12541 }
12542 _ => Ok(Expression::Function(Box::new(Function::new(
12543 "EPOCH_MS".to_string(),
12544 f.args,
12545 )))),
12546 }
12547 }
12548 // HASHBYTES('algorithm', x) -> target-specific hash function
12549 "HASHBYTES" if f.args.len() == 2 => {
12550 // Keep HASHBYTES as-is for TSQL target
12551 if matches!(target, DialectType::TSQL) {
12552 return Ok(Expression::Function(f));
12553 }
12554 let algo_expr = &f.args[0];
12555 let algo = match algo_expr {
12556 Expression::Literal(lit)
12557 if matches!(
12558 lit.as_ref(),
12559 crate::expressions::Literal::String(_)
12560 ) =>
12561 {
12562 let crate::expressions::Literal::String(s) = lit.as_ref()
12563 else {
12564 unreachable!()
12565 };
12566 s.to_ascii_uppercase()
12567 }
12568 _ => return Ok(Expression::Function(f)),
12569 };
12570 let data_arg = f.args.into_iter().nth(1).unwrap();
12571 match algo.as_str() {
12572 "SHA1" => {
12573 let name = match target {
12574 DialectType::Spark | DialectType::Databricks => "SHA",
12575 DialectType::Hive => "SHA1",
12576 _ => "SHA1",
12577 };
12578 Ok(Expression::Function(Box::new(Function::new(
12579 name.to_string(),
12580 vec![data_arg],
12581 ))))
12582 }
12583 "SHA2_256" => {
12584 Ok(Expression::Function(Box::new(Function::new(
12585 "SHA2".to_string(),
12586 vec![data_arg, Expression::number(256)],
12587 ))))
12588 }
12589 "SHA2_512" => {
12590 Ok(Expression::Function(Box::new(Function::new(
12591 "SHA2".to_string(),
12592 vec![data_arg, Expression::number(512)],
12593 ))))
12594 }
12595 "MD5" => Ok(Expression::Function(Box::new(Function::new(
12596 "MD5".to_string(),
12597 vec![data_arg],
12598 )))),
12599 _ => Ok(Expression::Function(Box::new(Function::new(
12600 "HASHBYTES".to_string(),
12601 vec![Expression::string(&algo), data_arg],
12602 )))),
12603 }
12604 }
12605 // JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
12606 "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
12607 let is_text = name == "JSON_EXTRACT_PATH_TEXT";
12608 let mut args = f.args;
12609 let json_expr = args.remove(0);
12610 // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
12611 let mut json_path = "$".to_string();
12612 for a in &args {
12613 match a {
12614 Expression::Literal(lit)
12615 if matches!(
12616 lit.as_ref(),
12617 crate::expressions::Literal::String(_)
12618 ) =>
12619 {
12620 let crate::expressions::Literal::String(s) =
12621 lit.as_ref()
12622 else {
12623 unreachable!()
12624 };
12625 // Numeric string keys become array indices: [0]
12626 if s.chars().all(|c| c.is_ascii_digit()) {
12627 json_path.push('[');
12628 json_path.push_str(s);
12629 json_path.push(']');
12630 } else {
12631 json_path.push('.');
12632 json_path.push_str(s);
12633 }
12634 }
12635 _ => {
12636 json_path.push_str(".?");
12637 }
12638 }
12639 }
12640 match target {
12641 DialectType::Spark
12642 | DialectType::Databricks
12643 | DialectType::Hive => {
12644 Ok(Expression::Function(Box::new(Function::new(
12645 "GET_JSON_OBJECT".to_string(),
12646 vec![json_expr, Expression::string(&json_path)],
12647 ))))
12648 }
12649 DialectType::Presto | DialectType::Trino => {
12650 let func_name = if is_text {
12651 "JSON_EXTRACT_SCALAR"
12652 } else {
12653 "JSON_EXTRACT"
12654 };
12655 Ok(Expression::Function(Box::new(Function::new(
12656 func_name.to_string(),
12657 vec![json_expr, Expression::string(&json_path)],
12658 ))))
12659 }
12660 DialectType::BigQuery | DialectType::MySQL => {
12661 let func_name = if is_text {
12662 "JSON_EXTRACT_SCALAR"
12663 } else {
12664 "JSON_EXTRACT"
12665 };
12666 Ok(Expression::Function(Box::new(Function::new(
12667 func_name.to_string(),
12668 vec![json_expr, Expression::string(&json_path)],
12669 ))))
12670 }
12671 DialectType::PostgreSQL | DialectType::Materialize => {
12672 // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
12673 let func_name = if is_text {
12674 "JSON_EXTRACT_PATH_TEXT"
12675 } else {
12676 "JSON_EXTRACT_PATH"
12677 };
12678 let mut new_args = vec![json_expr];
12679 new_args.extend(args);
12680 Ok(Expression::Function(Box::new(Function::new(
12681 func_name.to_string(),
12682 new_args,
12683 ))))
12684 }
12685 DialectType::DuckDB | DialectType::SQLite => {
12686 // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
12687 if is_text {
12688 Ok(Expression::JsonExtractScalar(Box::new(
12689 crate::expressions::JsonExtractFunc {
12690 this: json_expr,
12691 path: Expression::string(&json_path),
12692 returning: None,
12693 arrow_syntax: true,
12694 hash_arrow_syntax: false,
12695 wrapper_option: None,
12696 quotes_option: None,
12697 on_scalar_string: false,
12698 on_error: None,
12699 },
12700 )))
12701 } else {
12702 Ok(Expression::JsonExtract(Box::new(
12703 crate::expressions::JsonExtractFunc {
12704 this: json_expr,
12705 path: Expression::string(&json_path),
12706 returning: None,
12707 arrow_syntax: true,
12708 hash_arrow_syntax: false,
12709 wrapper_option: None,
12710 quotes_option: None,
12711 on_scalar_string: false,
12712 on_error: None,
12713 },
12714 )))
12715 }
12716 }
12717 DialectType::Redshift => {
12718 // Keep as JSON_EXTRACT_PATH_TEXT for Redshift
12719 let mut new_args = vec![json_expr];
12720 new_args.extend(args);
12721 Ok(Expression::Function(Box::new(Function::new(
12722 "JSON_EXTRACT_PATH_TEXT".to_string(),
12723 new_args,
12724 ))))
12725 }
12726 DialectType::TSQL => {
12727 // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
12728 let jq = Expression::Function(Box::new(Function::new(
12729 "JSON_QUERY".to_string(),
12730 vec![json_expr.clone(), Expression::string(&json_path)],
12731 )));
12732 let jv = Expression::Function(Box::new(Function::new(
12733 "JSON_VALUE".to_string(),
12734 vec![json_expr, Expression::string(&json_path)],
12735 )));
12736 Ok(Expression::Function(Box::new(Function::new(
12737 "ISNULL".to_string(),
12738 vec![jq, jv],
12739 ))))
12740 }
12741 DialectType::ClickHouse => {
12742 let func_name = if is_text {
12743 "JSONExtractString"
12744 } else {
12745 "JSONExtractRaw"
12746 };
12747 let mut new_args = vec![json_expr];
12748 new_args.extend(args);
12749 Ok(Expression::Function(Box::new(Function::new(
12750 func_name.to_string(),
12751 new_args,
12752 ))))
12753 }
12754 _ => {
12755 let func_name = if is_text {
12756 "JSON_EXTRACT_SCALAR"
12757 } else {
12758 "JSON_EXTRACT"
12759 };
12760 Ok(Expression::Function(Box::new(Function::new(
12761 func_name.to_string(),
12762 vec![json_expr, Expression::string(&json_path)],
12763 ))))
12764 }
12765 }
12766 }
12767 // APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
12768 "APPROX_DISTINCT" if f.args.len() >= 1 => {
12769 let name = match target {
12770 DialectType::Spark
12771 | DialectType::Databricks
12772 | DialectType::Hive
12773 | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
12774 _ => "APPROX_DISTINCT",
12775 };
12776 let mut args = f.args;
12777 // Hive doesn't support the accuracy parameter
12778 if name == "APPROX_COUNT_DISTINCT"
12779 && matches!(target, DialectType::Hive)
12780 {
12781 args.truncate(1);
12782 }
12783 Ok(Expression::Function(Box::new(Function::new(
12784 name.to_string(),
12785 args,
12786 ))))
12787 }
12788 // REGEXP_EXTRACT(x, pattern) - normalize default group index
12789 "REGEXP_EXTRACT" if f.args.len() == 2 => {
12790 // Determine source default group index
12791 let source_default = match source {
12792 DialectType::Presto
12793 | DialectType::Trino
12794 | DialectType::DuckDB => 0,
12795 _ => 1, // Hive/Spark/Databricks default = 1
12796 };
12797 // Determine target default group index
12798 let target_default = match target {
12799 DialectType::Presto
12800 | DialectType::Trino
12801 | DialectType::DuckDB
12802 | DialectType::BigQuery => 0,
12803 DialectType::Snowflake => {
12804 // Snowflake uses REGEXP_SUBSTR
12805 return Ok(Expression::Function(Box::new(Function::new(
12806 "REGEXP_SUBSTR".to_string(),
12807 f.args,
12808 ))));
12809 }
12810 _ => 1, // Hive/Spark/Databricks default = 1
12811 };
12812 if source_default != target_default {
12813 let mut args = f.args;
12814 args.push(Expression::number(source_default));
12815 Ok(Expression::Function(Box::new(Function::new(
12816 "REGEXP_EXTRACT".to_string(),
12817 args,
12818 ))))
12819 } else {
12820 Ok(Expression::Function(Box::new(Function::new(
12821 "REGEXP_EXTRACT".to_string(),
12822 f.args,
12823 ))))
12824 }
12825 }
12826 // RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
12827 "RLIKE" if f.args.len() == 2 => {
12828 let mut args = f.args;
12829 let str_expr = args.remove(0);
12830 let pattern = args.remove(0);
12831 match target {
12832 DialectType::DuckDB => {
12833 // REGEXP_MATCHES(str, pattern)
12834 Ok(Expression::Function(Box::new(Function::new(
12835 "REGEXP_MATCHES".to_string(),
12836 vec![str_expr, pattern],
12837 ))))
12838 }
12839 _ => {
12840 // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
12841 Ok(Expression::RegexpLike(Box::new(
12842 crate::expressions::RegexpFunc {
12843 this: str_expr,
12844 pattern,
12845 flags: None,
12846 },
12847 )))
12848 }
12849 }
12850 }
12851 // EOMONTH(date[, month_offset]) -> target-specific
12852 "EOMONTH" if f.args.len() >= 1 => {
12853 let mut args = f.args;
12854 let date_arg = args.remove(0);
12855 let month_offset = if !args.is_empty() {
12856 Some(args.remove(0))
12857 } else {
12858 None
12859 };
12860
12861 // Helper: wrap date in CAST to DATE
12862 let cast_to_date = |e: Expression| -> Expression {
12863 Expression::Cast(Box::new(Cast {
12864 this: e,
12865 to: DataType::Date,
12866 trailing_comments: vec![],
12867 double_colon_syntax: false,
12868 format: None,
12869 default: None,
12870 inferred_type: None,
12871 }))
12872 };
12873
12874 match target {
12875 DialectType::TSQL | DialectType::Fabric => {
12876 // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
12877 let date = cast_to_date(date_arg);
12878 let date = if let Some(offset) = month_offset {
12879 Expression::Function(Box::new(Function::new(
12880 "DATEADD".to_string(),
12881 vec![
12882 Expression::Identifier(Identifier::new(
12883 "MONTH",
12884 )),
12885 offset,
12886 date,
12887 ],
12888 )))
12889 } else {
12890 date
12891 };
12892 Ok(Expression::Function(Box::new(Function::new(
12893 "EOMONTH".to_string(),
12894 vec![date],
12895 ))))
12896 }
12897 DialectType::Presto
12898 | DialectType::Trino
12899 | DialectType::Athena => {
12900 // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
12901 // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
12902 let cast_ts = Expression::Cast(Box::new(Cast {
12903 this: date_arg,
12904 to: DataType::Timestamp {
12905 timezone: false,
12906 precision: None,
12907 },
12908 trailing_comments: vec![],
12909 double_colon_syntax: false,
12910 format: None,
12911 default: None,
12912 inferred_type: None,
12913 }));
12914 let date = cast_to_date(cast_ts);
12915 let date = if let Some(offset) = month_offset {
12916 Expression::Function(Box::new(Function::new(
12917 "DATE_ADD".to_string(),
12918 vec![Expression::string("MONTH"), offset, date],
12919 )))
12920 } else {
12921 date
12922 };
12923 Ok(Expression::Function(Box::new(Function::new(
12924 "LAST_DAY_OF_MONTH".to_string(),
12925 vec![date],
12926 ))))
12927 }
12928 DialectType::PostgreSQL => {
12929 // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
12930 let date = cast_to_date(date_arg);
12931 let date = if let Some(offset) = month_offset {
12932 let interval_str = format!(
12933 "{} MONTH",
12934 Self::expr_to_string_static(&offset)
12935 );
12936 Expression::Add(Box::new(
12937 crate::expressions::BinaryOp::new(
12938 date,
12939 Expression::Interval(Box::new(
12940 crate::expressions::Interval {
12941 this: Some(Expression::string(
12942 &interval_str,
12943 )),
12944 unit: None,
12945 },
12946 )),
12947 ),
12948 ))
12949 } else {
12950 date
12951 };
12952 let truncated =
12953 Expression::Function(Box::new(Function::new(
12954 "DATE_TRUNC".to_string(),
12955 vec![Expression::string("MONTH"), date],
12956 )));
12957 let plus_month = Expression::Add(Box::new(
12958 crate::expressions::BinaryOp::new(
12959 truncated,
12960 Expression::Interval(Box::new(
12961 crate::expressions::Interval {
12962 this: Some(Expression::string("1 MONTH")),
12963 unit: None,
12964 },
12965 )),
12966 ),
12967 ));
12968 let minus_day = Expression::Sub(Box::new(
12969 crate::expressions::BinaryOp::new(
12970 plus_month,
12971 Expression::Interval(Box::new(
12972 crate::expressions::Interval {
12973 this: Some(Expression::string("1 DAY")),
12974 unit: None,
12975 },
12976 )),
12977 ),
12978 ));
12979 Ok(Expression::Cast(Box::new(Cast {
12980 this: minus_day,
12981 to: DataType::Date,
12982 trailing_comments: vec![],
12983 double_colon_syntax: false,
12984 format: None,
12985 default: None,
12986 inferred_type: None,
12987 })))
12988 }
12989 DialectType::DuckDB => {
12990 // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
12991 let date = cast_to_date(date_arg);
12992 let date = if let Some(offset) = month_offset {
12993 // Wrap negative numbers in parentheses for DuckDB INTERVAL
12994 let interval_val =
12995 if matches!(&offset, Expression::Neg(_)) {
12996 Expression::Paren(Box::new(
12997 crate::expressions::Paren {
12998 this: offset,
12999 trailing_comments: Vec::new(),
13000 },
13001 ))
13002 } else {
13003 offset
13004 };
13005 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
13006 date,
13007 Expression::Interval(Box::new(crate::expressions::Interval {
13008 this: Some(interval_val),
13009 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
13010 unit: crate::expressions::IntervalUnit::Month,
13011 use_plural: false,
13012 }),
13013 })),
13014 )))
13015 } else {
13016 date
13017 };
13018 Ok(Expression::Function(Box::new(Function::new(
13019 "LAST_DAY".to_string(),
13020 vec![date],
13021 ))))
13022 }
13023 DialectType::Snowflake | DialectType::Redshift => {
13024 // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
13025 // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
13026 let date = if matches!(target, DialectType::Snowflake) {
13027 Expression::Function(Box::new(Function::new(
13028 "TO_DATE".to_string(),
13029 vec![date_arg],
13030 )))
13031 } else {
13032 cast_to_date(date_arg)
13033 };
13034 let date = if let Some(offset) = month_offset {
13035 Expression::Function(Box::new(Function::new(
13036 "DATEADD".to_string(),
13037 vec![
13038 Expression::Identifier(Identifier::new(
13039 "MONTH",
13040 )),
13041 offset,
13042 date,
13043 ],
13044 )))
13045 } else {
13046 date
13047 };
13048 Ok(Expression::Function(Box::new(Function::new(
13049 "LAST_DAY".to_string(),
13050 vec![date],
13051 ))))
13052 }
13053 DialectType::Spark | DialectType::Databricks => {
13054 // Spark: LAST_DAY(TO_DATE(date))
13055 // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
13056 let date = Expression::Function(Box::new(Function::new(
13057 "TO_DATE".to_string(),
13058 vec![date_arg],
13059 )));
13060 let date = if let Some(offset) = month_offset {
13061 Expression::Function(Box::new(Function::new(
13062 "ADD_MONTHS".to_string(),
13063 vec![date, offset],
13064 )))
13065 } else {
13066 date
13067 };
13068 Ok(Expression::Function(Box::new(Function::new(
13069 "LAST_DAY".to_string(),
13070 vec![date],
13071 ))))
13072 }
13073 DialectType::MySQL => {
13074 // MySQL: LAST_DAY(DATE(date)) - no offset
13075 // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
13076 let date = if let Some(offset) = month_offset {
13077 let iu = crate::expressions::IntervalUnit::Month;
13078 Expression::DateAdd(Box::new(
13079 crate::expressions::DateAddFunc {
13080 this: date_arg,
13081 interval: offset,
13082 unit: iu,
13083 },
13084 ))
13085 } else {
13086 Expression::Function(Box::new(Function::new(
13087 "DATE".to_string(),
13088 vec![date_arg],
13089 )))
13090 };
13091 Ok(Expression::Function(Box::new(Function::new(
13092 "LAST_DAY".to_string(),
13093 vec![date],
13094 ))))
13095 }
13096 DialectType::BigQuery => {
13097 // BigQuery: LAST_DAY(CAST(date AS DATE))
13098 // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
13099 let date = cast_to_date(date_arg);
13100 let date = if let Some(offset) = month_offset {
13101 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
13102 this: Some(offset),
13103 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
13104 unit: crate::expressions::IntervalUnit::Month,
13105 use_plural: false,
13106 }),
13107 }));
13108 Expression::Function(Box::new(Function::new(
13109 "DATE_ADD".to_string(),
13110 vec![date, interval],
13111 )))
13112 } else {
13113 date
13114 };
13115 Ok(Expression::Function(Box::new(Function::new(
13116 "LAST_DAY".to_string(),
13117 vec![date],
13118 ))))
13119 }
13120 DialectType::ClickHouse => {
13121 // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
13122 let date = Expression::Cast(Box::new(Cast {
13123 this: date_arg,
13124 to: DataType::Nullable {
13125 inner: Box::new(DataType::Date),
13126 },
13127 trailing_comments: vec![],
13128 double_colon_syntax: false,
13129 format: None,
13130 default: None,
13131 inferred_type: None,
13132 }));
13133 let date = if let Some(offset) = month_offset {
13134 Expression::Function(Box::new(Function::new(
13135 "DATE_ADD".to_string(),
13136 vec![
13137 Expression::Identifier(Identifier::new(
13138 "MONTH",
13139 )),
13140 offset,
13141 date,
13142 ],
13143 )))
13144 } else {
13145 date
13146 };
13147 Ok(Expression::Function(Box::new(Function::new(
13148 "LAST_DAY".to_string(),
13149 vec![date],
13150 ))))
13151 }
13152 DialectType::Hive => {
13153 // Hive: LAST_DAY(date)
13154 let date = if let Some(offset) = month_offset {
13155 Expression::Function(Box::new(Function::new(
13156 "ADD_MONTHS".to_string(),
13157 vec![date_arg, offset],
13158 )))
13159 } else {
13160 date_arg
13161 };
13162 Ok(Expression::Function(Box::new(Function::new(
13163 "LAST_DAY".to_string(),
13164 vec![date],
13165 ))))
13166 }
13167 _ => {
13168 // Default: LAST_DAY(date)
13169 let date = if let Some(offset) = month_offset {
13170 let unit =
13171 Expression::Identifier(Identifier::new("MONTH"));
13172 Expression::Function(Box::new(Function::new(
13173 "DATEADD".to_string(),
13174 vec![unit, offset, date_arg],
13175 )))
13176 } else {
13177 date_arg
13178 };
13179 Ok(Expression::Function(Box::new(Function::new(
13180 "LAST_DAY".to_string(),
13181 vec![date],
13182 ))))
13183 }
13184 }
13185 }
13186 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
13187 "LAST_DAY" | "LAST_DAY_OF_MONTH"
13188 if !matches!(source, DialectType::BigQuery)
13189 && f.args.len() >= 1 =>
13190 {
13191 let first_arg = f.args.into_iter().next().unwrap();
13192 match target {
13193 DialectType::TSQL | DialectType::Fabric => {
13194 Ok(Expression::Function(Box::new(Function::new(
13195 "EOMONTH".to_string(),
13196 vec![first_arg],
13197 ))))
13198 }
13199 DialectType::Presto
13200 | DialectType::Trino
13201 | DialectType::Athena => {
13202 Ok(Expression::Function(Box::new(Function::new(
13203 "LAST_DAY_OF_MONTH".to_string(),
13204 vec![first_arg],
13205 ))))
13206 }
13207 _ => Ok(Expression::Function(Box::new(Function::new(
13208 "LAST_DAY".to_string(),
13209 vec![first_arg],
13210 )))),
13211 }
13212 }
13213 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
13214 "MAP"
13215 if f.args.len() == 2
13216 && matches!(
13217 source,
13218 DialectType::Presto
13219 | DialectType::Trino
13220 | DialectType::Athena
13221 ) =>
13222 {
13223 let keys_arg = f.args[0].clone();
13224 let vals_arg = f.args[1].clone();
13225
13226 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
13227 fn extract_array_elements(
13228 expr: &Expression,
13229 ) -> Option<&Vec<Expression>> {
13230 match expr {
13231 Expression::Array(arr) => Some(&arr.expressions),
13232 Expression::ArrayFunc(arr) => Some(&arr.expressions),
13233 Expression::Function(f)
13234 if f.name.eq_ignore_ascii_case("ARRAY") =>
13235 {
13236 Some(&f.args)
13237 }
13238 _ => None,
13239 }
13240 }
13241
13242 match target {
13243 DialectType::Spark | DialectType::Databricks => {
13244 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
13245 Ok(Expression::Function(Box::new(Function::new(
13246 "MAP_FROM_ARRAYS".to_string(),
13247 f.args,
13248 ))))
13249 }
13250 DialectType::Hive => {
13251 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
13252 if let (Some(keys), Some(vals)) = (
13253 extract_array_elements(&keys_arg),
13254 extract_array_elements(&vals_arg),
13255 ) {
13256 if keys.len() == vals.len() {
13257 let mut interleaved = Vec::new();
13258 for (k, v) in keys.iter().zip(vals.iter()) {
13259 interleaved.push(k.clone());
13260 interleaved.push(v.clone());
13261 }
13262 Ok(Expression::Function(Box::new(Function::new(
13263 "MAP".to_string(),
13264 interleaved,
13265 ))))
13266 } else {
13267 Ok(Expression::Function(Box::new(Function::new(
13268 "MAP".to_string(),
13269 f.args,
13270 ))))
13271 }
13272 } else {
13273 Ok(Expression::Function(Box::new(Function::new(
13274 "MAP".to_string(),
13275 f.args,
13276 ))))
13277 }
13278 }
13279 DialectType::Snowflake => {
13280 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
13281 if let (Some(keys), Some(vals)) = (
13282 extract_array_elements(&keys_arg),
13283 extract_array_elements(&vals_arg),
13284 ) {
13285 if keys.len() == vals.len() {
13286 let mut interleaved = Vec::new();
13287 for (k, v) in keys.iter().zip(vals.iter()) {
13288 interleaved.push(k.clone());
13289 interleaved.push(v.clone());
13290 }
13291 Ok(Expression::Function(Box::new(Function::new(
13292 "OBJECT_CONSTRUCT".to_string(),
13293 interleaved,
13294 ))))
13295 } else {
13296 Ok(Expression::Function(Box::new(Function::new(
13297 "MAP".to_string(),
13298 f.args,
13299 ))))
13300 }
13301 } else {
13302 Ok(Expression::Function(Box::new(Function::new(
13303 "MAP".to_string(),
13304 f.args,
13305 ))))
13306 }
13307 }
13308 _ => Ok(Expression::Function(f)),
13309 }
13310 }
13311 // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
13312 "MAP"
13313 if f.args.is_empty()
13314 && matches!(
13315 source,
13316 DialectType::Hive
13317 | DialectType::Spark
13318 | DialectType::Databricks
13319 )
13320 && matches!(
13321 target,
13322 DialectType::Presto
13323 | DialectType::Trino
13324 | DialectType::Athena
13325 ) =>
13326 {
13327 let empty_keys =
13328 Expression::Array(Box::new(crate::expressions::Array {
13329 expressions: vec![],
13330 }));
13331 let empty_vals =
13332 Expression::Array(Box::new(crate::expressions::Array {
13333 expressions: vec![],
13334 }));
13335 Ok(Expression::Function(Box::new(Function::new(
13336 "MAP".to_string(),
13337 vec![empty_keys, empty_vals],
13338 ))))
13339 }
13340 // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
13341 "MAP"
13342 if f.args.len() >= 2
13343 && f.args.len() % 2 == 0
13344 && matches!(
13345 source,
13346 DialectType::Hive
13347 | DialectType::Spark
13348 | DialectType::Databricks
13349 | DialectType::ClickHouse
13350 ) =>
13351 {
13352 let args = f.args;
13353 match target {
13354 DialectType::DuckDB => {
13355 // MAP([k1, k2], [v1, v2])
13356 let mut keys = Vec::new();
13357 let mut vals = Vec::new();
13358 for (i, arg) in args.into_iter().enumerate() {
13359 if i % 2 == 0 {
13360 keys.push(arg);
13361 } else {
13362 vals.push(arg);
13363 }
13364 }
13365 let keys_arr = Expression::Array(Box::new(
13366 crate::expressions::Array { expressions: keys },
13367 ));
13368 let vals_arr = Expression::Array(Box::new(
13369 crate::expressions::Array { expressions: vals },
13370 ));
13371 Ok(Expression::Function(Box::new(Function::new(
13372 "MAP".to_string(),
13373 vec![keys_arr, vals_arr],
13374 ))))
13375 }
13376 DialectType::Presto | DialectType::Trino => {
13377 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
13378 let mut keys = Vec::new();
13379 let mut vals = Vec::new();
13380 for (i, arg) in args.into_iter().enumerate() {
13381 if i % 2 == 0 {
13382 keys.push(arg);
13383 } else {
13384 vals.push(arg);
13385 }
13386 }
13387 let keys_arr = Expression::Array(Box::new(
13388 crate::expressions::Array { expressions: keys },
13389 ));
13390 let vals_arr = Expression::Array(Box::new(
13391 crate::expressions::Array { expressions: vals },
13392 ));
13393 Ok(Expression::Function(Box::new(Function::new(
13394 "MAP".to_string(),
13395 vec![keys_arr, vals_arr],
13396 ))))
13397 }
13398 DialectType::Snowflake => Ok(Expression::Function(Box::new(
13399 Function::new("OBJECT_CONSTRUCT".to_string(), args),
13400 ))),
13401 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
13402 Function::new("map".to_string(), args),
13403 ))),
13404 _ => Ok(Expression::Function(Box::new(Function::new(
13405 "MAP".to_string(),
13406 args,
13407 )))),
13408 }
13409 }
13410 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
13411 "COLLECT_LIST" if f.args.len() >= 1 => {
13412 let name = match target {
13413 DialectType::Spark
13414 | DialectType::Databricks
13415 | DialectType::Hive => "COLLECT_LIST",
13416 DialectType::DuckDB
13417 | DialectType::PostgreSQL
13418 | DialectType::Redshift
13419 | DialectType::Snowflake
13420 | DialectType::BigQuery => "ARRAY_AGG",
13421 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
13422 _ => "ARRAY_AGG",
13423 };
13424 Ok(Expression::Function(Box::new(Function::new(
13425 name.to_string(),
13426 f.args,
13427 ))))
13428 }
13429 // COLLECT_SET(x) -> target-specific distinct array aggregation
13430 "COLLECT_SET" if f.args.len() >= 1 => {
13431 let name = match target {
13432 DialectType::Spark
13433 | DialectType::Databricks
13434 | DialectType::Hive => "COLLECT_SET",
13435 DialectType::Presto
13436 | DialectType::Trino
13437 | DialectType::Athena => "SET_AGG",
13438 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
13439 _ => "ARRAY_AGG",
13440 };
13441 Ok(Expression::Function(Box::new(Function::new(
13442 name.to_string(),
13443 f.args,
13444 ))))
13445 }
13446 // ISNAN(x) / IS_NAN(x) - normalize
13447 "ISNAN" | "IS_NAN" => {
13448 let name = match target {
13449 DialectType::Spark
13450 | DialectType::Databricks
13451 | DialectType::Hive => "ISNAN",
13452 DialectType::Presto
13453 | DialectType::Trino
13454 | DialectType::Athena => "IS_NAN",
13455 DialectType::BigQuery
13456 | DialectType::PostgreSQL
13457 | DialectType::Redshift => "IS_NAN",
13458 DialectType::ClickHouse => "IS_NAN",
13459 _ => "ISNAN",
13460 };
13461 Ok(Expression::Function(Box::new(Function::new(
13462 name.to_string(),
13463 f.args,
13464 ))))
13465 }
13466 // SPLIT_PART(str, delim, index) -> target-specific
13467 "SPLIT_PART" if f.args.len() == 3 => {
13468 match target {
13469 DialectType::Spark | DialectType::Databricks => {
13470 // Keep as SPLIT_PART (Spark 3.4+)
13471 Ok(Expression::Function(Box::new(Function::new(
13472 "SPLIT_PART".to_string(),
13473 f.args,
13474 ))))
13475 }
13476 DialectType::DuckDB
13477 if matches!(source, DialectType::Snowflake) =>
13478 {
13479 // Snowflake SPLIT_PART -> DuckDB with CASE wrapper:
13480 // - part_index 0 treated as 1
13481 // - empty delimiter: return whole string if index 1 or -1, else ''
13482 let mut args = f.args;
13483 let str_arg = args.remove(0);
13484 let delim_arg = args.remove(0);
13485 let idx_arg = args.remove(0);
13486
13487 // (CASE WHEN idx = 0 THEN 1 ELSE idx END)
13488 let adjusted_idx = Expression::Paren(Box::new(Paren {
13489 this: Expression::Case(Box::new(Case {
13490 operand: None,
13491 whens: vec![(
13492 Expression::Eq(Box::new(BinaryOp {
13493 left: idx_arg.clone(),
13494 right: Expression::number(0),
13495 left_comments: vec![],
13496 operator_comments: vec![],
13497 trailing_comments: vec![],
13498 inferred_type: None,
13499 })),
13500 Expression::number(1),
13501 )],
13502 else_: Some(idx_arg.clone()),
13503 comments: vec![],
13504 inferred_type: None,
13505 })),
13506 trailing_comments: vec![],
13507 }));
13508
13509 // SPLIT_PART(str, delim, adjusted_idx)
13510 let base_func =
13511 Expression::Function(Box::new(Function::new(
13512 "SPLIT_PART".to_string(),
13513 vec![
13514 str_arg.clone(),
13515 delim_arg.clone(),
13516 adjusted_idx.clone(),
13517 ],
13518 )));
13519
13520 // (CASE WHEN adjusted_idx = 1 OR adjusted_idx = -1 THEN str ELSE '' END)
13521 let empty_delim_case = Expression::Paren(Box::new(Paren {
13522 this: Expression::Case(Box::new(Case {
13523 operand: None,
13524 whens: vec![(
13525 Expression::Or(Box::new(BinaryOp {
13526 left: Expression::Eq(Box::new(BinaryOp {
13527 left: adjusted_idx.clone(),
13528 right: Expression::number(1),
13529 left_comments: vec![],
13530 operator_comments: vec![],
13531 trailing_comments: vec![],
13532 inferred_type: None,
13533 })),
13534 right: Expression::Eq(Box::new(BinaryOp {
13535 left: adjusted_idx,
13536 right: Expression::number(-1),
13537 left_comments: vec![],
13538 operator_comments: vec![],
13539 trailing_comments: vec![],
13540 inferred_type: None,
13541 })),
13542 left_comments: vec![],
13543 operator_comments: vec![],
13544 trailing_comments: vec![],
13545 inferred_type: None,
13546 })),
13547 str_arg,
13548 )],
13549 else_: Some(Expression::string("")),
13550 comments: vec![],
13551 inferred_type: None,
13552 })),
13553 trailing_comments: vec![],
13554 }));
13555
13556 // CASE WHEN delim = '' THEN (empty case) ELSE SPLIT_PART(...) END
13557 Ok(Expression::Case(Box::new(Case {
13558 operand: None,
13559 whens: vec![(
13560 Expression::Eq(Box::new(BinaryOp {
13561 left: delim_arg,
13562 right: Expression::string(""),
13563 left_comments: vec![],
13564 operator_comments: vec![],
13565 trailing_comments: vec![],
13566 inferred_type: None,
13567 })),
13568 empty_delim_case,
13569 )],
13570 else_: Some(base_func),
13571 comments: vec![],
13572 inferred_type: None,
13573 })))
13574 }
13575 DialectType::DuckDB
13576 | DialectType::PostgreSQL
13577 | DialectType::Snowflake
13578 | DialectType::Redshift
13579 | DialectType::Trino
13580 | DialectType::Presto => Ok(Expression::Function(Box::new(
13581 Function::new("SPLIT_PART".to_string(), f.args),
13582 ))),
13583 DialectType::Hive => {
13584 // SPLIT(str, delim)[index]
13585 // Complex conversion, just keep as-is for now
13586 Ok(Expression::Function(Box::new(Function::new(
13587 "SPLIT_PART".to_string(),
13588 f.args,
13589 ))))
13590 }
13591 _ => Ok(Expression::Function(Box::new(Function::new(
13592 "SPLIT_PART".to_string(),
13593 f.args,
13594 )))),
13595 }
13596 }
13597 // JSON_EXTRACT(json, path) -> target-specific JSON extraction
13598 "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
13599 let is_scalar = name == "JSON_EXTRACT_SCALAR";
13600 match target {
13601 DialectType::Spark
13602 | DialectType::Databricks
13603 | DialectType::Hive => {
13604 let mut args = f.args;
13605 // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
13606 // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
13607 if let Some(Expression::Function(inner)) = args.first() {
13608 if inner.name.eq_ignore_ascii_case("TRY")
13609 && inner.args.len() == 1
13610 {
13611 let mut inner_args = inner.args.clone();
13612 args[0] = inner_args.remove(0);
13613 }
13614 }
13615 Ok(Expression::Function(Box::new(Function::new(
13616 "GET_JSON_OBJECT".to_string(),
13617 args,
13618 ))))
13619 }
13620 DialectType::DuckDB | DialectType::SQLite => {
13621 // json -> path syntax
13622 let mut args = f.args;
13623 let json_expr = args.remove(0);
13624 let path = args.remove(0);
13625 Ok(Expression::JsonExtract(Box::new(
13626 crate::expressions::JsonExtractFunc {
13627 this: json_expr,
13628 path,
13629 returning: None,
13630 arrow_syntax: true,
13631 hash_arrow_syntax: false,
13632 wrapper_option: None,
13633 quotes_option: None,
13634 on_scalar_string: false,
13635 on_error: None,
13636 },
13637 )))
13638 }
13639 DialectType::TSQL => {
13640 let func_name = if is_scalar {
13641 "JSON_VALUE"
13642 } else {
13643 "JSON_QUERY"
13644 };
13645 Ok(Expression::Function(Box::new(Function::new(
13646 func_name.to_string(),
13647 f.args,
13648 ))))
13649 }
13650 DialectType::PostgreSQL | DialectType::Redshift => {
13651 let func_name = if is_scalar {
13652 "JSON_EXTRACT_PATH_TEXT"
13653 } else {
13654 "JSON_EXTRACT_PATH"
13655 };
13656 Ok(Expression::Function(Box::new(Function::new(
13657 func_name.to_string(),
13658 f.args,
13659 ))))
13660 }
13661 _ => Ok(Expression::Function(Box::new(Function::new(
13662 name.to_string(),
13663 f.args,
13664 )))),
13665 }
13666 }
13667 // MySQL JSON_SEARCH(json_doc, mode, search[, escape_char[, path]]) -> DuckDB json_tree-based lookup
13668 "JSON_SEARCH"
13669 if matches!(target, DialectType::DuckDB)
13670 && (3..=5).contains(&f.args.len()) =>
13671 {
13672 let args = &f.args;
13673
13674 // Only rewrite deterministic modes and NULL/no escape-char variant.
13675 let mode = match &args[1] {
13676 Expression::Literal(lit)
13677 if matches!(
13678 lit.as_ref(),
13679 crate::expressions::Literal::String(_)
13680 ) =>
13681 {
13682 let crate::expressions::Literal::String(s) = lit.as_ref()
13683 else {
13684 unreachable!()
13685 };
13686 s.to_ascii_lowercase()
13687 }
13688 _ => return Ok(Expression::Function(f)),
13689 };
13690 if mode != "one" && mode != "all" {
13691 return Ok(Expression::Function(f));
13692 }
13693 if args.len() >= 4 && !matches!(&args[3], Expression::Null(_)) {
13694 return Ok(Expression::Function(f));
13695 }
13696
13697 let json_doc_sql = match Generator::sql(&args[0]) {
13698 Ok(sql) => sql,
13699 Err(_) => return Ok(Expression::Function(f)),
13700 };
13701 let search_sql = match Generator::sql(&args[2]) {
13702 Ok(sql) => sql,
13703 Err(_) => return Ok(Expression::Function(f)),
13704 };
13705 let path_sql = if args.len() == 5 {
13706 match Generator::sql(&args[4]) {
13707 Ok(sql) => sql,
13708 Err(_) => return Ok(Expression::Function(f)),
13709 }
13710 } else {
13711 "'$'".to_string()
13712 };
13713
13714 let rewrite_sql = if mode == "all" {
13715 format!(
13716 "(SELECT TO_JSON(LIST(__jt.fullkey)) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}))",
13717 json_doc_sql, path_sql, search_sql
13718 )
13719 } else {
13720 format!(
13721 "(SELECT TO_JSON(__jt.fullkey) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}) ORDER BY __jt.id LIMIT 1)",
13722 json_doc_sql, path_sql, search_sql
13723 )
13724 };
13725
13726 Ok(Expression::Raw(crate::expressions::Raw {
13727 sql: rewrite_sql,
13728 }))
13729 }
13730 // SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
13731 // BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
13732 "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
13733 if f.args.len() >= 2
13734 && matches!(source, DialectType::SingleStore) =>
13735 {
13736 let is_bson = name == "BSON_EXTRACT_BSON";
13737 let mut args = f.args;
13738 let json_expr = args.remove(0);
13739
13740 // Build JSONPath from remaining arguments
13741 let mut path = String::from("$");
13742 for arg in &args {
13743 if let Expression::Literal(lit) = arg {
13744 if let crate::expressions::Literal::String(s) = lit.as_ref()
13745 {
13746 // Check if it's a numeric string (array index)
13747 if s.parse::<i64>().is_ok() {
13748 path.push('[');
13749 path.push_str(s);
13750 path.push(']');
13751 } else {
13752 path.push('.');
13753 path.push_str(s);
13754 }
13755 }
13756 }
13757 }
13758
13759 let target_func = if is_bson {
13760 "JSONB_EXTRACT"
13761 } else {
13762 "JSON_EXTRACT"
13763 };
13764 Ok(Expression::Function(Box::new(Function::new(
13765 target_func.to_string(),
13766 vec![json_expr, Expression::string(&path)],
13767 ))))
13768 }
13769 // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
13770 "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
13771 Ok(Expression::Function(Box::new(Function {
13772 name: "arraySum".to_string(),
13773 args: f.args,
13774 distinct: f.distinct,
13775 trailing_comments: f.trailing_comments,
13776 use_bracket_syntax: f.use_bracket_syntax,
13777 no_parens: f.no_parens,
13778 quoted: f.quoted,
13779 span: None,
13780 inferred_type: None,
13781 })))
13782 }
13783 // TSQL JSON_QUERY/JSON_VALUE -> target-specific
13784 // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
13785 // and is handled by JsonQueryValueConvert action. This handles the case where
13786 // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
13787 "JSON_QUERY" | "JSON_VALUE"
13788 if f.args.len() == 2
13789 && matches!(
13790 source,
13791 DialectType::TSQL | DialectType::Fabric
13792 ) =>
13793 {
13794 match target {
13795 DialectType::Spark
13796 | DialectType::Databricks
13797 | DialectType::Hive => Ok(Expression::Function(Box::new(
13798 Function::new("GET_JSON_OBJECT".to_string(), f.args),
13799 ))),
13800 _ => Ok(Expression::Function(Box::new(Function::new(
13801 name.to_string(),
13802 f.args,
13803 )))),
13804 }
13805 }
13806 // UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
13807 "UNIX_TIMESTAMP" if f.args.len() == 1 => {
13808 let arg = f.args.into_iter().next().unwrap();
13809 let is_hive_source = matches!(
13810 source,
13811 DialectType::Hive
13812 | DialectType::Spark
13813 | DialectType::Databricks
13814 );
13815 match target {
13816 DialectType::DuckDB if is_hive_source => {
13817 // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
13818 let strptime =
13819 Expression::Function(Box::new(Function::new(
13820 "STRPTIME".to_string(),
13821 vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
13822 )));
13823 Ok(Expression::Function(Box::new(Function::new(
13824 "EPOCH".to_string(),
13825 vec![strptime],
13826 ))))
13827 }
13828 DialectType::Presto | DialectType::Trino if is_hive_source => {
13829 // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
13830 let cast_varchar =
13831 Expression::Cast(Box::new(crate::expressions::Cast {
13832 this: arg.clone(),
13833 to: DataType::VarChar {
13834 length: None,
13835 parenthesized_length: false,
13836 },
13837 trailing_comments: vec![],
13838 double_colon_syntax: false,
13839 format: None,
13840 default: None,
13841 inferred_type: None,
13842 }));
13843 let date_parse =
13844 Expression::Function(Box::new(Function::new(
13845 "DATE_PARSE".to_string(),
13846 vec![
13847 cast_varchar,
13848 Expression::string("%Y-%m-%d %T"),
13849 ],
13850 )));
13851 let try_expr = Expression::Function(Box::new(
13852 Function::new("TRY".to_string(), vec![date_parse]),
13853 ));
13854 let date_format =
13855 Expression::Function(Box::new(Function::new(
13856 "DATE_FORMAT".to_string(),
13857 vec![arg, Expression::string("%Y-%m-%d %T")],
13858 )));
13859 let parse_datetime =
13860 Expression::Function(Box::new(Function::new(
13861 "PARSE_DATETIME".to_string(),
13862 vec![
13863 date_format,
13864 Expression::string("yyyy-MM-dd HH:mm:ss"),
13865 ],
13866 )));
13867 let coalesce =
13868 Expression::Function(Box::new(Function::new(
13869 "COALESCE".to_string(),
13870 vec![try_expr, parse_datetime],
13871 )));
13872 Ok(Expression::Function(Box::new(Function::new(
13873 "TO_UNIXTIME".to_string(),
13874 vec![coalesce],
13875 ))))
13876 }
13877 DialectType::Presto | DialectType::Trino => {
13878 Ok(Expression::Function(Box::new(Function::new(
13879 "TO_UNIXTIME".to_string(),
13880 vec![arg],
13881 ))))
13882 }
13883 _ => Ok(Expression::Function(Box::new(Function::new(
13884 "UNIX_TIMESTAMP".to_string(),
13885 vec![arg],
13886 )))),
13887 }
13888 }
13889 // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
13890 "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
13891 DialectType::Spark
13892 | DialectType::Databricks
13893 | DialectType::Hive => Ok(Expression::Function(Box::new(
13894 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
13895 ))),
13896 _ => Ok(Expression::Function(Box::new(Function::new(
13897 "TO_UNIX_TIMESTAMP".to_string(),
13898 f.args,
13899 )))),
13900 },
13901 // CURDATE() -> CURRENT_DATE
13902 "CURDATE" => {
13903 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
13904 }
13905 // CURTIME() -> CURRENT_TIME
13906 "CURTIME" => {
13907 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
13908 precision: None,
13909 }))
13910 }
13911 // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive, LIST_SORT for DuckDB
13912 "ARRAY_SORT" if f.args.len() >= 1 => {
13913 match target {
13914 DialectType::Hive => {
13915 let mut args = f.args;
13916 args.truncate(1); // Drop lambda comparator
13917 Ok(Expression::Function(Box::new(Function::new(
13918 "SORT_ARRAY".to_string(),
13919 args,
13920 ))))
13921 }
13922 DialectType::DuckDB
13923 if matches!(source, DialectType::Snowflake) =>
13924 {
13925 // Snowflake ARRAY_SORT(arr[, asc_bool[, nulls_first_bool]]) -> DuckDB LIST_SORT(arr[, 'ASC'/'DESC'[, 'NULLS FIRST']])
13926 let mut args_iter = f.args.into_iter();
13927 let arr = args_iter.next().unwrap();
13928 let asc_arg = args_iter.next();
13929 let nulls_first_arg = args_iter.next();
13930
13931 let is_asc_bool = asc_arg
13932 .as_ref()
13933 .map(|a| matches!(a, Expression::Boolean(_)))
13934 .unwrap_or(false);
13935 let is_nf_bool = nulls_first_arg
13936 .as_ref()
13937 .map(|a| matches!(a, Expression::Boolean(_)))
13938 .unwrap_or(false);
13939
13940 // No boolean args: pass through as-is
13941 if !is_asc_bool && !is_nf_bool {
13942 let mut result_args = vec![arr];
13943 if let Some(asc) = asc_arg {
13944 result_args.push(asc);
13945 if let Some(nf) = nulls_first_arg {
13946 result_args.push(nf);
13947 }
13948 }
13949 Ok(Expression::Function(Box::new(Function::new(
13950 "LIST_SORT".to_string(),
13951 result_args,
13952 ))))
13953 } else {
13954 // Has boolean args: convert to DuckDB LIST_SORT format
13955 let descending = matches!(&asc_arg, Some(Expression::Boolean(b)) if !b.value);
13956
13957 // Snowflake defaults: nulls_first = TRUE for DESC, FALSE for ASC
13958 let nulls_are_first = match &nulls_first_arg {
13959 Some(Expression::Boolean(b)) => b.value,
13960 None if is_asc_bool => descending, // Snowflake default
13961 _ => false,
13962 };
13963 let nulls_first_sql = if nulls_are_first {
13964 Some(Expression::string("NULLS FIRST"))
13965 } else {
13966 None
13967 };
13968
13969 if !is_asc_bool {
13970 // asc is non-boolean expression, nulls_first is boolean
13971 let mut result_args = vec![arr];
13972 if let Some(asc) = asc_arg {
13973 result_args.push(asc);
13974 }
13975 if let Some(nf) = nulls_first_sql {
13976 result_args.push(nf);
13977 }
13978 Ok(Expression::Function(Box::new(Function::new(
13979 "LIST_SORT".to_string(),
13980 result_args,
13981 ))))
13982 } else {
13983 if !descending && !nulls_are_first {
13984 // ASC, NULLS LAST (default) -> LIST_SORT(arr)
13985 Ok(Expression::Function(Box::new(
13986 Function::new(
13987 "LIST_SORT".to_string(),
13988 vec![arr],
13989 ),
13990 )))
13991 } else if descending && !nulls_are_first {
13992 // DESC, NULLS LAST -> ARRAY_REVERSE_SORT(arr)
13993 Ok(Expression::Function(Box::new(
13994 Function::new(
13995 "ARRAY_REVERSE_SORT".to_string(),
13996 vec![arr],
13997 ),
13998 )))
13999 } else {
14000 // NULLS FIRST -> LIST_SORT(arr, 'ASC'/'DESC', 'NULLS FIRST')
14001 let order_str =
14002 if descending { "DESC" } else { "ASC" };
14003 Ok(Expression::Function(Box::new(
14004 Function::new(
14005 "LIST_SORT".to_string(),
14006 vec![
14007 arr,
14008 Expression::string(order_str),
14009 Expression::string("NULLS FIRST"),
14010 ],
14011 ),
14012 )))
14013 }
14014 }
14015 }
14016 }
14017 DialectType::DuckDB => {
14018 // Non-Snowflake source: ARRAY_SORT(x, lambda) -> ARRAY_SORT(x) (drop comparator)
14019 let mut args = f.args;
14020 args.truncate(1); // Drop lambda comparator for DuckDB
14021 Ok(Expression::Function(Box::new(Function::new(
14022 "ARRAY_SORT".to_string(),
14023 args,
14024 ))))
14025 }
14026 _ => Ok(Expression::Function(f)),
14027 }
14028 }
14029 // SORT_ARRAY(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, keep for Hive/Spark
14030 "SORT_ARRAY" if f.args.len() == 1 => match target {
14031 DialectType::Hive
14032 | DialectType::Spark
14033 | DialectType::Databricks => Ok(Expression::Function(f)),
14034 DialectType::DuckDB => Ok(Expression::Function(Box::new(
14035 Function::new("LIST_SORT".to_string(), f.args),
14036 ))),
14037 _ => Ok(Expression::Function(Box::new(Function::new(
14038 "ARRAY_SORT".to_string(),
14039 f.args,
14040 )))),
14041 },
14042 // SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
14043 "SORT_ARRAY" if f.args.len() == 2 => {
14044 let is_desc =
14045 matches!(&f.args[1], Expression::Boolean(b) if !b.value);
14046 if is_desc {
14047 match target {
14048 DialectType::DuckDB => {
14049 Ok(Expression::Function(Box::new(Function::new(
14050 "ARRAY_REVERSE_SORT".to_string(),
14051 vec![f.args.into_iter().next().unwrap()],
14052 ))))
14053 }
14054 DialectType::Presto | DialectType::Trino => {
14055 let arr_arg = f.args.into_iter().next().unwrap();
14056 let a = Expression::Column(Box::new(
14057 crate::expressions::Column {
14058 name: crate::expressions::Identifier::new("a"),
14059 table: None,
14060 join_mark: false,
14061 trailing_comments: Vec::new(),
14062 span: None,
14063 inferred_type: None,
14064 },
14065 ));
14066 let b = Expression::Column(Box::new(
14067 crate::expressions::Column {
14068 name: crate::expressions::Identifier::new("b"),
14069 table: None,
14070 join_mark: false,
14071 trailing_comments: Vec::new(),
14072 span: None,
14073 inferred_type: None,
14074 },
14075 ));
14076 let case_expr = Expression::Case(Box::new(
14077 crate::expressions::Case {
14078 operand: None,
14079 whens: vec![
14080 (
14081 Expression::Lt(Box::new(
14082 BinaryOp::new(a.clone(), b.clone()),
14083 )),
14084 Expression::Literal(Box::new(
14085 Literal::Number("1".to_string()),
14086 )),
14087 ),
14088 (
14089 Expression::Gt(Box::new(
14090 BinaryOp::new(a.clone(), b.clone()),
14091 )),
14092 Expression::Literal(Box::new(
14093 Literal::Number("-1".to_string()),
14094 )),
14095 ),
14096 ],
14097 else_: Some(Expression::Literal(Box::new(
14098 Literal::Number("0".to_string()),
14099 ))),
14100 comments: Vec::new(),
14101 inferred_type: None,
14102 },
14103 ));
14104 let lambda = Expression::Lambda(Box::new(
14105 crate::expressions::LambdaExpr {
14106 parameters: vec![
14107 crate::expressions::Identifier::new("a"),
14108 crate::expressions::Identifier::new("b"),
14109 ],
14110 body: case_expr,
14111 colon: false,
14112 parameter_types: Vec::new(),
14113 },
14114 ));
14115 Ok(Expression::Function(Box::new(Function::new(
14116 "ARRAY_SORT".to_string(),
14117 vec![arr_arg, lambda],
14118 ))))
14119 }
14120 _ => Ok(Expression::Function(f)),
14121 }
14122 } else {
14123 // SORT_ARRAY(x, TRUE) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for others
14124 match target {
14125 DialectType::Hive => Ok(Expression::Function(f)),
14126 DialectType::DuckDB => {
14127 Ok(Expression::Function(Box::new(Function::new(
14128 "LIST_SORT".to_string(),
14129 vec![f.args.into_iter().next().unwrap()],
14130 ))))
14131 }
14132 _ => Ok(Expression::Function(Box::new(Function::new(
14133 "ARRAY_SORT".to_string(),
14134 vec![f.args.into_iter().next().unwrap()],
14135 )))),
14136 }
14137 }
14138 }
14139 // LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
14140 "LEFT" if f.args.len() == 2 => {
14141 match target {
14142 DialectType::Hive
14143 | DialectType::Presto
14144 | DialectType::Trino
14145 | DialectType::Athena => {
14146 let x = f.args[0].clone();
14147 let n = f.args[1].clone();
14148 Ok(Expression::Function(Box::new(Function::new(
14149 "SUBSTRING".to_string(),
14150 vec![x, Expression::number(1), n],
14151 ))))
14152 }
14153 DialectType::Spark | DialectType::Databricks
14154 if matches!(
14155 source,
14156 DialectType::TSQL | DialectType::Fabric
14157 ) =>
14158 {
14159 // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
14160 let x = f.args[0].clone();
14161 let n = f.args[1].clone();
14162 let cast_x = Expression::Cast(Box::new(Cast {
14163 this: x,
14164 to: DataType::VarChar {
14165 length: None,
14166 parenthesized_length: false,
14167 },
14168 double_colon_syntax: false,
14169 trailing_comments: Vec::new(),
14170 format: None,
14171 default: None,
14172 inferred_type: None,
14173 }));
14174 Ok(Expression::Function(Box::new(Function::new(
14175 "LEFT".to_string(),
14176 vec![cast_x, n],
14177 ))))
14178 }
14179 _ => Ok(Expression::Function(f)),
14180 }
14181 }
14182 "RIGHT" if f.args.len() == 2 => {
14183 match target {
14184 DialectType::Hive
14185 | DialectType::Presto
14186 | DialectType::Trino
14187 | DialectType::Athena => {
14188 let x = f.args[0].clone();
14189 let n = f.args[1].clone();
14190 // SUBSTRING(x, LENGTH(x) - (n - 1))
14191 let len_x = Expression::Function(Box::new(Function::new(
14192 "LENGTH".to_string(),
14193 vec![x.clone()],
14194 )));
14195 let n_minus_1 = Expression::Sub(Box::new(
14196 crate::expressions::BinaryOp::new(
14197 n,
14198 Expression::number(1),
14199 ),
14200 ));
14201 let n_minus_1_paren = Expression::Paren(Box::new(
14202 crate::expressions::Paren {
14203 this: n_minus_1,
14204 trailing_comments: Vec::new(),
14205 },
14206 ));
14207 let offset = Expression::Sub(Box::new(
14208 crate::expressions::BinaryOp::new(
14209 len_x,
14210 n_minus_1_paren,
14211 ),
14212 ));
14213 Ok(Expression::Function(Box::new(Function::new(
14214 "SUBSTRING".to_string(),
14215 vec![x, offset],
14216 ))))
14217 }
14218 DialectType::Spark | DialectType::Databricks
14219 if matches!(
14220 source,
14221 DialectType::TSQL | DialectType::Fabric
14222 ) =>
14223 {
14224 // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
14225 let x = f.args[0].clone();
14226 let n = f.args[1].clone();
14227 let cast_x = Expression::Cast(Box::new(Cast {
14228 this: x,
14229 to: DataType::VarChar {
14230 length: None,
14231 parenthesized_length: false,
14232 },
14233 double_colon_syntax: false,
14234 trailing_comments: Vec::new(),
14235 format: None,
14236 default: None,
14237 inferred_type: None,
14238 }));
14239 Ok(Expression::Function(Box::new(Function::new(
14240 "RIGHT".to_string(),
14241 vec![cast_x, n],
14242 ))))
14243 }
14244 _ => Ok(Expression::Function(f)),
14245 }
14246 }
14247 // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
14248 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
14249 DialectType::Snowflake => Ok(Expression::Function(Box::new(
14250 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
14251 ))),
14252 DialectType::Spark | DialectType::Databricks => {
14253 Ok(Expression::Function(Box::new(Function::new(
14254 "MAP_FROM_ARRAYS".to_string(),
14255 f.args,
14256 ))))
14257 }
14258 _ => Ok(Expression::Function(Box::new(Function::new(
14259 "MAP".to_string(),
14260 f.args,
14261 )))),
14262 },
14263 // LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
14264 // SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
14265 "LIKE" if f.args.len() >= 2 => {
14266 let (this, pattern) = if matches!(source, DialectType::SQLite) {
14267 // SQLite: LIKE(pattern, string) -> string LIKE pattern
14268 (f.args[1].clone(), f.args[0].clone())
14269 } else {
14270 // Standard: LIKE(string, pattern) -> string LIKE pattern
14271 (f.args[0].clone(), f.args[1].clone())
14272 };
14273 let escape = if f.args.len() >= 3 {
14274 Some(f.args[2].clone())
14275 } else {
14276 None
14277 };
14278 Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
14279 left: this,
14280 right: pattern,
14281 escape,
14282 quantifier: None,
14283 inferred_type: None,
14284 })))
14285 }
14286 // ILIKE(foo, 'pat') -> foo ILIKE 'pat'
14287 "ILIKE" if f.args.len() >= 2 => {
14288 let this = f.args[0].clone();
14289 let pattern = f.args[1].clone();
14290 let escape = if f.args.len() >= 3 {
14291 Some(f.args[2].clone())
14292 } else {
14293 None
14294 };
14295 Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
14296 left: this,
14297 right: pattern,
14298 escape,
14299 quantifier: None,
14300 inferred_type: None,
14301 })))
14302 }
14303 // CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
14304 "CHAR" if f.args.len() == 1 => match target {
14305 DialectType::MySQL
14306 | DialectType::SingleStore
14307 | DialectType::TSQL => Ok(Expression::Function(f)),
14308 _ => Ok(Expression::Function(Box::new(Function::new(
14309 "CHR".to_string(),
14310 f.args,
14311 )))),
14312 },
14313 // CONCAT(a, b) -> a || b for PostgreSQL
14314 "CONCAT"
14315 if f.args.len() == 2
14316 && matches!(target, DialectType::PostgreSQL)
14317 && matches!(
14318 source,
14319 DialectType::ClickHouse | DialectType::MySQL
14320 ) =>
14321 {
14322 let mut args = f.args;
14323 let right = args.pop().unwrap();
14324 let left = args.pop().unwrap();
14325 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
14326 this: Box::new(left),
14327 expression: Box::new(right),
14328 safe: None,
14329 })))
14330 }
14331 // ARRAY_TO_STRING(arr, delim) -> target-specific
14332 "ARRAY_TO_STRING"
14333 if f.args.len() == 2
14334 && matches!(target, DialectType::DuckDB)
14335 && matches!(source, DialectType::Snowflake) =>
14336 {
14337 let mut args = f.args;
14338 let arr = args.remove(0);
14339 let sep = args.remove(0);
14340 // sep IS NULL
14341 let sep_is_null = Expression::IsNull(Box::new(IsNull {
14342 this: sep.clone(),
14343 not: false,
14344 postfix_form: false,
14345 }));
14346 // COALESCE(CAST(x AS TEXT), '')
14347 let cast_x = Expression::Cast(Box::new(Cast {
14348 this: Expression::Identifier(Identifier::new("x")),
14349 to: DataType::Text,
14350 trailing_comments: Vec::new(),
14351 double_colon_syntax: false,
14352 format: None,
14353 default: None,
14354 inferred_type: None,
14355 }));
14356 let coalesce = Expression::Coalesce(Box::new(
14357 crate::expressions::VarArgFunc {
14358 original_name: None,
14359 expressions: vec![
14360 cast_x,
14361 Expression::Literal(Box::new(Literal::String(
14362 String::new(),
14363 ))),
14364 ],
14365 inferred_type: None,
14366 },
14367 ));
14368 let lambda =
14369 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
14370 parameters: vec![Identifier::new("x")],
14371 body: coalesce,
14372 colon: false,
14373 parameter_types: Vec::new(),
14374 }));
14375 let list_transform = Expression::Function(Box::new(Function::new(
14376 "LIST_TRANSFORM".to_string(),
14377 vec![arr, lambda],
14378 )));
14379 let array_to_string =
14380 Expression::Function(Box::new(Function::new(
14381 "ARRAY_TO_STRING".to_string(),
14382 vec![list_transform, sep],
14383 )));
14384 Ok(Expression::Case(Box::new(Case {
14385 operand: None,
14386 whens: vec![(sep_is_null, Expression::Null(Null))],
14387 else_: Some(array_to_string),
14388 comments: Vec::new(),
14389 inferred_type: None,
14390 })))
14391 }
14392 "ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
14393 DialectType::Presto | DialectType::Trino => {
14394 Ok(Expression::Function(Box::new(Function::new(
14395 "ARRAY_JOIN".to_string(),
14396 f.args,
14397 ))))
14398 }
14399 DialectType::TSQL => Ok(Expression::Function(Box::new(
14400 Function::new("STRING_AGG".to_string(), f.args),
14401 ))),
14402 _ => Ok(Expression::Function(f)),
14403 },
14404 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
14405 "ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
14406 DialectType::Spark
14407 | DialectType::Databricks
14408 | DialectType::Hive => Ok(Expression::Function(Box::new(
14409 Function::new("CONCAT".to_string(), f.args),
14410 ))),
14411 DialectType::Snowflake => Ok(Expression::Function(Box::new(
14412 Function::new("ARRAY_CAT".to_string(), f.args),
14413 ))),
14414 DialectType::Redshift => Ok(Expression::Function(Box::new(
14415 Function::new("ARRAY_CONCAT".to_string(), f.args),
14416 ))),
14417 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
14418 Function::new("ARRAY_CAT".to_string(), f.args),
14419 ))),
14420 DialectType::DuckDB => Ok(Expression::Function(Box::new(
14421 Function::new("LIST_CONCAT".to_string(), f.args),
14422 ))),
14423 DialectType::Presto | DialectType::Trino => {
14424 Ok(Expression::Function(Box::new(Function::new(
14425 "CONCAT".to_string(),
14426 f.args,
14427 ))))
14428 }
14429 DialectType::BigQuery => Ok(Expression::Function(Box::new(
14430 Function::new("ARRAY_CONCAT".to_string(), f.args),
14431 ))),
14432 _ => Ok(Expression::Function(f)),
14433 },
// HAS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Databricks/Hive, CONTAINS(arr, x) for Presto/Trino
14435 "HAS" if f.args.len() == 2 => match target {
14436 DialectType::Spark
14437 | DialectType::Databricks
14438 | DialectType::Hive => Ok(Expression::Function(Box::new(
14439 Function::new("ARRAY_CONTAINS".to_string(), f.args),
14440 ))),
14441 DialectType::Presto | DialectType::Trino => {
14442 Ok(Expression::Function(Box::new(Function::new(
14443 "CONTAINS".to_string(),
14444 f.args,
14445 ))))
14446 }
14447 _ => Ok(Expression::Function(f)),
14448 },
14449 // NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
14450 "NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
14451 Function::new("COALESCE".to_string(), f.args),
14452 ))),
14453 // ISNULL(x) in MySQL -> (x IS NULL)
14454 "ISNULL"
14455 if f.args.len() == 1
14456 && matches!(source, DialectType::MySQL)
14457 && matches!(target, DialectType::MySQL) =>
14458 {
14459 let arg = f.args.into_iter().next().unwrap();
14460 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
14461 this: Expression::IsNull(Box::new(
14462 crate::expressions::IsNull {
14463 this: arg,
14464 not: false,
14465 postfix_form: false,
14466 },
14467 )),
14468 trailing_comments: Vec::new(),
14469 })))
14470 }
// MONTHNAME(x) -> DATE_FORMAT(x, '%M') when the target is MySQL (the source dialect is not checked)
14472 "MONTHNAME"
14473 if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
14474 {
14475 let arg = f.args.into_iter().next().unwrap();
14476 Ok(Expression::Function(Box::new(Function::new(
14477 "DATE_FORMAT".to_string(),
14478 vec![arg, Expression::string("%M")],
14479 ))))
14480 }
14481 // ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
14482 "SPLITBYSTRING" if f.args.len() == 2 => {
14483 let sep = f.args[0].clone();
14484 let str_arg = f.args[1].clone();
14485 match target {
14486 DialectType::DuckDB => Ok(Expression::Function(Box::new(
14487 Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
14488 ))),
14489 DialectType::Doris => {
14490 Ok(Expression::Function(Box::new(Function::new(
14491 "SPLIT_BY_STRING".to_string(),
14492 vec![str_arg, sep],
14493 ))))
14494 }
14495 DialectType::Hive
14496 | DialectType::Spark
14497 | DialectType::Databricks => {
14498 // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
14499 let escaped =
14500 Expression::Function(Box::new(Function::new(
14501 "CONCAT".to_string(),
14502 vec![
14503 Expression::string("\\Q"),
14504 sep,
14505 Expression::string("\\E"),
14506 ],
14507 )));
14508 Ok(Expression::Function(Box::new(Function::new(
14509 "SPLIT".to_string(),
14510 vec![str_arg, escaped],
14511 ))))
14512 }
14513 _ => Ok(Expression::Function(f)),
14514 }
14515 }
14516 // ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
14517 "SPLITBYREGEXP" if f.args.len() == 2 => {
14518 let sep = f.args[0].clone();
14519 let str_arg = f.args[1].clone();
14520 match target {
14521 DialectType::DuckDB => {
14522 Ok(Expression::Function(Box::new(Function::new(
14523 "STR_SPLIT_REGEX".to_string(),
14524 vec![str_arg, sep],
14525 ))))
14526 }
14527 DialectType::Hive
14528 | DialectType::Spark
14529 | DialectType::Databricks => {
14530 Ok(Expression::Function(Box::new(Function::new(
14531 "SPLIT".to_string(),
14532 vec![str_arg, sep],
14533 ))))
14534 }
14535 _ => Ok(Expression::Function(f)),
14536 }
14537 }
14538 // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
14539 "TOMONDAY" => {
14540 if f.args.len() == 1 {
14541 let arg = f.args.into_iter().next().unwrap();
14542 match target {
14543 DialectType::Doris => {
14544 Ok(Expression::Function(Box::new(Function::new(
14545 "DATE_TRUNC".to_string(),
14546 vec![arg, Expression::string("WEEK")],
14547 ))))
14548 }
14549 _ => Ok(Expression::Function(Box::new(Function::new(
14550 "DATE_TRUNC".to_string(),
14551 vec![Expression::string("WEEK"), arg],
14552 )))),
14553 }
14554 } else {
14555 Ok(Expression::Function(f))
14556 }
14557 }
// COLLECT_LIST(x) -> ARRAY_AGG(x) for targets other than Spark/Databricks/Hive
// (this arm only renames the function; it does not add a FILTER(WHERE x IS NOT NULL) clause)
14559 "COLLECT_LIST" if f.args.len() == 1 => match target {
14560 DialectType::Spark
14561 | DialectType::Databricks
14562 | DialectType::Hive => Ok(Expression::Function(f)),
14563 _ => Ok(Expression::Function(Box::new(Function::new(
14564 "ARRAY_AGG".to_string(),
14565 f.args,
14566 )))),
14567 },
14568 // TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
14569 "TO_CHAR"
14570 if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
14571 {
14572 let arg = f.args.into_iter().next().unwrap();
14573 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
14574 this: arg,
14575 to: DataType::Custom {
14576 name: "STRING".to_string(),
14577 },
14578 double_colon_syntax: false,
14579 trailing_comments: Vec::new(),
14580 format: None,
14581 default: None,
14582 inferred_type: None,
14583 })))
14584 }
14585 // DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL
14586 "DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
14587 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
14588 Function::new("RANDOM".to_string(), vec![]),
14589 ))),
14590 _ => Ok(Expression::Function(f)),
14591 },
14592 // ClickHouse formatDateTime -> target-specific
14593 "FORMATDATETIME" if f.args.len() >= 2 => match target {
14594 DialectType::MySQL => Ok(Expression::Function(Box::new(
14595 Function::new("DATE_FORMAT".to_string(), f.args),
14596 ))),
14597 _ => Ok(Expression::Function(f)),
14598 },
14599 // REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
14600 "REPLICATE" if f.args.len() == 2 => match target {
14601 DialectType::TSQL => Ok(Expression::Function(f)),
14602 _ => Ok(Expression::Function(Box::new(Function::new(
14603 "REPEAT".to_string(),
14604 f.args,
14605 )))),
14606 },
14607 // LEN(x) -> LENGTH(x) for non-TSQL targets
14608 // No CAST needed when arg is already a string literal
14609 "LEN" if f.args.len() == 1 => {
14610 match target {
14611 DialectType::TSQL => Ok(Expression::Function(f)),
14612 DialectType::Spark | DialectType::Databricks => {
14613 let arg = f.args.into_iter().next().unwrap();
14614 // Don't wrap string literals with CAST - they're already strings
14615 let is_string = matches!(
14616 &arg,
14617 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
14618 );
14619 let final_arg = if is_string {
14620 arg
14621 } else {
14622 Expression::Cast(Box::new(Cast {
14623 this: arg,
14624 to: DataType::VarChar {
14625 length: None,
14626 parenthesized_length: false,
14627 },
14628 double_colon_syntax: false,
14629 trailing_comments: Vec::new(),
14630 format: None,
14631 default: None,
14632 inferred_type: None,
14633 }))
14634 };
14635 Ok(Expression::Function(Box::new(Function::new(
14636 "LENGTH".to_string(),
14637 vec![final_arg],
14638 ))))
14639 }
14640 _ => {
14641 let arg = f.args.into_iter().next().unwrap();
14642 Ok(Expression::Function(Box::new(Function::new(
14643 "LENGTH".to_string(),
14644 vec![arg],
14645 ))))
14646 }
14647 }
14648 }
14649 // COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
14650 "COUNT_BIG" if f.args.len() == 1 => match target {
14651 DialectType::TSQL => Ok(Expression::Function(f)),
14652 _ => Ok(Expression::Function(Box::new(Function::new(
14653 "COUNT".to_string(),
14654 f.args,
14655 )))),
14656 },
14657 // DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
14658 "DATEFROMPARTS" if f.args.len() == 3 => match target {
14659 DialectType::TSQL => Ok(Expression::Function(f)),
14660 _ => Ok(Expression::Function(Box::new(Function::new(
14661 "MAKE_DATE".to_string(),
14662 f.args,
14663 )))),
14664 },
14665 // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
14666 "REGEXP_LIKE" if f.args.len() >= 2 => {
14667 let str_expr = f.args[0].clone();
14668 let pattern = f.args[1].clone();
14669 let flags = if f.args.len() >= 3 {
14670 Some(f.args[2].clone())
14671 } else {
14672 None
14673 };
14674 match target {
14675 DialectType::DuckDB => {
14676 let mut new_args = vec![str_expr, pattern];
14677 if let Some(fl) = flags {
14678 new_args.push(fl);
14679 }
14680 Ok(Expression::Function(Box::new(Function::new(
14681 "REGEXP_MATCHES".to_string(),
14682 new_args,
14683 ))))
14684 }
14685 _ => Ok(Expression::RegexpLike(Box::new(
14686 crate::expressions::RegexpFunc {
14687 this: str_expr,
14688 pattern,
14689 flags,
14690 },
14691 ))),
14692 }
14693 }
14694 // ClickHouse arrayJoin -> UNNEST for PostgreSQL
14695 "ARRAYJOIN" if f.args.len() == 1 => match target {
14696 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
14697 Function::new("UNNEST".to_string(), f.args),
14698 ))),
14699 _ => Ok(Expression::Function(f)),
14700 },
14701 // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
14702 "DATETIMEFROMPARTS" if f.args.len() == 7 => {
14703 match target {
14704 DialectType::TSQL => Ok(Expression::Function(f)),
14705 DialectType::DuckDB => {
14706 // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
14707 let mut args = f.args;
14708 let ms = args.pop().unwrap();
14709 let s = args.pop().unwrap();
14710 // s + (ms / 1000.0)
14711 let ms_frac = Expression::Div(Box::new(BinaryOp::new(
14712 ms,
14713 Expression::Literal(Box::new(
14714 crate::expressions::Literal::Number(
14715 "1000.0".to_string(),
14716 ),
14717 )),
14718 )));
14719 let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
14720 s,
14721 Expression::Paren(Box::new(Paren {
14722 this: ms_frac,
14723 trailing_comments: vec![],
14724 })),
14725 )));
14726 args.push(s_with_ms);
14727 Ok(Expression::Function(Box::new(Function::new(
14728 "MAKE_TIMESTAMP".to_string(),
14729 args,
14730 ))))
14731 }
14732 DialectType::Snowflake => {
14733 // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
14734 let mut args = f.args;
14735 let ms = args.pop().unwrap();
14736 // ms * 1000000
14737 let ns = Expression::Mul(Box::new(BinaryOp::new(
14738 ms,
14739 Expression::number(1000000),
14740 )));
14741 args.push(ns);
14742 Ok(Expression::Function(Box::new(Function::new(
14743 "TIMESTAMP_FROM_PARTS".to_string(),
14744 args,
14745 ))))
14746 }
14747 _ => {
14748 // Default: keep function name for other targets
14749 Ok(Expression::Function(Box::new(Function::new(
14750 "DATETIMEFROMPARTS".to_string(),
14751 f.args,
14752 ))))
14753 }
14754 }
14755 }
14756 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
14757 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
14758 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
14759 let is_try = name == "TRY_CONVERT";
14760 let type_expr = f.args[0].clone();
14761 let value_expr = f.args[1].clone();
14762 let style = if f.args.len() >= 3 {
14763 Some(&f.args[2])
14764 } else {
14765 None
14766 };
14767
14768 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
14769 if matches!(target, DialectType::TSQL) {
14770 let normalized_type = match &type_expr {
14771 Expression::DataType(dt) => {
14772 let new_dt = match dt {
14773 DataType::Int { .. } => DataType::Custom {
14774 name: "INTEGER".to_string(),
14775 },
14776 _ => dt.clone(),
14777 };
14778 Expression::DataType(new_dt)
14779 }
14780 Expression::Identifier(id) => {
14781 if id.name.eq_ignore_ascii_case("INT") {
14782 Expression::Identifier(
14783 crate::expressions::Identifier::new("INTEGER"),
14784 )
14785 } else {
14786 let upper = id.name.to_ascii_uppercase();
14787 Expression::Identifier(
14788 crate::expressions::Identifier::new(upper),
14789 )
14790 }
14791 }
14792 Expression::Column(col) => {
14793 if col.name.name.eq_ignore_ascii_case("INT") {
14794 Expression::Identifier(
14795 crate::expressions::Identifier::new("INTEGER"),
14796 )
14797 } else {
14798 let upper = col.name.name.to_ascii_uppercase();
14799 Expression::Identifier(
14800 crate::expressions::Identifier::new(upper),
14801 )
14802 }
14803 }
14804 _ => type_expr.clone(),
14805 };
14806 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
14807 let mut new_args = vec![normalized_type, value_expr];
14808 if let Some(s) = style {
14809 new_args.push(s.clone());
14810 }
14811 return Ok(Expression::Function(Box::new(Function::new(
14812 func_name.to_string(),
14813 new_args,
14814 ))));
14815 }
14816
14817 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
14818 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
14819 match e {
14820 Expression::DataType(dt) => {
14821 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
14822 match dt {
14823 DataType::Custom { name }
14824 if name.starts_with("NVARCHAR(")
14825 || name.starts_with("NCHAR(") =>
14826 {
14827 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
14828 let inner = &name[name.find('(').unwrap() + 1
14829 ..name.len() - 1];
14830 if inner.eq_ignore_ascii_case("MAX") {
14831 Some(DataType::Text)
14832 } else if let Ok(len) = inner.parse::<u32>() {
14833 if name.starts_with("NCHAR") {
14834 Some(DataType::Char {
14835 length: Some(len),
14836 })
14837 } else {
14838 Some(DataType::VarChar {
14839 length: Some(len),
14840 parenthesized_length: false,
14841 })
14842 }
14843 } else {
14844 Some(dt.clone())
14845 }
14846 }
14847 DataType::Custom { name } if name == "NVARCHAR" => {
14848 Some(DataType::VarChar {
14849 length: None,
14850 parenthesized_length: false,
14851 })
14852 }
14853 DataType::Custom { name } if name == "NCHAR" => {
14854 Some(DataType::Char { length: None })
14855 }
14856 DataType::Custom { name }
14857 if name == "NVARCHAR(MAX)"
14858 || name == "VARCHAR(MAX)" =>
14859 {
14860 Some(DataType::Text)
14861 }
14862 _ => Some(dt.clone()),
14863 }
14864 }
14865 Expression::Identifier(id) => {
14866 let name = id.name.to_ascii_uppercase();
14867 match name.as_str() {
14868 "INT" | "INTEGER" => Some(DataType::Int {
14869 length: None,
14870 integer_spelling: false,
14871 }),
14872 "BIGINT" => Some(DataType::BigInt { length: None }),
14873 "SMALLINT" => {
14874 Some(DataType::SmallInt { length: None })
14875 }
14876 "TINYINT" => {
14877 Some(DataType::TinyInt { length: None })
14878 }
14879 "FLOAT" => Some(DataType::Float {
14880 precision: None,
14881 scale: None,
14882 real_spelling: false,
14883 }),
14884 "REAL" => Some(DataType::Float {
14885 precision: None,
14886 scale: None,
14887 real_spelling: true,
14888 }),
14889 "DATETIME" | "DATETIME2" => {
14890 Some(DataType::Timestamp {
14891 timezone: false,
14892 precision: None,
14893 })
14894 }
14895 "DATE" => Some(DataType::Date),
14896 "BIT" => Some(DataType::Boolean),
14897 "TEXT" => Some(DataType::Text),
14898 "NUMERIC" => Some(DataType::Decimal {
14899 precision: None,
14900 scale: None,
14901 }),
14902 "MONEY" => Some(DataType::Decimal {
14903 precision: Some(15),
14904 scale: Some(4),
14905 }),
14906 "SMALLMONEY" => Some(DataType::Decimal {
14907 precision: Some(6),
14908 scale: Some(4),
14909 }),
14910 "VARCHAR" => Some(DataType::VarChar {
14911 length: None,
14912 parenthesized_length: false,
14913 }),
14914 "NVARCHAR" => Some(DataType::VarChar {
14915 length: None,
14916 parenthesized_length: false,
14917 }),
14918 "CHAR" => Some(DataType::Char { length: None }),
14919 "NCHAR" => Some(DataType::Char { length: None }),
14920 _ => Some(DataType::Custom { name }),
14921 }
14922 }
14923 Expression::Column(col) => {
14924 let name = col.name.name.to_ascii_uppercase();
14925 match name.as_str() {
14926 "INT" | "INTEGER" => Some(DataType::Int {
14927 length: None,
14928 integer_spelling: false,
14929 }),
14930 "BIGINT" => Some(DataType::BigInt { length: None }),
14931 "FLOAT" => Some(DataType::Float {
14932 precision: None,
14933 scale: None,
14934 real_spelling: false,
14935 }),
14936 "DATETIME" | "DATETIME2" => {
14937 Some(DataType::Timestamp {
14938 timezone: false,
14939 precision: None,
14940 })
14941 }
14942 "DATE" => Some(DataType::Date),
14943 "NUMERIC" => Some(DataType::Decimal {
14944 precision: None,
14945 scale: None,
14946 }),
14947 "VARCHAR" => Some(DataType::VarChar {
14948 length: None,
14949 parenthesized_length: false,
14950 }),
14951 "NVARCHAR" => Some(DataType::VarChar {
14952 length: None,
14953 parenthesized_length: false,
14954 }),
14955 "CHAR" => Some(DataType::Char { length: None }),
14956 "NCHAR" => Some(DataType::Char { length: None }),
14957 _ => Some(DataType::Custom { name }),
14958 }
14959 }
14960 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
14961 Expression::Function(f) => {
14962 let fname = f.name.to_ascii_uppercase();
14963 match fname.as_str() {
14964 "VARCHAR" | "NVARCHAR" => {
14965 let len = f.args.first().and_then(|a| {
14966 if let Expression::Literal(lit) = a
14967 {
14968 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
14969 n.parse::<u32>().ok()
14970 } else { None }
14971 } else if let Expression::Identifier(id) = a
14972 {
14973 if id.name.eq_ignore_ascii_case("MAX") {
14974 None
14975 } else {
14976 None
14977 }
14978 } else {
14979 None
14980 }
14981 });
14982 // Check for VARCHAR(MAX) -> TEXT
14983 let is_max = f.args.first().map_or(false, |a| {
14984 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
14985 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
14986 });
14987 if is_max {
14988 Some(DataType::Text)
14989 } else {
14990 Some(DataType::VarChar {
14991 length: len,
14992 parenthesized_length: false,
14993 })
14994 }
14995 }
14996 "NCHAR" | "CHAR" => {
14997 let len = f.args.first().and_then(|a| {
14998 if let Expression::Literal(lit) = a
14999 {
15000 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
15001 n.parse::<u32>().ok()
15002 } else { None }
15003 } else {
15004 None
15005 }
15006 });
15007 Some(DataType::Char { length: len })
15008 }
15009 "NUMERIC" | "DECIMAL" => {
15010 let precision = f.args.first().and_then(|a| {
15011 if let Expression::Literal(lit) = a
15012 {
15013 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
15014 n.parse::<u32>().ok()
15015 } else { None }
15016 } else {
15017 None
15018 }
15019 });
15020 let scale = f.args.get(1).and_then(|a| {
15021 if let Expression::Literal(lit) = a
15022 {
15023 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
15024 n.parse::<u32>().ok()
15025 } else { None }
15026 } else {
15027 None
15028 }
15029 });
15030 Some(DataType::Decimal { precision, scale })
15031 }
15032 _ => None,
15033 }
15034 }
15035 _ => None,
15036 }
15037 }
15038
15039 if let Some(mut dt) = expr_to_datatype(&type_expr) {
15040 // For TSQL source: VARCHAR/CHAR without length defaults to 30
15041 let is_tsql_source =
15042 matches!(source, DialectType::TSQL | DialectType::Fabric);
15043 if is_tsql_source {
15044 match &dt {
15045 DataType::VarChar { length: None, .. } => {
15046 dt = DataType::VarChar {
15047 length: Some(30),
15048 parenthesized_length: false,
15049 };
15050 }
15051 DataType::Char { length: None } => {
15052 dt = DataType::Char { length: Some(30) };
15053 }
15054 _ => {}
15055 }
15056 }
15057
15058 // Determine if this is a string type
15059 let is_string_type = matches!(
15060 dt,
15061 DataType::VarChar { .. }
15062 | DataType::Char { .. }
15063 | DataType::Text
15064 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
15065 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
15066 || name.starts_with("VARCHAR(") || name == "VARCHAR"
15067 || name == "STRING");
15068
15069 // Determine if this is a date/time type
15070 let is_datetime_type = matches!(
15071 dt,
15072 DataType::Timestamp { .. } | DataType::Date
15073 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
15074 || name == "DATETIME2" || name == "SMALLDATETIME");
15075
15076 // Check for date conversion with style
15077 if style.is_some() {
15078 let style_num = style.and_then(|s| {
15079 if let Expression::Literal(lit) = s {
15080 if let crate::expressions::Literal::Number(n) =
15081 lit.as_ref()
15082 {
15083 n.parse::<u32>().ok()
15084 } else {
15085 None
15086 }
15087 } else {
15088 None
15089 }
15090 });
15091
15092 // TSQL CONVERT date styles (Java format)
15093 let format_str = style_num.and_then(|n| match n {
15094 101 => Some("MM/dd/yyyy"),
15095 102 => Some("yyyy.MM.dd"),
15096 103 => Some("dd/MM/yyyy"),
15097 104 => Some("dd.MM.yyyy"),
15098 105 => Some("dd-MM-yyyy"),
15099 108 => Some("HH:mm:ss"),
15100 110 => Some("MM-dd-yyyy"),
15101 112 => Some("yyyyMMdd"),
15102 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
15103 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
15104 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
15105 _ => None,
15106 });
15107
15108 // Non-string, non-datetime types with style: just CAST, ignore the style
15109 if !is_string_type && !is_datetime_type {
15110 let cast_expr = if is_try {
15111 Expression::TryCast(Box::new(
15112 crate::expressions::Cast {
15113 this: value_expr,
15114 to: dt,
15115 trailing_comments: Vec::new(),
15116 double_colon_syntax: false,
15117 format: None,
15118 default: None,
15119 inferred_type: None,
15120 },
15121 ))
15122 } else {
15123 Expression::Cast(Box::new(
15124 crate::expressions::Cast {
15125 this: value_expr,
15126 to: dt,
15127 trailing_comments: Vec::new(),
15128 double_colon_syntax: false,
15129 format: None,
15130 default: None,
15131 inferred_type: None,
15132 },
15133 ))
15134 };
15135 return Ok(cast_expr);
15136 }
15137
15138 if let Some(java_fmt) = format_str {
15139 let c_fmt = java_fmt
15140 .replace("yyyy", "%Y")
15141 .replace("MM", "%m")
15142 .replace("dd", "%d")
15143 .replace("HH", "%H")
15144 .replace("mm", "%M")
15145 .replace("ss", "%S")
15146 .replace("SSSSSS", "%f")
15147 .replace("SSS", "%f")
15148 .replace("'T'", "T");
15149
15150 // For datetime target types: style is the INPUT format for parsing strings -> dates
15151 if is_datetime_type {
15152 match target {
15153 DialectType::DuckDB => {
15154 return Ok(Expression::Function(Box::new(
15155 Function::new(
15156 "STRPTIME".to_string(),
15157 vec![
15158 value_expr,
15159 Expression::string(&c_fmt),
15160 ],
15161 ),
15162 )));
15163 }
15164 DialectType::Spark
15165 | DialectType::Databricks => {
15166 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
15167 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
15168 let func_name =
15169 if matches!(dt, DataType::Date) {
15170 "TO_DATE"
15171 } else {
15172 "TO_TIMESTAMP"
15173 };
15174 return Ok(Expression::Function(Box::new(
15175 Function::new(
15176 func_name.to_string(),
15177 vec![
15178 value_expr,
15179 Expression::string(java_fmt),
15180 ],
15181 ),
15182 )));
15183 }
15184 DialectType::Hive => {
15185 return Ok(Expression::Function(Box::new(
15186 Function::new(
15187 "TO_TIMESTAMP".to_string(),
15188 vec![
15189 value_expr,
15190 Expression::string(java_fmt),
15191 ],
15192 ),
15193 )));
15194 }
15195 _ => {
15196 return Ok(Expression::Cast(Box::new(
15197 crate::expressions::Cast {
15198 this: value_expr,
15199 to: dt,
15200 trailing_comments: Vec::new(),
15201 double_colon_syntax: false,
15202 format: None,
15203 default: None,
15204 inferred_type: None,
15205 },
15206 )));
15207 }
15208 }
15209 }
15210
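// For string target types: style is the OUTPUT format for dates -> strings
// NOTE(review): the DuckDB and Hive arms below emit STRPTIME / TO_TIMESTAMP, which parse
// strings into timestamps rather than format dates as strings (DuckDB's formatter is
// STRFTIME) - confirm whether these two arms are intended.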
15212 match target {
15213 DialectType::DuckDB => Ok(Expression::Function(
15214 Box::new(Function::new(
15215 "STRPTIME".to_string(),
15216 vec![
15217 value_expr,
15218 Expression::string(&c_fmt),
15219 ],
15220 )),
15221 )),
15222 DialectType::Spark | DialectType::Databricks => {
15223 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
15224 // Determine the target string type
15225 let string_dt = match &dt {
15226 DataType::VarChar {
15227 length: Some(l),
15228 ..
15229 } => DataType::VarChar {
15230 length: Some(*l),
15231 parenthesized_length: false,
15232 },
15233 DataType::Text => DataType::Custom {
15234 name: "STRING".to_string(),
15235 },
15236 _ => DataType::Custom {
15237 name: "STRING".to_string(),
15238 },
15239 };
15240 let date_format_expr = Expression::Function(
15241 Box::new(Function::new(
15242 "DATE_FORMAT".to_string(),
15243 vec![
15244 value_expr,
15245 Expression::string(java_fmt),
15246 ],
15247 )),
15248 );
15249 let cast_expr = if is_try {
15250 Expression::TryCast(Box::new(
15251 crate::expressions::Cast {
15252 this: date_format_expr,
15253 to: string_dt,
15254 trailing_comments: Vec::new(),
15255 double_colon_syntax: false,
15256 format: None,
15257 default: None,
15258 inferred_type: None,
15259 },
15260 ))
15261 } else {
15262 Expression::Cast(Box::new(
15263 crate::expressions::Cast {
15264 this: date_format_expr,
15265 to: string_dt,
15266 trailing_comments: Vec::new(),
15267 double_colon_syntax: false,
15268 format: None,
15269 default: None,
15270 inferred_type: None,
15271 },
15272 ))
15273 };
15274 Ok(cast_expr)
15275 }
15276 DialectType::MySQL | DialectType::SingleStore => {
15277 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
15278 let mysql_fmt = java_fmt
15279 .replace("yyyy", "%Y")
15280 .replace("MM", "%m")
15281 .replace("dd", "%d")
15282 .replace("HH:mm:ss.SSSSSS", "%T")
15283 .replace("HH:mm:ss", "%T")
15284 .replace("HH", "%H")
15285 .replace("mm", "%i")
15286 .replace("ss", "%S");
15287 let date_format_expr = Expression::Function(
15288 Box::new(Function::new(
15289 "DATE_FORMAT".to_string(),
15290 vec![
15291 value_expr,
15292 Expression::string(&mysql_fmt),
15293 ],
15294 )),
15295 );
15296 // MySQL uses CHAR for string casts
15297 let mysql_dt = match &dt {
15298 DataType::VarChar { length, .. } => {
15299 DataType::Char { length: *length }
15300 }
15301 _ => dt,
15302 };
15303 Ok(Expression::Cast(Box::new(
15304 crate::expressions::Cast {
15305 this: date_format_expr,
15306 to: mysql_dt,
15307 trailing_comments: Vec::new(),
15308 double_colon_syntax: false,
15309 format: None,
15310 default: None,
15311 inferred_type: None,
15312 },
15313 )))
15314 }
15315 DialectType::Hive => {
15316 let func_name = "TO_TIMESTAMP";
15317 Ok(Expression::Function(Box::new(
15318 Function::new(
15319 func_name.to_string(),
15320 vec![
15321 value_expr,
15322 Expression::string(java_fmt),
15323 ],
15324 ),
15325 )))
15326 }
15327 _ => Ok(Expression::Cast(Box::new(
15328 crate::expressions::Cast {
15329 this: value_expr,
15330 to: dt,
15331 trailing_comments: Vec::new(),
15332 double_colon_syntax: false,
15333 format: None,
15334 default: None,
15335 inferred_type: None,
15336 },
15337 ))),
15338 }
15339 } else {
15340 // Unknown style, just CAST
15341 let cast_expr = if is_try {
15342 Expression::TryCast(Box::new(
15343 crate::expressions::Cast {
15344 this: value_expr,
15345 to: dt,
15346 trailing_comments: Vec::new(),
15347 double_colon_syntax: false,
15348 format: None,
15349 default: None,
15350 inferred_type: None,
15351 },
15352 ))
15353 } else {
15354 Expression::Cast(Box::new(
15355 crate::expressions::Cast {
15356 this: value_expr,
15357 to: dt,
15358 trailing_comments: Vec::new(),
15359 double_colon_syntax: false,
15360 format: None,
15361 default: None,
15362 inferred_type: None,
15363 },
15364 ))
15365 };
15366 Ok(cast_expr)
15367 }
15368 } else {
15369 // No style - simple CAST
15370 let final_dt = if matches!(
15371 target,
15372 DialectType::MySQL | DialectType::SingleStore
15373 ) {
15374 match &dt {
15375 DataType::Int { .. }
15376 | DataType::BigInt { .. }
15377 | DataType::SmallInt { .. }
15378 | DataType::TinyInt { .. } => DataType::Custom {
15379 name: "SIGNED".to_string(),
15380 },
15381 DataType::VarChar { length, .. } => {
15382 DataType::Char { length: *length }
15383 }
15384 _ => dt,
15385 }
15386 } else {
15387 dt
15388 };
15389 let cast_expr = if is_try {
15390 Expression::TryCast(Box::new(
15391 crate::expressions::Cast {
15392 this: value_expr,
15393 to: final_dt,
15394 trailing_comments: Vec::new(),
15395 double_colon_syntax: false,
15396 format: None,
15397 default: None,
15398 inferred_type: None,
15399 },
15400 ))
15401 } else {
15402 Expression::Cast(Box::new(crate::expressions::Cast {
15403 this: value_expr,
15404 to: final_dt,
15405 trailing_comments: Vec::new(),
15406 double_colon_syntax: false,
15407 format: None,
15408 default: None,
15409 inferred_type: None,
15410 }))
15411 };
15412 Ok(cast_expr)
15413 }
15414 } else {
15415 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
15416 Ok(Expression::Function(f))
15417 }
15418 }
15419 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
15420 "STRFTIME" if f.args.len() == 2 => {
15421 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
15422 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
15423 // SQLite: args[0] = format, args[1] = value
15424 (f.args[1].clone(), &f.args[0])
15425 } else {
15426 // DuckDB and others: args[0] = value, args[1] = format
15427 (f.args[0].clone(), &f.args[1])
15428 };
15429
// Helper: translate C `strftime` specifiers into Java-style datetime pattern letters.
// The substitutions run one after another, in exactly the order listed, each
// over the whole string produced by the previous one.
fn c_to_java_format(fmt: &str) -> String {
    const SPECIFIERS: &[(&str, &str)] = &[
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%j", "DDD"),
        ("%a", "EEE"),
        ("%b", "MMM"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];

    SPECIFIERS
        .iter()
        .fold(fmt.to_string(), |acc, &(c_spec, java_pat)| {
            acc.replace(c_spec, java_pat)
        })
}
15452
15453 // Helper: recursively convert format strings within expressions (handles CONCAT)
15454 fn convert_fmt_expr(
15455 expr: &Expression,
15456 converter: &dyn Fn(&str) -> String,
15457 ) -> Expression {
15458 match expr {
15459 Expression::Literal(lit)
15460 if matches!(
15461 lit.as_ref(),
15462 crate::expressions::Literal::String(_)
15463 ) =>
15464 {
15465 let crate::expressions::Literal::String(s) =
15466 lit.as_ref()
15467 else {
15468 unreachable!()
15469 };
15470 Expression::string(&converter(s))
15471 }
15472 Expression::Function(func)
15473 if func.name.eq_ignore_ascii_case("CONCAT") =>
15474 {
15475 let new_args: Vec<Expression> = func
15476 .args
15477 .iter()
15478 .map(|a| convert_fmt_expr(a, converter))
15479 .collect();
15480 Expression::Function(Box::new(Function::new(
15481 "CONCAT".to_string(),
15482 new_args,
15483 )))
15484 }
15485 other => other.clone(),
15486 }
15487 }
15488
15489 match target {
15490 DialectType::DuckDB => {
15491 if matches!(source, DialectType::SQLite) {
15492 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
15493 let cast_val = Expression::Cast(Box::new(Cast {
15494 this: val,
15495 to: crate::expressions::DataType::Timestamp {
15496 precision: None,
15497 timezone: false,
15498 },
15499 trailing_comments: Vec::new(),
15500 double_colon_syntax: false,
15501 format: None,
15502 default: None,
15503 inferred_type: None,
15504 }));
15505 Ok(Expression::Function(Box::new(Function::new(
15506 "STRFTIME".to_string(),
15507 vec![cast_val, fmt_expr.clone()],
15508 ))))
15509 } else {
15510 Ok(Expression::Function(f))
15511 }
15512 }
15513 DialectType::Spark
15514 | DialectType::Databricks
15515 | DialectType::Hive => {
15516 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
15517 let converted_fmt =
15518 convert_fmt_expr(fmt_expr, &c_to_java_format);
15519 Ok(Expression::Function(Box::new(Function::new(
15520 "DATE_FORMAT".to_string(),
15521 vec![val, converted_fmt],
15522 ))))
15523 }
15524 DialectType::TSQL | DialectType::Fabric => {
15525 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
15526 let converted_fmt =
15527 convert_fmt_expr(fmt_expr, &c_to_java_format);
15528 Ok(Expression::Function(Box::new(Function::new(
15529 "FORMAT".to_string(),
15530 vec![val, converted_fmt],
15531 ))))
15532 }
15533 DialectType::Presto
15534 | DialectType::Trino
15535 | DialectType::Athena => {
15536 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
15537 if let Expression::Literal(lit) = fmt_expr {
15538 if let crate::expressions::Literal::String(s) =
15539 lit.as_ref()
15540 {
15541 let presto_fmt = duckdb_to_presto_format(s);
15542 Ok(Expression::Function(Box::new(Function::new(
15543 "DATE_FORMAT".to_string(),
15544 vec![val, Expression::string(&presto_fmt)],
15545 ))))
15546 } else {
15547 Ok(Expression::Function(Box::new(Function::new(
15548 "DATE_FORMAT".to_string(),
15549 vec![val, fmt_expr.clone()],
15550 ))))
15551 }
15552 } else {
15553 Ok(Expression::Function(Box::new(Function::new(
15554 "DATE_FORMAT".to_string(),
15555 vec![val, fmt_expr.clone()],
15556 ))))
15557 }
15558 }
15559 DialectType::BigQuery => {
15560 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
15561 if let Expression::Literal(lit) = fmt_expr {
15562 if let crate::expressions::Literal::String(s) =
15563 lit.as_ref()
15564 {
15565 let bq_fmt = duckdb_to_bigquery_format(s);
15566 Ok(Expression::Function(Box::new(Function::new(
15567 "FORMAT_DATE".to_string(),
15568 vec![Expression::string(&bq_fmt), val],
15569 ))))
15570 } else {
15571 Ok(Expression::Function(Box::new(Function::new(
15572 "FORMAT_DATE".to_string(),
15573 vec![fmt_expr.clone(), val],
15574 ))))
15575 }
15576 } else {
15577 Ok(Expression::Function(Box::new(Function::new(
15578 "FORMAT_DATE".to_string(),
15579 vec![fmt_expr.clone(), val],
15580 ))))
15581 }
15582 }
15583 DialectType::PostgreSQL | DialectType::Redshift => {
15584 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
15585 if let Expression::Literal(lit) = fmt_expr {
15586 if let crate::expressions::Literal::String(s) =
15587 lit.as_ref()
15588 {
15589 let pg_fmt = s
15590 .replace("%Y", "YYYY")
15591 .replace("%m", "MM")
15592 .replace("%d", "DD")
15593 .replace("%H", "HH24")
15594 .replace("%M", "MI")
15595 .replace("%S", "SS")
15596 .replace("%y", "YY")
15597 .replace("%-m", "FMMM")
15598 .replace("%-d", "FMDD")
15599 .replace("%-H", "FMHH24")
15600 .replace("%-I", "FMHH12")
15601 .replace("%p", "AM")
15602 .replace("%F", "YYYY-MM-DD")
15603 .replace("%T", "HH24:MI:SS");
15604 Ok(Expression::Function(Box::new(Function::new(
15605 "TO_CHAR".to_string(),
15606 vec![val, Expression::string(&pg_fmt)],
15607 ))))
15608 } else {
15609 Ok(Expression::Function(Box::new(Function::new(
15610 "TO_CHAR".to_string(),
15611 vec![val, fmt_expr.clone()],
15612 ))))
15613 }
15614 } else {
15615 Ok(Expression::Function(Box::new(Function::new(
15616 "TO_CHAR".to_string(),
15617 vec![val, fmt_expr.clone()],
15618 ))))
15619 }
15620 }
15621 _ => Ok(Expression::Function(f)),
15622 }
15623 }
15624 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
15625 "STRPTIME" if f.args.len() == 2 => {
15626 let val = f.args[0].clone();
15627 let fmt_expr = &f.args[1];
15628
// Translate the C `strptime` specifiers handled here into Java-style pattern
// letters. Substitutions are applied sequentially in table order; specifiers
// that are not in the table are left in the string untouched.
fn c_to_java_format_parse(fmt: &str) -> String {
    let table = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];

    let mut converted = fmt.to_string();
    for (c_spec, java_pat) in table {
        converted = converted.replace(c_spec, java_pat);
    }
    converted
}
15647
15648 match target {
15649 DialectType::DuckDB => Ok(Expression::Function(f)),
15650 DialectType::Spark | DialectType::Databricks => {
15651 // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
15652 if let Expression::Literal(lit) = fmt_expr {
15653 if let crate::expressions::Literal::String(s) =
15654 lit.as_ref()
15655 {
15656 let java_fmt = c_to_java_format_parse(s);
15657 Ok(Expression::Function(Box::new(Function::new(
15658 "TO_TIMESTAMP".to_string(),
15659 vec![val, Expression::string(&java_fmt)],
15660 ))))
15661 } else {
15662 Ok(Expression::Function(Box::new(Function::new(
15663 "TO_TIMESTAMP".to_string(),
15664 vec![val, fmt_expr.clone()],
15665 ))))
15666 }
15667 } else {
15668 Ok(Expression::Function(Box::new(Function::new(
15669 "TO_TIMESTAMP".to_string(),
15670 vec![val, fmt_expr.clone()],
15671 ))))
15672 }
15673 }
15674 DialectType::Hive => {
15675 // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
15676 if let Expression::Literal(lit) = fmt_expr {
15677 if let crate::expressions::Literal::String(s) =
15678 lit.as_ref()
15679 {
15680 let java_fmt = c_to_java_format_parse(s);
15681 let unix_ts =
15682 Expression::Function(Box::new(Function::new(
15683 "UNIX_TIMESTAMP".to_string(),
15684 vec![val, Expression::string(&java_fmt)],
15685 )));
15686 let from_unix =
15687 Expression::Function(Box::new(Function::new(
15688 "FROM_UNIXTIME".to_string(),
15689 vec![unix_ts],
15690 )));
15691 Ok(Expression::Cast(Box::new(
15692 crate::expressions::Cast {
15693 this: from_unix,
15694 to: DataType::Timestamp {
15695 timezone: false,
15696 precision: None,
15697 },
15698 trailing_comments: Vec::new(),
15699 double_colon_syntax: false,
15700 format: None,
15701 default: None,
15702 inferred_type: None,
15703 },
15704 )))
15705 } else {
15706 Ok(Expression::Function(f))
15707 }
15708 } else {
15709 Ok(Expression::Function(f))
15710 }
15711 }
15712 DialectType::Presto
15713 | DialectType::Trino
15714 | DialectType::Athena => {
15715 // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
15716 if let Expression::Literal(lit) = fmt_expr {
15717 if let crate::expressions::Literal::String(s) =
15718 lit.as_ref()
15719 {
15720 let presto_fmt = duckdb_to_presto_format(s);
15721 Ok(Expression::Function(Box::new(Function::new(
15722 "DATE_PARSE".to_string(),
15723 vec![val, Expression::string(&presto_fmt)],
15724 ))))
15725 } else {
15726 Ok(Expression::Function(Box::new(Function::new(
15727 "DATE_PARSE".to_string(),
15728 vec![val, fmt_expr.clone()],
15729 ))))
15730 }
15731 } else {
15732 Ok(Expression::Function(Box::new(Function::new(
15733 "DATE_PARSE".to_string(),
15734 vec![val, fmt_expr.clone()],
15735 ))))
15736 }
15737 }
15738 DialectType::BigQuery => {
15739 // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
15740 if let Expression::Literal(lit) = fmt_expr {
15741 if let crate::expressions::Literal::String(s) =
15742 lit.as_ref()
15743 {
15744 let bq_fmt = duckdb_to_bigquery_format(s);
15745 Ok(Expression::Function(Box::new(Function::new(
15746 "PARSE_TIMESTAMP".to_string(),
15747 vec![Expression::string(&bq_fmt), val],
15748 ))))
15749 } else {
15750 Ok(Expression::Function(Box::new(Function::new(
15751 "PARSE_TIMESTAMP".to_string(),
15752 vec![fmt_expr.clone(), val],
15753 ))))
15754 }
15755 } else {
15756 Ok(Expression::Function(Box::new(Function::new(
15757 "PARSE_TIMESTAMP".to_string(),
15758 vec![fmt_expr.clone(), val],
15759 ))))
15760 }
15761 }
15762 _ => Ok(Expression::Function(f)),
15763 }
15764 }
15765 // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
15766 "DATE_FORMAT"
15767 if f.args.len() >= 2
15768 && matches!(
15769 source,
15770 DialectType::Presto
15771 | DialectType::Trino
15772 | DialectType::Athena
15773 ) =>
15774 {
15775 let val = f.args[0].clone();
15776 let fmt_expr = &f.args[1];
15777
15778 match target {
15779 DialectType::Presto
15780 | DialectType::Trino
15781 | DialectType::Athena => {
15782 // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
15783 if let Expression::Literal(lit) = fmt_expr {
15784 if let crate::expressions::Literal::String(s) =
15785 lit.as_ref()
15786 {
15787 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
15788 Ok(Expression::Function(Box::new(Function::new(
15789 "DATE_FORMAT".to_string(),
15790 vec![val, Expression::string(&normalized)],
15791 ))))
15792 } else {
15793 Ok(Expression::Function(f))
15794 }
15795 } else {
15796 Ok(Expression::Function(f))
15797 }
15798 }
15799 DialectType::Hive
15800 | DialectType::Spark
15801 | DialectType::Databricks => {
15802 // Convert Presto C-style to Java-style format
15803 if let Expression::Literal(lit) = fmt_expr {
15804 if let crate::expressions::Literal::String(s) =
15805 lit.as_ref()
15806 {
15807 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
15808 Ok(Expression::Function(Box::new(Function::new(
15809 "DATE_FORMAT".to_string(),
15810 vec![val, Expression::string(&java_fmt)],
15811 ))))
15812 } else {
15813 Ok(Expression::Function(f))
15814 }
15815 } else {
15816 Ok(Expression::Function(f))
15817 }
15818 }
15819 DialectType::DuckDB => {
15820 // Convert to STRFTIME(val, duckdb_fmt)
15821 if let Expression::Literal(lit) = fmt_expr {
15822 if let crate::expressions::Literal::String(s) =
15823 lit.as_ref()
15824 {
15825 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
15826 Ok(Expression::Function(Box::new(Function::new(
15827 "STRFTIME".to_string(),
15828 vec![val, Expression::string(&duckdb_fmt)],
15829 ))))
15830 } else {
15831 Ok(Expression::Function(Box::new(Function::new(
15832 "STRFTIME".to_string(),
15833 vec![val, fmt_expr.clone()],
15834 ))))
15835 }
15836 } else {
15837 Ok(Expression::Function(Box::new(Function::new(
15838 "STRFTIME".to_string(),
15839 vec![val, fmt_expr.clone()],
15840 ))))
15841 }
15842 }
15843 DialectType::BigQuery => {
15844 // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
15845 if let Expression::Literal(lit) = fmt_expr {
15846 if let crate::expressions::Literal::String(s) =
15847 lit.as_ref()
15848 {
15849 let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
15850 Ok(Expression::Function(Box::new(Function::new(
15851 "FORMAT_DATE".to_string(),
15852 vec![Expression::string(&bq_fmt), val],
15853 ))))
15854 } else {
15855 Ok(Expression::Function(Box::new(Function::new(
15856 "FORMAT_DATE".to_string(),
15857 vec![fmt_expr.clone(), val],
15858 ))))
15859 }
15860 } else {
15861 Ok(Expression::Function(Box::new(Function::new(
15862 "FORMAT_DATE".to_string(),
15863 vec![fmt_expr.clone(), val],
15864 ))))
15865 }
15866 }
15867 _ => Ok(Expression::Function(f)),
15868 }
15869 }
15870 // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
15871 "DATE_PARSE"
15872 if f.args.len() >= 2
15873 && matches!(
15874 source,
15875 DialectType::Presto
15876 | DialectType::Trino
15877 | DialectType::Athena
15878 ) =>
15879 {
15880 let val = f.args[0].clone();
15881 let fmt_expr = &f.args[1];
15882
15883 match target {
15884 DialectType::Presto
15885 | DialectType::Trino
15886 | DialectType::Athena => {
15887 // Presto -> Presto: normalize format
15888 if let Expression::Literal(lit) = fmt_expr {
15889 if let crate::expressions::Literal::String(s) =
15890 lit.as_ref()
15891 {
15892 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
15893 Ok(Expression::Function(Box::new(Function::new(
15894 "DATE_PARSE".to_string(),
15895 vec![val, Expression::string(&normalized)],
15896 ))))
15897 } else {
15898 Ok(Expression::Function(f))
15899 }
15900 } else {
15901 Ok(Expression::Function(f))
15902 }
15903 }
15904 DialectType::Hive => {
15905 // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
15906 if let Expression::Literal(lit) = fmt_expr {
15907 if let crate::expressions::Literal::String(s) =
15908 lit.as_ref()
15909 {
15910 if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
15911 || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
15912 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
15913 this: val,
15914 to: DataType::Timestamp { timezone: false, precision: None },
15915 trailing_comments: Vec::new(),
15916 double_colon_syntax: false,
15917 format: None,
15918 default: None,
15919 inferred_type: None,
15920 })))
15921 } else {
15922 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
15923 Ok(Expression::Function(Box::new(Function::new(
15924 "TO_TIMESTAMP".to_string(),
15925 vec![val, Expression::string(&java_fmt)],
15926 ))))
15927 }
15928 } else {
15929 Ok(Expression::Function(f))
15930 }
15931 } else {
15932 Ok(Expression::Function(f))
15933 }
15934 }
15935 DialectType::Spark | DialectType::Databricks => {
15936 // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
15937 if let Expression::Literal(lit) = fmt_expr {
15938 if let crate::expressions::Literal::String(s) =
15939 lit.as_ref()
15940 {
15941 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
15942 Ok(Expression::Function(Box::new(Function::new(
15943 "TO_TIMESTAMP".to_string(),
15944 vec![val, Expression::string(&java_fmt)],
15945 ))))
15946 } else {
15947 Ok(Expression::Function(f))
15948 }
15949 } else {
15950 Ok(Expression::Function(f))
15951 }
15952 }
15953 DialectType::DuckDB => {
15954 // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
15955 if let Expression::Literal(lit) = fmt_expr {
15956 if let crate::expressions::Literal::String(s) =
15957 lit.as_ref()
15958 {
15959 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
15960 Ok(Expression::Function(Box::new(Function::new(
15961 "STRPTIME".to_string(),
15962 vec![val, Expression::string(&duckdb_fmt)],
15963 ))))
15964 } else {
15965 Ok(Expression::Function(Box::new(Function::new(
15966 "STRPTIME".to_string(),
15967 vec![val, fmt_expr.clone()],
15968 ))))
15969 }
15970 } else {
15971 Ok(Expression::Function(Box::new(Function::new(
15972 "STRPTIME".to_string(),
15973 vec![val, fmt_expr.clone()],
15974 ))))
15975 }
15976 }
15977 _ => Ok(Expression::Function(f)),
15978 }
15979 }
15980 // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
15981 "FROM_BASE64"
15982 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
15983 {
15984 Ok(Expression::Function(Box::new(Function::new(
15985 "UNBASE64".to_string(),
15986 f.args,
15987 ))))
15988 }
15989 "TO_BASE64"
15990 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
15991 {
15992 Ok(Expression::Function(Box::new(Function::new(
15993 "BASE64".to_string(),
15994 f.args,
15995 ))))
15996 }
15997 // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
15998 "FROM_UNIXTIME"
15999 if f.args.len() == 1
16000 && matches!(
16001 source,
16002 DialectType::Presto
16003 | DialectType::Trino
16004 | DialectType::Athena
16005 )
16006 && matches!(
16007 target,
16008 DialectType::Spark | DialectType::Databricks
16009 ) =>
16010 {
16011 // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
16012 let from_unix = Expression::Function(Box::new(Function::new(
16013 "FROM_UNIXTIME".to_string(),
16014 f.args,
16015 )));
16016 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
16017 this: from_unix,
16018 to: DataType::Timestamp {
16019 timezone: false,
16020 precision: None,
16021 },
16022 trailing_comments: Vec::new(),
16023 double_colon_syntax: false,
16024 format: None,
16025 default: None,
16026 inferred_type: None,
16027 })))
16028 }
16029 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
16030 "DATE_FORMAT"
16031 if f.args.len() >= 2
16032 && !matches!(
16033 target,
16034 DialectType::Hive
16035 | DialectType::Spark
16036 | DialectType::Databricks
16037 | DialectType::MySQL
16038 | DialectType::SingleStore
16039 ) =>
16040 {
16041 let val = f.args[0].clone();
16042 let fmt_expr = &f.args[1];
16043 let is_hive_source = matches!(
16044 source,
16045 DialectType::Hive
16046 | DialectType::Spark
16047 | DialectType::Databricks
16048 );
16049
// Translate a Java/Hive datetime pattern into C `strftime` specifiers.
//
// Pass 1 rewrites the multi-letter pattern groups (longer groups first so that
// e.g. `yyyy` is not consumed as two `yy`). `EEEE` (full weekday name) maps to
// `%A`: in C strftime, DuckDB and BigQuery `%W` is the week number, not the
// weekday name.
// Pass 2 rewrites the single-letter timezone patterns (`z` -> `%Z` name,
// `Z` -> `%z` offset) while skipping over every `%x` pair, so specifiers that
// already exist in the string are never touched.
fn java_to_c_format(fmt: &str) -> String {
    let replaced = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        .replace("EEEE", "%A")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");

    let mut out = String::with_capacity(replaced.len());
    let mut chars = replaced.chars();
    while let Some(ch) = chars.next() {
        match ch {
            '%' => {
                // Copy the specifier verbatim; a trailing lone '%' is kept as is.
                out.push('%');
                if let Some(spec) = chars.next() {
                    out.push(spec);
                }
            }
            'z' => out.push_str("%Z"),
            'Z' => out.push_str("%z"),
            other => out.push(other),
        }
    }
    out
}

// Translate a Java/Hive datetime pattern into a Presto/Trino `DATE_FORMAT` pattern.
//
// Presto uses MySQL-style specifiers, which differ from C strftime in two
// places that matter here: minutes are `%i` (`%M` is the month NAME) and the
// full weekday name is `%W`. `%H:%M:%S` is collapsed to `%T` first, so only
// stand-alone minute specifiers are rewritten to `%i`.
fn java_to_presto_format(fmt: &str) -> String {
    java_to_c_format(fmt)
        .replace("%H:%M:%S", "%T")
        .replace("%M", "%i")
        .replace("%A", "%W")
}

// Translate a Java/Hive datetime pattern into a BigQuery format string,
// using the `%F` (yyyy-MM-dd) and `%T` (HH:mm:ss) shorthands where they apply.
fn java_to_bq_format(fmt: &str) -> String {
    java_to_c_format(fmt)
        .replace("%Y-%m-%d", "%F")
        .replace("%H:%M:%S", "%T")
}
16101
16102 // For Hive source, CAST string literals to appropriate type
16103 let cast_val = if is_hive_source {
16104 match &val {
16105 Expression::Literal(lit)
16106 if matches!(
16107 lit.as_ref(),
16108 crate::expressions::Literal::String(_)
16109 ) =>
16110 {
16111 match target {
16112 DialectType::DuckDB
16113 | DialectType::Presto
16114 | DialectType::Trino
16115 | DialectType::Athena => {
16116 Self::ensure_cast_timestamp(val.clone())
16117 }
16118 DialectType::BigQuery => {
16119 // BigQuery: CAST(val AS DATETIME)
16120 Expression::Cast(Box::new(
16121 crate::expressions::Cast {
16122 this: val.clone(),
16123 to: DataType::Custom {
16124 name: "DATETIME".to_string(),
16125 },
16126 trailing_comments: vec![],
16127 double_colon_syntax: false,
16128 format: None,
16129 default: None,
16130 inferred_type: None,
16131 },
16132 ))
16133 }
16134 _ => val.clone(),
16135 }
16136 }
16137 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
16138 Expression::Cast(c)
16139 if matches!(c.to, DataType::Date)
16140 && matches!(
16141 target,
16142 DialectType::Presto
16143 | DialectType::Trino
16144 | DialectType::Athena
16145 ) =>
16146 {
16147 Expression::Cast(Box::new(crate::expressions::Cast {
16148 this: val.clone(),
16149 to: DataType::Timestamp {
16150 timezone: false,
16151 precision: None,
16152 },
16153 trailing_comments: vec![],
16154 double_colon_syntax: false,
16155 format: None,
16156 default: None,
16157 inferred_type: None,
16158 }))
16159 }
16160 Expression::Literal(lit)
16161 if matches!(
16162 lit.as_ref(),
16163 crate::expressions::Literal::Date(_)
16164 ) && matches!(
16165 target,
16166 DialectType::Presto
16167 | DialectType::Trino
16168 | DialectType::Athena
16169 ) =>
16170 {
16171 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
16172 let cast_date = Self::date_literal_to_cast(val.clone());
16173 Expression::Cast(Box::new(crate::expressions::Cast {
16174 this: cast_date,
16175 to: DataType::Timestamp {
16176 timezone: false,
16177 precision: None,
16178 },
16179 trailing_comments: vec![],
16180 double_colon_syntax: false,
16181 format: None,
16182 default: None,
16183 inferred_type: None,
16184 }))
16185 }
16186 _ => val.clone(),
16187 }
16188 } else {
16189 val.clone()
16190 };
16191
16192 match target {
16193 DialectType::DuckDB => {
16194 if let Expression::Literal(lit) = fmt_expr {
16195 if let crate::expressions::Literal::String(s) =
16196 lit.as_ref()
16197 {
16198 let c_fmt = if is_hive_source {
16199 java_to_c_format(s)
16200 } else {
16201 s.clone()
16202 };
16203 Ok(Expression::Function(Box::new(Function::new(
16204 "STRFTIME".to_string(),
16205 vec![cast_val, Expression::string(&c_fmt)],
16206 ))))
16207 } else {
16208 Ok(Expression::Function(Box::new(Function::new(
16209 "STRFTIME".to_string(),
16210 vec![cast_val, fmt_expr.clone()],
16211 ))))
16212 }
16213 } else {
16214 Ok(Expression::Function(Box::new(Function::new(
16215 "STRFTIME".to_string(),
16216 vec![cast_val, fmt_expr.clone()],
16217 ))))
16218 }
16219 }
16220 DialectType::Presto
16221 | DialectType::Trino
16222 | DialectType::Athena => {
16223 if is_hive_source {
16224 if let Expression::Literal(lit) = fmt_expr {
16225 if let crate::expressions::Literal::String(s) =
16226 lit.as_ref()
16227 {
16228 let p_fmt = java_to_presto_format(s);
16229 Ok(Expression::Function(Box::new(
16230 Function::new(
16231 "DATE_FORMAT".to_string(),
16232 vec![
16233 cast_val,
16234 Expression::string(&p_fmt),
16235 ],
16236 ),
16237 )))
16238 } else {
16239 Ok(Expression::Function(Box::new(
16240 Function::new(
16241 "DATE_FORMAT".to_string(),
16242 vec![cast_val, fmt_expr.clone()],
16243 ),
16244 )))
16245 }
16246 } else {
16247 Ok(Expression::Function(Box::new(Function::new(
16248 "DATE_FORMAT".to_string(),
16249 vec![cast_val, fmt_expr.clone()],
16250 ))))
16251 }
16252 } else {
16253 Ok(Expression::Function(Box::new(Function::new(
16254 "DATE_FORMAT".to_string(),
16255 f.args,
16256 ))))
16257 }
16258 }
16259 DialectType::BigQuery => {
16260 // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
16261 if let Expression::Literal(lit) = fmt_expr {
16262 if let crate::expressions::Literal::String(s) =
16263 lit.as_ref()
16264 {
16265 let bq_fmt = if is_hive_source {
16266 java_to_bq_format(s)
16267 } else {
16268 java_to_c_format(s)
16269 };
16270 Ok(Expression::Function(Box::new(Function::new(
16271 "FORMAT_DATE".to_string(),
16272 vec![Expression::string(&bq_fmt), cast_val],
16273 ))))
16274 } else {
16275 Ok(Expression::Function(Box::new(Function::new(
16276 "FORMAT_DATE".to_string(),
16277 vec![fmt_expr.clone(), cast_val],
16278 ))))
16279 }
16280 } else {
16281 Ok(Expression::Function(Box::new(Function::new(
16282 "FORMAT_DATE".to_string(),
16283 vec![fmt_expr.clone(), cast_val],
16284 ))))
16285 }
16286 }
16287 DialectType::PostgreSQL | DialectType::Redshift => {
16288 if let Expression::Literal(lit) = fmt_expr {
16289 if let crate::expressions::Literal::String(s) =
16290 lit.as_ref()
16291 {
16292 let pg_fmt = s
16293 .replace("yyyy", "YYYY")
16294 .replace("MM", "MM")
16295 .replace("dd", "DD")
16296 .replace("HH", "HH24")
16297 .replace("mm", "MI")
16298 .replace("ss", "SS")
16299 .replace("yy", "YY");
16300 Ok(Expression::Function(Box::new(Function::new(
16301 "TO_CHAR".to_string(),
16302 vec![val, Expression::string(&pg_fmt)],
16303 ))))
16304 } else {
16305 Ok(Expression::Function(Box::new(Function::new(
16306 "TO_CHAR".to_string(),
16307 vec![val, fmt_expr.clone()],
16308 ))))
16309 }
16310 } else {
16311 Ok(Expression::Function(Box::new(Function::new(
16312 "TO_CHAR".to_string(),
16313 vec![val, fmt_expr.clone()],
16314 ))))
16315 }
16316 }
16317 _ => Ok(Expression::Function(f)),
16318 }
16319 }
16320 // DATEDIFF(unit, start, end) - 3-arg form
16321 // SQLite uses DATEDIFF(date1, date2, unit_string) instead
16322 "DATEDIFF" if f.args.len() == 3 => {
16323 let mut args = f.args;
16324 // SQLite source: args = (date1, date2, unit_string)
16325 // Standard source: args = (unit, start, end)
16326 let (_arg0, arg1, arg2, unit_str) =
16327 if matches!(source, DialectType::SQLite) {
16328 let date1 = args.remove(0);
16329 let date2 = args.remove(0);
16330 let unit_expr = args.remove(0);
16331 let unit_s = Self::get_unit_str_static(&unit_expr);
16332
16333 // For SQLite target, generate JULIANDAY arithmetic directly
16334 if matches!(target, DialectType::SQLite) {
16335 let jd_first = Expression::Function(Box::new(
16336 Function::new("JULIANDAY".to_string(), vec![date1]),
16337 ));
16338 let jd_second = Expression::Function(Box::new(
16339 Function::new("JULIANDAY".to_string(), vec![date2]),
16340 ));
16341 let diff = Expression::Sub(Box::new(
16342 crate::expressions::BinaryOp::new(
16343 jd_first, jd_second,
16344 ),
16345 ));
16346 let paren_diff = Expression::Paren(Box::new(
16347 crate::expressions::Paren {
16348 this: diff,
16349 trailing_comments: Vec::new(),
16350 },
16351 ));
16352 let adjusted = match unit_s.as_str() {
16353 "HOUR" => Expression::Mul(Box::new(
16354 crate::expressions::BinaryOp::new(
16355 paren_diff,
16356 Expression::Literal(Box::new(
16357 Literal::Number("24.0".to_string()),
16358 )),
16359 ),
16360 )),
16361 "MINUTE" => Expression::Mul(Box::new(
16362 crate::expressions::BinaryOp::new(
16363 paren_diff,
16364 Expression::Literal(Box::new(
16365 Literal::Number("1440.0".to_string()),
16366 )),
16367 ),
16368 )),
16369 "SECOND" => Expression::Mul(Box::new(
16370 crate::expressions::BinaryOp::new(
16371 paren_diff,
16372 Expression::Literal(Box::new(
16373 Literal::Number("86400.0".to_string()),
16374 )),
16375 ),
16376 )),
16377 "MONTH" => Expression::Div(Box::new(
16378 crate::expressions::BinaryOp::new(
16379 paren_diff,
16380 Expression::Literal(Box::new(
16381 Literal::Number("30.0".to_string()),
16382 )),
16383 ),
16384 )),
16385 "YEAR" => Expression::Div(Box::new(
16386 crate::expressions::BinaryOp::new(
16387 paren_diff,
16388 Expression::Literal(Box::new(
16389 Literal::Number("365.0".to_string()),
16390 )),
16391 ),
16392 )),
16393 _ => paren_diff,
16394 };
16395 return Ok(Expression::Cast(Box::new(Cast {
16396 this: adjusted,
16397 to: DataType::Int {
16398 length: None,
16399 integer_spelling: true,
16400 },
16401 trailing_comments: vec![],
16402 double_colon_syntax: false,
16403 format: None,
16404 default: None,
16405 inferred_type: None,
16406 })));
16407 }
16408
16409 // For other targets, remap to standard (unit, start, end) form
16410 let unit_ident =
16411 Expression::Identifier(Identifier::new(&unit_s));
16412 (unit_ident, date1, date2, unit_s)
16413 } else {
16414 let arg0 = args.remove(0);
16415 let arg1 = args.remove(0);
16416 let arg2 = args.remove(0);
16417 let unit_s = Self::get_unit_str_static(&arg0);
16418 (arg0, arg1, arg2, unit_s)
16419 };
16420
16421 // For Hive/Spark source, string literal dates need to be cast
16422 // Note: Databricks is excluded - it handles string args like standard SQL
16423 let is_hive_spark =
16424 matches!(source, DialectType::Hive | DialectType::Spark);
16425
16426 match target {
16427 DialectType::Snowflake => {
16428 let unit =
16429 Expression::Identifier(Identifier::new(&unit_str));
16430 // Use ensure_to_date_preserved to add TO_DATE with a marker
16431 // that prevents the Snowflake TO_DATE handler from converting it to CAST
16432 let d1 = if is_hive_spark {
16433 Self::ensure_to_date_preserved(arg1)
16434 } else {
16435 arg1
16436 };
16437 let d2 = if is_hive_spark {
16438 Self::ensure_to_date_preserved(arg2)
16439 } else {
16440 arg2
16441 };
16442 Ok(Expression::Function(Box::new(Function::new(
16443 "DATEDIFF".to_string(),
16444 vec![unit, d1, d2],
16445 ))))
16446 }
16447 DialectType::Redshift => {
16448 let unit =
16449 Expression::Identifier(Identifier::new(&unit_str));
16450 let d1 = if is_hive_spark {
16451 Self::ensure_cast_date(arg1)
16452 } else {
16453 arg1
16454 };
16455 let d2 = if is_hive_spark {
16456 Self::ensure_cast_date(arg2)
16457 } else {
16458 arg2
16459 };
16460 Ok(Expression::Function(Box::new(Function::new(
16461 "DATEDIFF".to_string(),
16462 vec![unit, d1, d2],
16463 ))))
16464 }
16465 DialectType::TSQL => {
16466 let unit =
16467 Expression::Identifier(Identifier::new(&unit_str));
16468 Ok(Expression::Function(Box::new(Function::new(
16469 "DATEDIFF".to_string(),
16470 vec![unit, arg1, arg2],
16471 ))))
16472 }
16473 DialectType::DuckDB => {
16474 let is_redshift_tsql = matches!(
16475 source,
16476 DialectType::Redshift | DialectType::TSQL
16477 );
16478 if is_hive_spark {
16479 // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
16480 let d1 = Self::ensure_cast_date(arg1);
16481 let d2 = Self::ensure_cast_date(arg2);
16482 Ok(Expression::Function(Box::new(Function::new(
16483 "DATE_DIFF".to_string(),
16484 vec![Expression::string(&unit_str), d1, d2],
16485 ))))
16486 } else if matches!(source, DialectType::Snowflake) {
16487 // For Snowflake source: special handling per unit
16488 match unit_str.as_str() {
16489 "NANOSECOND" => {
16490 // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
16491 fn cast_to_timestamp_ns(
16492 expr: Expression,
16493 ) -> Expression
16494 {
16495 Expression::Cast(Box::new(Cast {
16496 this: expr,
16497 to: DataType::Custom {
16498 name: "TIMESTAMP_NS".to_string(),
16499 },
16500 trailing_comments: vec![],
16501 double_colon_syntax: false,
16502 format: None,
16503 default: None,
16504 inferred_type: None,
16505 }))
16506 }
16507 let epoch_end = Expression::Function(Box::new(
16508 Function::new(
16509 "EPOCH_NS".to_string(),
16510 vec![cast_to_timestamp_ns(arg2)],
16511 ),
16512 ));
16513 let epoch_start = Expression::Function(
16514 Box::new(Function::new(
16515 "EPOCH_NS".to_string(),
16516 vec![cast_to_timestamp_ns(arg1)],
16517 )),
16518 );
16519 Ok(Expression::Sub(Box::new(BinaryOp::new(
16520 epoch_end,
16521 epoch_start,
16522 ))))
16523 }
16524 "WEEK" => {
16525 // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
16526 let d1 = Self::force_cast_date(arg1);
16527 let d2 = Self::force_cast_date(arg2);
16528 let dt1 = Expression::Function(Box::new(
16529 Function::new(
16530 "DATE_TRUNC".to_string(),
16531 vec![Expression::string("WEEK"), d1],
16532 ),
16533 ));
16534 let dt2 = Expression::Function(Box::new(
16535 Function::new(
16536 "DATE_TRUNC".to_string(),
16537 vec![Expression::string("WEEK"), d2],
16538 ),
16539 ));
16540 Ok(Expression::Function(Box::new(
16541 Function::new(
16542 "DATE_DIFF".to_string(),
16543 vec![
16544 Expression::string(&unit_str),
16545 dt1,
16546 dt2,
16547 ],
16548 ),
16549 )))
16550 }
16551 _ => {
16552 // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
16553 let d1 = Self::force_cast_date(arg1);
16554 let d2 = Self::force_cast_date(arg2);
16555 Ok(Expression::Function(Box::new(
16556 Function::new(
16557 "DATE_DIFF".to_string(),
16558 vec![
16559 Expression::string(&unit_str),
16560 d1,
16561 d2,
16562 ],
16563 ),
16564 )))
16565 }
16566 }
16567 } else if is_redshift_tsql {
16568 // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
16569 let d1 = Self::force_cast_timestamp(arg1);
16570 let d2 = Self::force_cast_timestamp(arg2);
16571 Ok(Expression::Function(Box::new(Function::new(
16572 "DATE_DIFF".to_string(),
16573 vec![Expression::string(&unit_str), d1, d2],
16574 ))))
16575 } else {
16576 // Keep as DATEDIFF so DuckDB's transform_datediff handles
16577 // DATE_TRUNC for WEEK, CAST for string literals, etc.
16578 let unit =
16579 Expression::Identifier(Identifier::new(&unit_str));
16580 Ok(Expression::Function(Box::new(Function::new(
16581 "DATEDIFF".to_string(),
16582 vec![unit, arg1, arg2],
16583 ))))
16584 }
16585 }
16586 DialectType::BigQuery => {
16587 let is_redshift_tsql = matches!(
16588 source,
16589 DialectType::Redshift
16590 | DialectType::TSQL
16591 | DialectType::Snowflake
16592 );
16593 let cast_d1 = if is_hive_spark {
16594 Self::ensure_cast_date(arg1)
16595 } else if is_redshift_tsql {
16596 Self::force_cast_datetime(arg1)
16597 } else {
16598 Self::ensure_cast_datetime(arg1)
16599 };
16600 let cast_d2 = if is_hive_spark {
16601 Self::ensure_cast_date(arg2)
16602 } else if is_redshift_tsql {
16603 Self::force_cast_datetime(arg2)
16604 } else {
16605 Self::ensure_cast_datetime(arg2)
16606 };
16607 let unit =
16608 Expression::Identifier(Identifier::new(&unit_str));
16609 Ok(Expression::Function(Box::new(Function::new(
16610 "DATE_DIFF".to_string(),
16611 vec![cast_d2, cast_d1, unit],
16612 ))))
16613 }
16614 DialectType::Presto
16615 | DialectType::Trino
16616 | DialectType::Athena => {
16617 // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
16618 // For Redshift/TSQL/Snowflake source, args need CAST to TIMESTAMP (always)
16619 let is_redshift_tsql = matches!(
16620 source,
16621 DialectType::Redshift
16622 | DialectType::TSQL
16623 | DialectType::Snowflake
16624 );
16625 let d1 = if is_hive_spark {
16626 Self::double_cast_timestamp_date(arg1)
16627 } else if is_redshift_tsql {
16628 Self::force_cast_timestamp(arg1)
16629 } else {
16630 arg1
16631 };
16632 let d2 = if is_hive_spark {
16633 Self::double_cast_timestamp_date(arg2)
16634 } else if is_redshift_tsql {
16635 Self::force_cast_timestamp(arg2)
16636 } else {
16637 arg2
16638 };
16639 Ok(Expression::Function(Box::new(Function::new(
16640 "DATE_DIFF".to_string(),
16641 vec![Expression::string(&unit_str), d1, d2],
16642 ))))
16643 }
16644 DialectType::Hive => match unit_str.as_str() {
16645 "MONTH" => Ok(Expression::Cast(Box::new(Cast {
16646 this: Expression::Function(Box::new(Function::new(
16647 "MONTHS_BETWEEN".to_string(),
16648 vec![arg2, arg1],
16649 ))),
16650 to: DataType::Int {
16651 length: None,
16652 integer_spelling: false,
16653 },
16654 trailing_comments: vec![],
16655 double_colon_syntax: false,
16656 format: None,
16657 default: None,
16658 inferred_type: None,
16659 }))),
16660 "WEEK" => Ok(Expression::Cast(Box::new(Cast {
16661 this: Expression::Div(Box::new(
16662 crate::expressions::BinaryOp::new(
16663 Expression::Function(Box::new(Function::new(
16664 "DATEDIFF".to_string(),
16665 vec![arg2, arg1],
16666 ))),
16667 Expression::number(7),
16668 ),
16669 )),
16670 to: DataType::Int {
16671 length: None,
16672 integer_spelling: false,
16673 },
16674 trailing_comments: vec![],
16675 double_colon_syntax: false,
16676 format: None,
16677 default: None,
16678 inferred_type: None,
16679 }))),
16680 _ => Ok(Expression::Function(Box::new(Function::new(
16681 "DATEDIFF".to_string(),
16682 vec![arg2, arg1],
16683 )))),
16684 },
16685 DialectType::Spark | DialectType::Databricks => {
16686 let unit =
16687 Expression::Identifier(Identifier::new(&unit_str));
16688 Ok(Expression::Function(Box::new(Function::new(
16689 "DATEDIFF".to_string(),
16690 vec![unit, arg1, arg2],
16691 ))))
16692 }
16693 _ => {
16694 // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
16695 let d1 = if is_hive_spark {
16696 Self::ensure_cast_date(arg1)
16697 } else {
16698 arg1
16699 };
16700 let d2 = if is_hive_spark {
16701 Self::ensure_cast_date(arg2)
16702 } else {
16703 arg2
16704 };
16705 let unit =
16706 Expression::Identifier(Identifier::new(&unit_str));
16707 Ok(Expression::Function(Box::new(Function::new(
16708 "DATEDIFF".to_string(),
16709 vec![unit, d1, d2],
16710 ))))
16711 }
16712 }
16713 }
16714 // DATEDIFF(end, start) - 2-arg form from Hive/MySQL
16715 "DATEDIFF" if f.args.len() == 2 => {
16716 let mut args = f.args;
16717 let arg0 = args.remove(0);
16718 let arg1 = args.remove(0);
16719
16720 // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
16721 // Also recognizes TryCast/Cast to DATE that may have been produced by
16722 // cross-dialect TO_DATE -> TRY_CAST conversion
16723 let unwrap_to_date = |e: Expression| -> (Expression, bool) {
16724 if let Expression::Function(ref f) = e {
16725 if f.name.eq_ignore_ascii_case("TO_DATE")
16726 && f.args.len() == 1
16727 {
16728 return (f.args[0].clone(), true);
16729 }
16730 }
16731 // Also recognize TryCast(x, Date) as an already-converted TO_DATE
16732 if let Expression::TryCast(ref c) = e {
16733 if matches!(c.to, DataType::Date) {
16734 return (e, true); // Already properly cast, return as-is
16735 }
16736 }
16737 (e, false)
16738 };
16739
16740 match target {
16741 DialectType::DuckDB => {
16742 // For Hive/Spark/Databricks source, always CAST to DATE
16743 // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
16744 let cast_d0 = if matches!(
16745 source,
16746 DialectType::Hive
16747 | DialectType::Spark
16748 | DialectType::Databricks
16749 ) {
16750 let (inner, was_to_date) = unwrap_to_date(arg1);
16751 if was_to_date {
16752 // Already a date expression, use directly
16753 if matches!(&inner, Expression::TryCast(_)) {
16754 inner // Already TRY_CAST(x AS DATE)
16755 } else {
16756 Self::try_cast_date(inner)
16757 }
16758 } else {
16759 Self::force_cast_date(inner)
16760 }
16761 } else {
16762 Self::ensure_cast_date(arg1)
16763 };
16764 let cast_d1 = if matches!(
16765 source,
16766 DialectType::Hive
16767 | DialectType::Spark
16768 | DialectType::Databricks
16769 ) {
16770 let (inner, was_to_date) = unwrap_to_date(arg0);
16771 if was_to_date {
16772 if matches!(&inner, Expression::TryCast(_)) {
16773 inner
16774 } else {
16775 Self::try_cast_date(inner)
16776 }
16777 } else {
16778 Self::force_cast_date(inner)
16779 }
16780 } else {
16781 Self::ensure_cast_date(arg0)
16782 };
16783 Ok(Expression::Function(Box::new(Function::new(
16784 "DATE_DIFF".to_string(),
16785 vec![Expression::string("DAY"), cast_d0, cast_d1],
16786 ))))
16787 }
16788 DialectType::Presto
16789 | DialectType::Trino
16790 | DialectType::Athena => {
16791 // For Hive/Spark source, apply double_cast_timestamp_date
16792 // For other sources (MySQL etc.), just swap args without casting
16793 if matches!(
16794 source,
16795 DialectType::Hive
16796 | DialectType::Spark
16797 | DialectType::Databricks
16798 ) {
16799 let cast_fn = |e: Expression| -> Expression {
16800 let (inner, was_to_date) = unwrap_to_date(e);
16801 if was_to_date {
16802 let first_cast =
16803 Self::double_cast_timestamp_date(inner);
16804 Self::double_cast_timestamp_date(first_cast)
16805 } else {
16806 Self::double_cast_timestamp_date(inner)
16807 }
16808 };
16809 Ok(Expression::Function(Box::new(Function::new(
16810 "DATE_DIFF".to_string(),
16811 vec![
16812 Expression::string("DAY"),
16813 cast_fn(arg1),
16814 cast_fn(arg0),
16815 ],
16816 ))))
16817 } else {
16818 Ok(Expression::Function(Box::new(Function::new(
16819 "DATE_DIFF".to_string(),
16820 vec![Expression::string("DAY"), arg1, arg0],
16821 ))))
16822 }
16823 }
16824 DialectType::Redshift => {
16825 let unit = Expression::Identifier(Identifier::new("DAY"));
16826 Ok(Expression::Function(Box::new(Function::new(
16827 "DATEDIFF".to_string(),
16828 vec![unit, arg1, arg0],
16829 ))))
16830 }
16831 _ => Ok(Expression::Function(Box::new(Function::new(
16832 "DATEDIFF".to_string(),
16833 vec![arg0, arg1],
16834 )))),
16835 }
16836 }
16837 // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
16838 "DATE_DIFF" if f.args.len() == 3 => {
16839 let mut args = f.args;
16840 let arg0 = args.remove(0);
16841 let arg1 = args.remove(0);
16842 let arg2 = args.remove(0);
16843 let unit_str = Self::get_unit_str_static(&arg0);
16844
16845 match target {
16846 DialectType::DuckDB => {
16847 // DuckDB: DATE_DIFF('UNIT', start, end)
16848 Ok(Expression::Function(Box::new(Function::new(
16849 "DATE_DIFF".to_string(),
16850 vec![Expression::string(&unit_str), arg1, arg2],
16851 ))))
16852 }
16853 DialectType::Presto
16854 | DialectType::Trino
16855 | DialectType::Athena => {
16856 Ok(Expression::Function(Box::new(Function::new(
16857 "DATE_DIFF".to_string(),
16858 vec![Expression::string(&unit_str), arg1, arg2],
16859 ))))
16860 }
16861 DialectType::ClickHouse => {
16862 // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
16863 let unit =
16864 Expression::Identifier(Identifier::new(&unit_str));
16865 Ok(Expression::Function(Box::new(Function::new(
16866 "DATE_DIFF".to_string(),
16867 vec![unit, arg1, arg2],
16868 ))))
16869 }
16870 DialectType::Snowflake | DialectType::Redshift => {
16871 let unit =
16872 Expression::Identifier(Identifier::new(&unit_str));
16873 Ok(Expression::Function(Box::new(Function::new(
16874 "DATEDIFF".to_string(),
16875 vec![unit, arg1, arg2],
16876 ))))
16877 }
16878 _ => {
16879 let unit =
16880 Expression::Identifier(Identifier::new(&unit_str));
16881 Ok(Expression::Function(Box::new(Function::new(
16882 "DATEDIFF".to_string(),
16883 vec![unit, arg1, arg2],
16884 ))))
16885 }
16886 }
16887 }
16888 // DATEADD(unit, val, date) - 3-arg form
16889 "DATEADD" if f.args.len() == 3 => {
16890 let mut args = f.args;
16891 let arg0 = args.remove(0);
16892 let arg1 = args.remove(0);
16893 let arg2 = args.remove(0);
16894 let unit_str = Self::get_unit_str_static(&arg0);
16895
16896 // Normalize TSQL unit abbreviations to standard names
16897 let unit_str = match unit_str.as_str() {
16898 "YY" | "YYYY" => "YEAR".to_string(),
16899 "QQ" | "Q" => "QUARTER".to_string(),
16900 "MM" | "M" => "MONTH".to_string(),
16901 "WK" | "WW" => "WEEK".to_string(),
16902 "DD" | "D" | "DY" => "DAY".to_string(),
16903 "HH" => "HOUR".to_string(),
16904 "MI" | "N" => "MINUTE".to_string(),
16905 "SS" | "S" => "SECOND".to_string(),
16906 "MS" => "MILLISECOND".to_string(),
16907 "MCS" | "US" => "MICROSECOND".to_string(),
16908 _ => unit_str,
16909 };
16910 match target {
16911 DialectType::Snowflake => {
16912 let unit =
16913 Expression::Identifier(Identifier::new(&unit_str));
16914 // Cast string literal to TIMESTAMP, but not for Snowflake source
16915 // (Snowflake natively accepts string literals in DATEADD)
16916 let arg2 = if matches!(
16917 &arg2,
16918 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
16919 ) && !matches!(source, DialectType::Snowflake)
16920 {
16921 Expression::Cast(Box::new(Cast {
16922 this: arg2,
16923 to: DataType::Timestamp {
16924 precision: None,
16925 timezone: false,
16926 },
16927 trailing_comments: Vec::new(),
16928 double_colon_syntax: false,
16929 format: None,
16930 default: None,
16931 inferred_type: None,
16932 }))
16933 } else {
16934 arg2
16935 };
16936 Ok(Expression::Function(Box::new(Function::new(
16937 "DATEADD".to_string(),
16938 vec![unit, arg1, arg2],
16939 ))))
16940 }
16941 DialectType::TSQL => {
16942 let unit =
16943 Expression::Identifier(Identifier::new(&unit_str));
16944 // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
16945 let arg2 = if matches!(
16946 &arg2,
16947 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
16948 ) && !matches!(
16949 source,
16950 DialectType::Spark
16951 | DialectType::Databricks
16952 | DialectType::Hive
16953 ) {
16954 Expression::Cast(Box::new(Cast {
16955 this: arg2,
16956 to: DataType::Custom {
16957 name: "DATETIME2".to_string(),
16958 },
16959 trailing_comments: Vec::new(),
16960 double_colon_syntax: false,
16961 format: None,
16962 default: None,
16963 inferred_type: None,
16964 }))
16965 } else {
16966 arg2
16967 };
16968 Ok(Expression::Function(Box::new(Function::new(
16969 "DATEADD".to_string(),
16970 vec![unit, arg1, arg2],
16971 ))))
16972 }
16973 DialectType::Redshift => {
16974 let unit =
16975 Expression::Identifier(Identifier::new(&unit_str));
16976 Ok(Expression::Function(Box::new(Function::new(
16977 "DATEADD".to_string(),
16978 vec![unit, arg1, arg2],
16979 ))))
16980 }
16981 DialectType::Databricks => {
16982 let unit =
16983 Expression::Identifier(Identifier::new(&unit_str));
16984 // Sources with native DATEADD (TSQL, Fabric, Databricks, Snowflake) -> DATEADD
16985 // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
16986 let func_name = if matches!(
16987 source,
16988 DialectType::TSQL
16989 | DialectType::Fabric
16990 | DialectType::Databricks
16991 | DialectType::Snowflake
16992 ) {
16993 "DATEADD"
16994 } else {
16995 "DATE_ADD"
16996 };
16997 Ok(Expression::Function(Box::new(Function::new(
16998 func_name.to_string(),
16999 vec![unit, arg1, arg2],
17000 ))))
17001 }
17002 DialectType::DuckDB => {
17003 // Special handling for NANOSECOND from Snowflake
17004 if unit_str == "NANOSECOND"
17005 && matches!(source, DialectType::Snowflake)
17006 {
17007 // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
17008 let cast_ts = Expression::Cast(Box::new(Cast {
17009 this: arg2,
17010 to: DataType::Custom {
17011 name: "TIMESTAMP_NS".to_string(),
17012 },
17013 trailing_comments: vec![],
17014 double_colon_syntax: false,
17015 format: None,
17016 default: None,
17017 inferred_type: None,
17018 }));
17019 let epoch_ns =
17020 Expression::Function(Box::new(Function::new(
17021 "EPOCH_NS".to_string(),
17022 vec![cast_ts],
17023 )));
17024 let sum = Expression::Add(Box::new(BinaryOp::new(
17025 epoch_ns, arg1,
17026 )));
17027 Ok(Expression::Function(Box::new(Function::new(
17028 "MAKE_TIMESTAMP_NS".to_string(),
17029 vec![sum],
17030 ))))
17031 } else {
17032 // DuckDB: convert to date + INTERVAL syntax with CAST
17033 let iu = Self::parse_interval_unit_static(&unit_str);
17034 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
17035 this: Some(arg1),
17036 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
17037 }));
17038 // Cast string literal to TIMESTAMP
17039 let arg2 = if matches!(
17040 &arg2,
17041 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
17042 ) {
17043 Expression::Cast(Box::new(Cast {
17044 this: arg2,
17045 to: DataType::Timestamp {
17046 precision: None,
17047 timezone: false,
17048 },
17049 trailing_comments: Vec::new(),
17050 double_colon_syntax: false,
17051 format: None,
17052 default: None,
17053 inferred_type: None,
17054 }))
17055 } else {
17056 arg2
17057 };
17058 Ok(Expression::Add(Box::new(
17059 crate::expressions::BinaryOp::new(arg2, interval),
17060 )))
17061 }
17062 }
17063 DialectType::Spark => {
17064 // For TSQL/Fabric source: convert to ADD_MONTHS/DATE_ADD(date, val)
17065 // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
17066 if matches!(source, DialectType::TSQL | DialectType::Fabric)
17067 {
17068 fn multiply_expr_spark(
17069 expr: Expression,
17070 factor: i64,
17071 ) -> Expression
17072 {
17073 if let Expression::Literal(lit) = &expr {
17074 if let crate::expressions::Literal::Number(n) =
17075 lit.as_ref()
17076 {
17077 if let Ok(val) = n.parse::<i64>() {
17078 return Expression::Literal(Box::new(
17079 crate::expressions::Literal::Number(
17080 (val * factor).to_string(),
17081 ),
17082 ));
17083 }
17084 }
17085 }
17086 Expression::Mul(Box::new(
17087 crate::expressions::BinaryOp::new(
17088 expr,
17089 Expression::Literal(Box::new(
17090 crate::expressions::Literal::Number(
17091 factor.to_string(),
17092 ),
17093 )),
17094 ),
17095 ))
17096 }
17097 let normalized_unit = match unit_str.as_str() {
17098 "YEAR" | "YY" | "YYYY" => "YEAR",
17099 "QUARTER" | "QQ" | "Q" => "QUARTER",
17100 "MONTH" | "MM" | "M" => "MONTH",
17101 "WEEK" | "WK" | "WW" => "WEEK",
17102 "DAY" | "DD" | "D" | "DY" => "DAY",
17103 _ => &unit_str,
17104 };
17105 match normalized_unit {
17106 "YEAR" => {
17107 let months = multiply_expr_spark(arg1, 12);
17108 Ok(Expression::Function(Box::new(
17109 Function::new(
17110 "ADD_MONTHS".to_string(),
17111 vec![arg2, months],
17112 ),
17113 )))
17114 }
17115 "QUARTER" => {
17116 let months = multiply_expr_spark(arg1, 3);
17117 Ok(Expression::Function(Box::new(
17118 Function::new(
17119 "ADD_MONTHS".to_string(),
17120 vec![arg2, months],
17121 ),
17122 )))
17123 }
17124 "MONTH" => Ok(Expression::Function(Box::new(
17125 Function::new(
17126 "ADD_MONTHS".to_string(),
17127 vec![arg2, arg1],
17128 ),
17129 ))),
17130 "WEEK" => {
17131 let days = multiply_expr_spark(arg1, 7);
17132 Ok(Expression::Function(Box::new(
17133 Function::new(
17134 "DATE_ADD".to_string(),
17135 vec![arg2, days],
17136 ),
17137 )))
17138 }
17139 "DAY" => Ok(Expression::Function(Box::new(
17140 Function::new(
17141 "DATE_ADD".to_string(),
17142 vec![arg2, arg1],
17143 ),
17144 ))),
17145 _ => {
17146 let unit = Expression::Identifier(
17147 Identifier::new(&unit_str),
17148 );
17149 Ok(Expression::Function(Box::new(
17150 Function::new(
17151 "DATE_ADD".to_string(),
17152 vec![unit, arg1, arg2],
17153 ),
17154 )))
17155 }
17156 }
17157 } else {
17158 // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
17159 let unit =
17160 Expression::Identifier(Identifier::new(&unit_str));
17161 Ok(Expression::Function(Box::new(Function::new(
17162 "DATE_ADD".to_string(),
17163 vec![unit, arg1, arg2],
17164 ))))
17165 }
17166 }
17167 DialectType::Hive => match unit_str.as_str() {
17168 "MONTH" => {
17169 Ok(Expression::Function(Box::new(Function::new(
17170 "ADD_MONTHS".to_string(),
17171 vec![arg2, arg1],
17172 ))))
17173 }
17174 _ => Ok(Expression::Function(Box::new(Function::new(
17175 "DATE_ADD".to_string(),
17176 vec![arg2, arg1],
17177 )))),
17178 },
17179 DialectType::Presto
17180 | DialectType::Trino
17181 | DialectType::Athena => {
17182 // Cast string literal date to TIMESTAMP
17183 let arg2 = if matches!(
17184 &arg2,
17185 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
17186 ) {
17187 Expression::Cast(Box::new(Cast {
17188 this: arg2,
17189 to: DataType::Timestamp {
17190 precision: None,
17191 timezone: false,
17192 },
17193 trailing_comments: Vec::new(),
17194 double_colon_syntax: false,
17195 format: None,
17196 default: None,
17197 inferred_type: None,
17198 }))
17199 } else {
17200 arg2
17201 };
17202 Ok(Expression::Function(Box::new(Function::new(
17203 "DATE_ADD".to_string(),
17204 vec![Expression::string(&unit_str), arg1, arg2],
17205 ))))
17206 }
17207 DialectType::MySQL => {
17208 let iu = Self::parse_interval_unit_static(&unit_str);
17209 Ok(Expression::DateAdd(Box::new(
17210 crate::expressions::DateAddFunc {
17211 this: arg2,
17212 interval: arg1,
17213 unit: iu,
17214 },
17215 )))
17216 }
17217 DialectType::PostgreSQL => {
17218 // Cast string literal date to TIMESTAMP
17219 let arg2 = if matches!(
17220 &arg2,
17221 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
17222 ) {
17223 Expression::Cast(Box::new(Cast {
17224 this: arg2,
17225 to: DataType::Timestamp {
17226 precision: None,
17227 timezone: false,
17228 },
17229 trailing_comments: Vec::new(),
17230 double_colon_syntax: false,
17231 format: None,
17232 default: None,
17233 inferred_type: None,
17234 }))
17235 } else {
17236 arg2
17237 };
17238 let interval = Expression::Interval(Box::new(
17239 crate::expressions::Interval {
17240 this: Some(Expression::string(&format!(
17241 "{} {}",
17242 Self::expr_to_string_static(&arg1),
17243 unit_str
17244 ))),
17245 unit: None,
17246 },
17247 ));
17248 Ok(Expression::Add(Box::new(
17249 crate::expressions::BinaryOp::new(arg2, interval),
17250 )))
17251 }
17252 DialectType::BigQuery => {
17253 let iu = Self::parse_interval_unit_static(&unit_str);
17254 let interval = Expression::Interval(Box::new(
17255 crate::expressions::Interval {
17256 this: Some(arg1),
17257 unit: Some(
17258 crate::expressions::IntervalUnitSpec::Simple {
17259 unit: iu,
17260 use_plural: false,
17261 },
17262 ),
17263 },
17264 ));
17265 // Non-TSQL sources: CAST string literal to DATETIME
17266 let arg2 = if !matches!(
17267 source,
17268 DialectType::TSQL | DialectType::Fabric
17269 ) && matches!(
17270 &arg2,
17271 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
17272 ) {
17273 Expression::Cast(Box::new(Cast {
17274 this: arg2,
17275 to: DataType::Custom {
17276 name: "DATETIME".to_string(),
17277 },
17278 trailing_comments: Vec::new(),
17279 double_colon_syntax: false,
17280 format: None,
17281 default: None,
17282 inferred_type: None,
17283 }))
17284 } else {
17285 arg2
17286 };
17287 Ok(Expression::Function(Box::new(Function::new(
17288 "DATE_ADD".to_string(),
17289 vec![arg2, interval],
17290 ))))
17291 }
17292 _ => {
17293 let unit =
17294 Expression::Identifier(Identifier::new(&unit_str));
17295 Ok(Expression::Function(Box::new(Function::new(
17296 "DATEADD".to_string(),
17297 vec![unit, arg1, arg2],
17298 ))))
17299 }
17300 }
17301 }
17302 // DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
17303 // or (date, val, 'UNIT') from Generic canonical form
17304 "DATE_ADD" if f.args.len() == 3 => {
17305 let mut args = f.args;
17306 let arg0 = args.remove(0);
17307 let arg1 = args.remove(0);
17308 let arg2 = args.remove(0);
17309 // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
17310 // where arg2 is a string literal matching a unit name
17311 let arg2_unit = match &arg2 {
17312 Expression::Literal(lit)
17313 if matches!(lit.as_ref(), Literal::String(_)) =>
17314 {
17315 let Literal::String(s) = lit.as_ref() else {
17316 unreachable!()
17317 };
17318 let u = s.to_ascii_uppercase();
17319 if matches!(
17320 u.as_str(),
17321 "DAY"
17322 | "MONTH"
17323 | "YEAR"
17324 | "HOUR"
17325 | "MINUTE"
17326 | "SECOND"
17327 | "WEEK"
17328 | "QUARTER"
17329 | "MILLISECOND"
17330 | "MICROSECOND"
17331 ) {
17332 Some(u)
17333 } else {
17334 None
17335 }
17336 }
17337 _ => None,
17338 };
17339 // Reorder: if arg2 is the unit, swap to (unit, val, date) form
17340 let (unit_str, val, date) = if let Some(u) = arg2_unit {
17341 (u, arg1, arg0)
17342 } else {
17343 (Self::get_unit_str_static(&arg0), arg1, arg2)
17344 };
17345 // Alias for backward compat with the rest of the match
17346 let arg1 = val;
17347 let arg2 = date;
17348
17349 match target {
17350 DialectType::Presto
17351 | DialectType::Trino
17352 | DialectType::Athena => {
17353 Ok(Expression::Function(Box::new(Function::new(
17354 "DATE_ADD".to_string(),
17355 vec![Expression::string(&unit_str), arg1, arg2],
17356 ))))
17357 }
17358 DialectType::DuckDB => {
17359 let iu = Self::parse_interval_unit_static(&unit_str);
17360 let interval = Expression::Interval(Box::new(
17361 crate::expressions::Interval {
17362 this: Some(arg1),
17363 unit: Some(
17364 crate::expressions::IntervalUnitSpec::Simple {
17365 unit: iu,
17366 use_plural: false,
17367 },
17368 ),
17369 },
17370 ));
17371 Ok(Expression::Add(Box::new(
17372 crate::expressions::BinaryOp::new(arg2, interval),
17373 )))
17374 }
17375 DialectType::PostgreSQL
17376 | DialectType::Materialize
17377 | DialectType::RisingWave => {
17378 // PostgreSQL: x + INTERVAL '1 DAY'
17379 let amount_str = Self::expr_to_string_static(&arg1);
17380 let interval = Expression::Interval(Box::new(
17381 crate::expressions::Interval {
17382 this: Some(Expression::string(&format!(
17383 "{} {}",
17384 amount_str, unit_str
17385 ))),
17386 unit: None,
17387 },
17388 ));
17389 Ok(Expression::Add(Box::new(
17390 crate::expressions::BinaryOp::new(arg2, interval),
17391 )))
17392 }
17393 DialectType::Snowflake
17394 | DialectType::TSQL
17395 | DialectType::Redshift => {
17396 let unit =
17397 Expression::Identifier(Identifier::new(&unit_str));
17398 Ok(Expression::Function(Box::new(Function::new(
17399 "DATEADD".to_string(),
17400 vec![unit, arg1, arg2],
17401 ))))
17402 }
17403 DialectType::BigQuery
17404 | DialectType::MySQL
17405 | DialectType::Doris
17406 | DialectType::StarRocks
17407 | DialectType::Drill => {
17408 // DATE_ADD(date, INTERVAL amount UNIT)
17409 let iu = Self::parse_interval_unit_static(&unit_str);
17410 let interval = Expression::Interval(Box::new(
17411 crate::expressions::Interval {
17412 this: Some(arg1),
17413 unit: Some(
17414 crate::expressions::IntervalUnitSpec::Simple {
17415 unit: iu,
17416 use_plural: false,
17417 },
17418 ),
17419 },
17420 ));
17421 Ok(Expression::Function(Box::new(Function::new(
17422 "DATE_ADD".to_string(),
17423 vec![arg2, interval],
17424 ))))
17425 }
17426 DialectType::SQLite => {
17427 // SQLite: DATE(x, '1 DAY')
17428 // Build the string '1 DAY' from amount and unit
17429 let amount_str = match &arg1 {
17430 Expression::Literal(lit)
17431 if matches!(lit.as_ref(), Literal::Number(_)) =>
17432 {
17433 let Literal::Number(n) = lit.as_ref() else {
17434 unreachable!()
17435 };
17436 n.clone()
17437 }
17438 _ => "1".to_string(),
17439 };
17440 Ok(Expression::Function(Box::new(Function::new(
17441 "DATE".to_string(),
17442 vec![
17443 arg2,
17444 Expression::string(format!(
17445 "{} {}",
17446 amount_str, unit_str
17447 )),
17448 ],
17449 ))))
17450 }
17451 DialectType::Dremio => {
17452 // Dremio: DATE_ADD(date, amount) - drops unit
17453 Ok(Expression::Function(Box::new(Function::new(
17454 "DATE_ADD".to_string(),
17455 vec![arg2, arg1],
17456 ))))
17457 }
17458 DialectType::Spark => {
17459 // Spark: DATE_ADD(date, val) for DAY, otherwise DATE_ADD(UNIT, val, date)
17460 if unit_str == "DAY" {
17461 Ok(Expression::Function(Box::new(Function::new(
17462 "DATE_ADD".to_string(),
17463 vec![arg2, arg1],
17464 ))))
17465 } else {
17466 let unit =
17467 Expression::Identifier(Identifier::new(&unit_str));
17468 Ok(Expression::Function(Box::new(Function::new(
17469 "DATE_ADD".to_string(),
17470 vec![unit, arg1, arg2],
17471 ))))
17472 }
17473 }
17474 DialectType::Databricks => {
17475 let unit =
17476 Expression::Identifier(Identifier::new(&unit_str));
17477 Ok(Expression::Function(Box::new(Function::new(
17478 "DATE_ADD".to_string(),
17479 vec![unit, arg1, arg2],
17480 ))))
17481 }
17482 DialectType::Hive => {
17483 // Hive: DATE_ADD(date, val) - the unit is dropped, so this is only exact for DAY
17484 Ok(Expression::Function(Box::new(Function::new(
17485 "DATE_ADD".to_string(),
17486 vec![arg2, arg1],
17487 ))))
17488 }
17489 _ => {
17490 let unit =
17491 Expression::Identifier(Identifier::new(&unit_str));
17492 Ok(Expression::Function(Box::new(Function::new(
17493 "DATE_ADD".to_string(),
17494 vec![unit, arg1, arg2],
17495 ))))
17496 }
17497 }
17498 }
17499 // DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days)
17500 "DATE_ADD"
17501 if f.args.len() == 2
17502 && matches!(
17503 source,
17504 DialectType::Hive
17505 | DialectType::Spark
17506 | DialectType::Databricks
17507 | DialectType::Generic
17508 ) =>
17509 {
17510 let mut args = f.args;
17511 let date = args.remove(0);
17512 let days = args.remove(0);
17513 match target {
17514 DialectType::Hive | DialectType::Spark => {
17515 // Keep as DATE_ADD(date, days) for Hive/Spark
17516 Ok(Expression::Function(Box::new(Function::new(
17517 "DATE_ADD".to_string(),
17518 vec![date, days],
17519 ))))
17520 }
17521 DialectType::Databricks => {
17522 // Databricks: DATEADD(DAY, days, date)
17523 Ok(Expression::Function(Box::new(Function::new(
17524 "DATEADD".to_string(),
17525 vec![
17526 Expression::Identifier(Identifier::new("DAY")),
17527 days,
17528 date,
17529 ],
17530 ))))
17531 }
17532 DialectType::DuckDB => {
17533 // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
17534 let cast_date = Self::ensure_cast_date(date);
17535 // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
17536 let interval_val = if matches!(
17537 days,
17538 Expression::Mul(_)
17539 | Expression::Sub(_)
17540 | Expression::Add(_)
17541 ) {
17542 Expression::Paren(Box::new(crate::expressions::Paren {
17543 this: days,
17544 trailing_comments: vec![],
17545 }))
17546 } else {
17547 days
17548 };
17549 let interval = Expression::Interval(Box::new(
17550 crate::expressions::Interval {
17551 this: Some(interval_val),
17552 unit: Some(
17553 crate::expressions::IntervalUnitSpec::Simple {
17554 unit: crate::expressions::IntervalUnit::Day,
17555 use_plural: false,
17556 },
17557 ),
17558 },
17559 ));
17560 Ok(Expression::Add(Box::new(
17561 crate::expressions::BinaryOp::new(cast_date, interval),
17562 )))
17563 }
17564 DialectType::Snowflake => {
17565 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
17566 let cast_date = if matches!(
17567 source,
17568 DialectType::Hive
17569 | DialectType::Spark
17570 | DialectType::Databricks
17571 ) {
17572 if matches!(
17573 date,
17574 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
17575 ) {
17576 Self::double_cast_timestamp_date(date)
17577 } else {
17578 date
17579 }
17580 } else {
17581 date
17582 };
17583 Ok(Expression::Function(Box::new(Function::new(
17584 "DATEADD".to_string(),
17585 vec![
17586 Expression::Identifier(Identifier::new("DAY")),
17587 days,
17588 cast_date,
17589 ],
17590 ))))
17591 }
17592 DialectType::Redshift => {
17593 Ok(Expression::Function(Box::new(Function::new(
17594 "DATEADD".to_string(),
17595 vec![
17596 Expression::Identifier(Identifier::new("DAY")),
17597 days,
17598 date,
17599 ],
17600 ))))
17601 }
17602 DialectType::TSQL | DialectType::Fabric => {
17603 // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
17604 // But Databricks DATE_ADD doesn't need this wrapping for TSQL
17605 let cast_date = if matches!(
17606 source,
17607 DialectType::Hive | DialectType::Spark
17608 ) {
17609 if matches!(
17610 date,
17611 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
17612 ) {
17613 Self::double_cast_datetime2_date(date)
17614 } else {
17615 date
17616 }
17617 } else {
17618 date
17619 };
17620 Ok(Expression::Function(Box::new(Function::new(
17621 "DATEADD".to_string(),
17622 vec![
17623 Expression::Identifier(Identifier::new("DAY")),
17624 days,
17625 cast_date,
17626 ],
17627 ))))
17628 }
17629 DialectType::Presto
17630 | DialectType::Trino
17631 | DialectType::Athena => {
17632 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
17633 let cast_date = if matches!(
17634 source,
17635 DialectType::Hive
17636 | DialectType::Spark
17637 | DialectType::Databricks
17638 ) {
17639 if matches!(
17640 date,
17641 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
17642 ) {
17643 Self::double_cast_timestamp_date(date)
17644 } else {
17645 date
17646 }
17647 } else {
17648 date
17649 };
17650 Ok(Expression::Function(Box::new(Function::new(
17651 "DATE_ADD".to_string(),
17652 vec![Expression::string("DAY"), days, cast_date],
17653 ))))
17654 }
17655 DialectType::BigQuery => {
17656 // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
17657 let cast_date = if matches!(
17658 source,
17659 DialectType::Hive
17660 | DialectType::Spark
17661 | DialectType::Databricks
17662 ) {
17663 Self::double_cast_datetime_date(date)
17664 } else {
17665 date
17666 };
17667 // Wrap complex expressions in Paren for interval
17668 let interval_val = if matches!(
17669 days,
17670 Expression::Mul(_)
17671 | Expression::Sub(_)
17672 | Expression::Add(_)
17673 ) {
17674 Expression::Paren(Box::new(crate::expressions::Paren {
17675 this: days,
17676 trailing_comments: vec![],
17677 }))
17678 } else {
17679 days
17680 };
17681 let interval = Expression::Interval(Box::new(
17682 crate::expressions::Interval {
17683 this: Some(interval_val),
17684 unit: Some(
17685 crate::expressions::IntervalUnitSpec::Simple {
17686 unit: crate::expressions::IntervalUnit::Day,
17687 use_plural: false,
17688 },
17689 ),
17690 },
17691 ));
17692 Ok(Expression::Function(Box::new(Function::new(
17693 "DATE_ADD".to_string(),
17694 vec![cast_date, interval],
17695 ))))
17696 }
17697 DialectType::MySQL => {
17698 let iu = crate::expressions::IntervalUnit::Day;
17699 Ok(Expression::DateAdd(Box::new(
17700 crate::expressions::DateAddFunc {
17701 this: date,
17702 interval: days,
17703 unit: iu,
17704 },
17705 )))
17706 }
17707 DialectType::PostgreSQL => {
17708 let interval = Expression::Interval(Box::new(
17709 crate::expressions::Interval {
17710 this: Some(Expression::string(&format!(
17711 "{} DAY",
17712 Self::expr_to_string_static(&days)
17713 ))),
17714 unit: None,
17715 },
17716 ));
17717 Ok(Expression::Add(Box::new(
17718 crate::expressions::BinaryOp::new(date, interval),
17719 )))
17720 }
17721 DialectType::Doris
17722 | DialectType::StarRocks
17723 | DialectType::Drill => {
17724 // DATE_ADD(date, INTERVAL days DAY)
17725 let interval = Expression::Interval(Box::new(
17726 crate::expressions::Interval {
17727 this: Some(days),
17728 unit: Some(
17729 crate::expressions::IntervalUnitSpec::Simple {
17730 unit: crate::expressions::IntervalUnit::Day,
17731 use_plural: false,
17732 },
17733 ),
17734 },
17735 ));
17736 Ok(Expression::Function(Box::new(Function::new(
17737 "DATE_ADD".to_string(),
17738 vec![date, interval],
17739 ))))
17740 }
17741 _ => Ok(Expression::Function(Box::new(Function::new(
17742 "DATE_ADD".to_string(),
17743 vec![date, days],
17744 )))),
17745 }
17746 }
17747 // DATE_ADD(date, INTERVAL val UNIT) - MySQL 2-arg form with INTERVAL as 2nd arg
17748 "DATE_ADD"
17749 if f.args.len() == 2
17750 && matches!(
17751 source,
17752 DialectType::MySQL | DialectType::SingleStore
17753 )
17754 && matches!(&f.args[1], Expression::Interval(_)) =>
17755 {
17756 let mut args = f.args;
17757 let date = args.remove(0);
17758 let interval_expr = args.remove(0);
17759 let (val, unit) = Self::extract_interval_parts(&interval_expr)
17760 .unwrap_or_else(|| {
17761 (
17762 interval_expr.clone(),
17763 crate::expressions::IntervalUnit::Day,
17764 )
17765 });
17766 let unit_str = Self::interval_unit_to_string(&unit);
17767 let is_literal = matches!(&val,
17768 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_) | Literal::String(_))
17769 );
17770
17771 match target {
17772 DialectType::MySQL | DialectType::SingleStore => {
17773 // Keep as DATE_ADD(date, INTERVAL val UNIT)
17774 Ok(Expression::Function(Box::new(Function::new(
17775 "DATE_ADD".to_string(),
17776 vec![date, interval_expr],
17777 ))))
17778 }
17779 DialectType::PostgreSQL => {
17780 if is_literal {
17781 // Literal: date + INTERVAL 'val UNIT'
17782 let interval = Expression::Interval(Box::new(
17783 crate::expressions::Interval {
17784 this: Some(Expression::Literal(Box::new(
17785 Literal::String(format!(
17786 "{} {}",
17787 Self::expr_to_string(&val),
17788 unit_str
17789 )),
17790 ))),
17791 unit: None,
17792 },
17793 ));
17794 Ok(Expression::Add(Box::new(
17795 crate::expressions::BinaryOp::new(date, interval),
17796 )))
17797 } else {
17798 // Non-literal (column ref): date + INTERVAL '1 UNIT' * val
17799 let interval_one = Expression::Interval(Box::new(
17800 crate::expressions::Interval {
17801 this: Some(Expression::Literal(Box::new(
17802 Literal::String(format!("1 {}", unit_str)),
17803 ))),
17804 unit: None,
17805 },
17806 ));
17807 let mul = Expression::Mul(Box::new(
17808 crate::expressions::BinaryOp::new(
17809 interval_one,
17810 val,
17811 ),
17812 ));
17813 Ok(Expression::Add(Box::new(
17814 crate::expressions::BinaryOp::new(date, mul),
17815 )))
17816 }
17817 }
17818 _ => {
17819 // Default: keep as DATE_ADD(date, interval)
17820 Ok(Expression::Function(Box::new(Function::new(
17821 "DATE_ADD".to_string(),
17822 vec![date, interval_expr],
17823 ))))
17824 }
17825 }
17826 }
17827 // DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days)
17828 "DATE_SUB"
17829 if f.args.len() == 2
17830 && matches!(
17831 source,
17832 DialectType::Hive
17833 | DialectType::Spark
17834 | DialectType::Databricks
17835 ) =>
17836 {
17837 let mut args = f.args;
17838 let date = args.remove(0);
17839 let days = args.remove(0);
17840 // Helper to create days * -1
17841 let make_neg_days = |d: Expression| -> Expression {
17842 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
17843 d,
17844 Expression::Literal(Box::new(Literal::Number(
17845 "-1".to_string(),
17846 ))),
17847 )))
17848 };
17849 let is_string_literal = matches!(date, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)));
17850 match target {
17851 DialectType::Hive
17852 | DialectType::Spark
17853 | DialectType::Databricks => {
17854 // Keep as DATE_SUB(date, days) for Hive/Spark
17855 Ok(Expression::Function(Box::new(Function::new(
17856 "DATE_SUB".to_string(),
17857 vec![date, days],
17858 ))))
17859 }
17860 DialectType::DuckDB => {
17861 let cast_date = Self::ensure_cast_date(date);
17862 let neg = make_neg_days(days);
17863 let interval = Expression::Interval(Box::new(
17864 crate::expressions::Interval {
17865 this: Some(Expression::Paren(Box::new(
17866 crate::expressions::Paren {
17867 this: neg,
17868 trailing_comments: vec![],
17869 },
17870 ))),
17871 unit: Some(
17872 crate::expressions::IntervalUnitSpec::Simple {
17873 unit: crate::expressions::IntervalUnit::Day,
17874 use_plural: false,
17875 },
17876 ),
17877 },
17878 ));
17879 Ok(Expression::Add(Box::new(
17880 crate::expressions::BinaryOp::new(cast_date, interval),
17881 )))
17882 }
17883 DialectType::Snowflake => {
17884 let cast_date = if is_string_literal {
17885 Self::double_cast_timestamp_date(date)
17886 } else {
17887 date
17888 };
17889 let neg = make_neg_days(days);
17890 Ok(Expression::Function(Box::new(Function::new(
17891 "DATEADD".to_string(),
17892 vec![
17893 Expression::Identifier(Identifier::new("DAY")),
17894 neg,
17895 cast_date,
17896 ],
17897 ))))
17898 }
17899 DialectType::Redshift => {
17900 let neg = make_neg_days(days);
17901 Ok(Expression::Function(Box::new(Function::new(
17902 "DATEADD".to_string(),
17903 vec![
17904 Expression::Identifier(Identifier::new("DAY")),
17905 neg,
17906 date,
17907 ],
17908 ))))
17909 }
17910 DialectType::TSQL | DialectType::Fabric => {
17911 let cast_date = if is_string_literal {
17912 Self::double_cast_datetime2_date(date)
17913 } else {
17914 date
17915 };
17916 let neg = make_neg_days(days);
17917 Ok(Expression::Function(Box::new(Function::new(
17918 "DATEADD".to_string(),
17919 vec![
17920 Expression::Identifier(Identifier::new("DAY")),
17921 neg,
17922 cast_date,
17923 ],
17924 ))))
17925 }
17926 DialectType::Presto
17927 | DialectType::Trino
17928 | DialectType::Athena => {
17929 let cast_date = if is_string_literal {
17930 Self::double_cast_timestamp_date(date)
17931 } else {
17932 date
17933 };
17934 let neg = make_neg_days(days);
17935 Ok(Expression::Function(Box::new(Function::new(
17936 "DATE_ADD".to_string(),
17937 vec![Expression::string("DAY"), neg, cast_date],
17938 ))))
17939 }
17940 DialectType::BigQuery => {
17941 let cast_date = if is_string_literal {
17942 Self::double_cast_datetime_date(date)
17943 } else {
17944 date
17945 };
17946 let neg = make_neg_days(days);
17947 let interval = Expression::Interval(Box::new(
17948 crate::expressions::Interval {
17949 this: Some(Expression::Paren(Box::new(
17950 crate::expressions::Paren {
17951 this: neg,
17952 trailing_comments: vec![],
17953 },
17954 ))),
17955 unit: Some(
17956 crate::expressions::IntervalUnitSpec::Simple {
17957 unit: crate::expressions::IntervalUnit::Day,
17958 use_plural: false,
17959 },
17960 ),
17961 },
17962 ));
17963 Ok(Expression::Function(Box::new(Function::new(
17964 "DATE_ADD".to_string(),
17965 vec![cast_date, interval],
17966 ))))
17967 }
17968 _ => Ok(Expression::Function(Box::new(Function::new(
17969 "DATE_SUB".to_string(),
17970 vec![date, days],
17971 )))),
17972 }
17973 }
17974 // ADD_MONTHS(date, val) -> target-specific
17975 "ADD_MONTHS" if f.args.len() == 2 => {
17976 let mut args = f.args;
17977 let date = args.remove(0);
17978 let val = args.remove(0);
17979 match target {
17980 DialectType::TSQL => {
17981 let cast_date = Self::ensure_cast_datetime2(date);
17982 Ok(Expression::Function(Box::new(Function::new(
17983 "DATEADD".to_string(),
17984 vec![
17985 Expression::Identifier(Identifier::new("MONTH")),
17986 val,
17987 cast_date,
17988 ],
17989 ))))
17990 }
17991 DialectType::DuckDB => {
17992 let interval = Expression::Interval(Box::new(
17993 crate::expressions::Interval {
17994 this: Some(val),
17995 unit: Some(
17996 crate::expressions::IntervalUnitSpec::Simple {
17997 unit:
17998 crate::expressions::IntervalUnit::Month,
17999 use_plural: false,
18000 },
18001 ),
18002 },
18003 ));
18004 Ok(Expression::Add(Box::new(
18005 crate::expressions::BinaryOp::new(date, interval),
18006 )))
18007 }
18008 DialectType::Snowflake => {
18009 // Keep ADD_MONTHS when source is Snowflake
18010 if matches!(source, DialectType::Snowflake) {
18011 Ok(Expression::Function(Box::new(Function::new(
18012 "ADD_MONTHS".to_string(),
18013 vec![date, val],
18014 ))))
18015 } else {
18016 Ok(Expression::Function(Box::new(Function::new(
18017 "DATEADD".to_string(),
18018 vec![
18019 Expression::Identifier(Identifier::new(
18020 "MONTH",
18021 )),
18022 val,
18023 date,
18024 ],
18025 ))))
18026 }
18027 }
18028 DialectType::Redshift => {
18029 Ok(Expression::Function(Box::new(Function::new(
18030 "DATEADD".to_string(),
18031 vec![
18032 Expression::Identifier(Identifier::new("MONTH")),
18033 val,
18034 date,
18035 ],
18036 ))))
18037 }
18038 DialectType::Presto
18039 | DialectType::Trino
18040 | DialectType::Athena => {
18041 Ok(Expression::Function(Box::new(Function::new(
18042 "DATE_ADD".to_string(),
18043 vec![Expression::string("MONTH"), val, date],
18044 ))))
18045 }
18046 DialectType::BigQuery => {
18047 let interval = Expression::Interval(Box::new(
18048 crate::expressions::Interval {
18049 this: Some(val),
18050 unit: Some(
18051 crate::expressions::IntervalUnitSpec::Simple {
18052 unit:
18053 crate::expressions::IntervalUnit::Month,
18054 use_plural: false,
18055 },
18056 ),
18057 },
18058 ));
18059 Ok(Expression::Function(Box::new(Function::new(
18060 "DATE_ADD".to_string(),
18061 vec![date, interval],
18062 ))))
18063 }
18064 _ => Ok(Expression::Function(Box::new(Function::new(
18065 "ADD_MONTHS".to_string(),
18066 vec![date, val],
18067 )))),
18068 }
18069 }
18070 // DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
18071 "DATETRUNC" if f.args.len() == 2 => {
18072 let mut args = f.args;
18073 let arg0 = args.remove(0);
18074 let arg1 = args.remove(0);
18075 let unit_str = Self::get_unit_str_static(&arg0);
18076 match target {
18077 DialectType::TSQL | DialectType::Fabric => {
18078 // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
18079 Ok(Expression::Function(Box::new(Function::new(
18080 "DATETRUNC".to_string(),
18081 vec![
18082 Expression::Identifier(Identifier::new(&unit_str)),
18083 arg1,
18084 ],
18085 ))))
18086 }
18087 DialectType::DuckDB => {
18088 // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
18089 let date = Self::ensure_cast_timestamp(arg1);
18090 Ok(Expression::Function(Box::new(Function::new(
18091 "DATE_TRUNC".to_string(),
18092 vec![Expression::string(&unit_str), date],
18093 ))))
18094 }
18095 DialectType::ClickHouse => {
18096 // ClickHouse: dateTrunc('UNIT', expr)
18097 Ok(Expression::Function(Box::new(Function::new(
18098 "dateTrunc".to_string(),
18099 vec![Expression::string(&unit_str), arg1],
18100 ))))
18101 }
18102 _ => {
18103 // Standard: DATE_TRUNC('UNIT', expr)
18104 let unit = Expression::string(&unit_str);
18105 Ok(Expression::Function(Box::new(Function::new(
18106 "DATE_TRUNC".to_string(),
18107 vec![unit, arg1],
18108 ))))
18109 }
18110 }
18111 }
18112 // GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
18113 "GETDATE" if f.args.is_empty() => match target {
18114 DialectType::TSQL => Ok(Expression::Function(f)),
18115 DialectType::Redshift => Ok(Expression::Function(Box::new(
18116 Function::new("GETDATE".to_string(), vec![]),
18117 ))),
18118 _ => Ok(Expression::CurrentTimestamp(
18119 crate::expressions::CurrentTimestamp {
18120 precision: None,
18121 sysdate: false,
18122 },
18123 )),
18124 },
18125 // TO_HEX(x) / HEX(x) -> target-specific hex function
18126 "TO_HEX" | "HEX" if f.args.len() == 1 => {
18127 let name = match target {
18128 DialectType::Presto | DialectType::Trino => "TO_HEX",
18129 DialectType::Spark
18130 | DialectType::Databricks
18131 | DialectType::Hive => "HEX",
18132 DialectType::DuckDB
18133 | DialectType::PostgreSQL
18134 | DialectType::Redshift => "TO_HEX",
18135 _ => &f.name,
18136 };
18137 Ok(Expression::Function(Box::new(Function::new(
18138 name.to_string(),
18139 f.args,
18140 ))))
18141 }
18142 // FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
18143 "FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
18144 match target {
18145 DialectType::BigQuery => {
18146 // BigQuery: UNHEX(x) -> FROM_HEX(x)
18147 // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
18148 // because BigQuery MD5 returns BYTES, not hex string
18149 let arg = &f.args[0];
18150 let wrapped_arg = match arg {
18151 Expression::Function(inner_f)
18152 if inner_f.name.eq_ignore_ascii_case("MD5")
18153 || inner_f
18154 .name
18155 .eq_ignore_ascii_case("SHA1")
18156 || inner_f
18157 .name
18158 .eq_ignore_ascii_case("SHA256")
18159 || inner_f
18160 .name
18161 .eq_ignore_ascii_case("SHA512") =>
18162 {
18163 // Wrap hash function in TO_HEX for BigQuery
18164 Expression::Function(Box::new(Function::new(
18165 "TO_HEX".to_string(),
18166 vec![arg.clone()],
18167 )))
18168 }
18169 _ => f.args.into_iter().next().unwrap(),
18170 };
18171 Ok(Expression::Function(Box::new(Function::new(
18172 "FROM_HEX".to_string(),
18173 vec![wrapped_arg],
18174 ))))
18175 }
18176 _ => {
18177 let name = match target {
18178 DialectType::Presto | DialectType::Trino => "FROM_HEX",
18179 DialectType::Spark
18180 | DialectType::Databricks
18181 | DialectType::Hive => "UNHEX",
18182 _ => &f.name,
18183 };
18184 Ok(Expression::Function(Box::new(Function::new(
18185 name.to_string(),
18186 f.args,
18187 ))))
18188 }
18189 }
18190 }
18191 // TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
18192 "TO_UTF8" if f.args.len() == 1 => match target {
18193 DialectType::Spark | DialectType::Databricks => {
18194 let mut args = f.args;
18195 args.push(Expression::string("utf-8"));
18196 Ok(Expression::Function(Box::new(Function::new(
18197 "ENCODE".to_string(),
18198 args,
18199 ))))
18200 }
18201 _ => Ok(Expression::Function(f)),
18202 },
18203 // FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
18204 "FROM_UTF8" if f.args.len() == 1 => match target {
18205 DialectType::Spark | DialectType::Databricks => {
18206 let mut args = f.args;
18207 args.push(Expression::string("utf-8"));
18208 Ok(Expression::Function(Box::new(Function::new(
18209 "DECODE".to_string(),
18210 args,
18211 ))))
18212 }
18213 _ => Ok(Expression::Function(f)),
18214 },
18215 // STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
18216 "STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
18217 let name = match target {
18218 DialectType::Spark | DialectType::Databricks => "STARTSWITH",
18219 DialectType::Presto | DialectType::Trino => "STARTS_WITH",
18220 DialectType::PostgreSQL | DialectType::Redshift => {
18221 "STARTS_WITH"
18222 }
18223 _ => &f.name,
18224 };
18225 Ok(Expression::Function(Box::new(Function::new(
18226 name.to_string(),
18227 f.args,
18228 ))))
18229 }
18230 // APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
18231 "APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
18232 let name = match target {
18233 DialectType::Presto
18234 | DialectType::Trino
18235 | DialectType::Athena => "APPROX_DISTINCT",
18236 _ => "APPROX_COUNT_DISTINCT",
18237 };
18238 Ok(Expression::Function(Box::new(Function::new(
18239 name.to_string(),
18240 f.args,
18241 ))))
18242 }
18243 // JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
18244 "JSON_EXTRACT"
18245 if f.args.len() == 2
18246 && !matches!(source, DialectType::BigQuery)
18247 && matches!(
18248 target,
18249 DialectType::Spark
18250 | DialectType::Databricks
18251 | DialectType::Hive
18252 ) =>
18253 {
18254 Ok(Expression::Function(Box::new(Function::new(
18255 "GET_JSON_OBJECT".to_string(),
18256 f.args,
18257 ))))
18258 }
18259 // JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
18260 "JSON_EXTRACT"
18261 if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
18262 {
18263 let mut args = f.args;
18264 let path = args.remove(1);
18265 let this = args.remove(0);
18266 Ok(Expression::JsonExtract(Box::new(
18267 crate::expressions::JsonExtractFunc {
18268 this,
18269 path,
18270 returning: None,
18271 arrow_syntax: true,
18272 hash_arrow_syntax: false,
18273 wrapper_option: None,
18274 quotes_option: None,
18275 on_scalar_string: false,
18276 on_error: None,
18277 },
18278 )))
18279 }
18280 // JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
18281 "JSON_FORMAT" if f.args.len() == 1 => {
18282 match target {
18283 DialectType::Spark | DialectType::Databricks => {
18284 // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
18285 // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
18286 if matches!(
18287 source,
18288 DialectType::Presto
18289 | DialectType::Trino
18290 | DialectType::Athena
18291 ) {
18292 if let Some(Expression::ParseJson(pj)) = f.args.first()
18293 {
18294 if let Expression::Literal(lit) = &pj.this {
18295 if let Literal::String(s) = lit.as_ref() {
18296 let wrapped =
18297 Expression::Literal(Box::new(
18298 Literal::String(format!("[{}]", s)),
18299 ));
18300 let schema_of_json = Expression::Function(
18301 Box::new(Function::new(
18302 "SCHEMA_OF_JSON".to_string(),
18303 vec![wrapped.clone()],
18304 )),
18305 );
18306 let from_json = Expression::Function(
18307 Box::new(Function::new(
18308 "FROM_JSON".to_string(),
18309 vec![wrapped, schema_of_json],
18310 )),
18311 );
18312 let to_json = Expression::Function(
18313 Box::new(Function::new(
18314 "TO_JSON".to_string(),
18315 vec![from_json],
18316 )),
18317 );
18318 return Ok(Expression::Function(Box::new(
18319 Function::new(
18320 "REGEXP_EXTRACT".to_string(),
18321 vec![
18322 to_json,
18323 Expression::Literal(Box::new(
18324 Literal::String(
18325 "^.(.*).$".to_string(),
18326 ),
18327 )),
18328 Expression::Literal(Box::new(
18329 Literal::Number(
18330 "1".to_string(),
18331 ),
18332 )),
18333 ],
18334 ),
18335 )));
18336 }
18337 }
18338 }
18339 }
18340
18341 // Strip inner CAST(... AS JSON) or TO_JSON() if present
18342 // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
18343 let mut args = f.args;
18344 if let Some(Expression::Cast(ref c)) = args.first() {
18345 if matches!(&c.to, DataType::Json | DataType::JsonB) {
18346 args = vec![c.this.clone()];
18347 }
18348 } else if let Some(Expression::Function(ref inner_f)) =
18349 args.first()
18350 {
18351 if inner_f.name.eq_ignore_ascii_case("TO_JSON")
18352 && inner_f.args.len() == 1
18353 {
18354 // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
18355 args = inner_f.args.clone();
18356 }
18357 }
18358 Ok(Expression::Function(Box::new(Function::new(
18359 "TO_JSON".to_string(),
18360 args,
18361 ))))
18362 }
18363 DialectType::BigQuery => Ok(Expression::Function(Box::new(
18364 Function::new("TO_JSON_STRING".to_string(), f.args),
18365 ))),
18366 DialectType::DuckDB => {
18367 // CAST(TO_JSON(x) AS TEXT)
18368 let to_json = Expression::Function(Box::new(
18369 Function::new("TO_JSON".to_string(), f.args),
18370 ));
18371 Ok(Expression::Cast(Box::new(Cast {
18372 this: to_json,
18373 to: DataType::Text,
18374 trailing_comments: Vec::new(),
18375 double_colon_syntax: false,
18376 format: None,
18377 default: None,
18378 inferred_type: None,
18379 })))
18380 }
18381 _ => Ok(Expression::Function(f)),
18382 }
18383 }
18384 // SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
18385 "SYSDATE" if f.args.is_empty() => {
18386 match target {
18387 DialectType::Oracle | DialectType::Redshift => {
18388 Ok(Expression::Function(f))
18389 }
18390 DialectType::Snowflake => {
18391 // Snowflake uses SYSDATE() with parens
18392 let mut f = *f;
18393 f.no_parens = false;
18394 Ok(Expression::Function(Box::new(f)))
18395 }
18396 DialectType::DuckDB => {
18397 // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
18398 Ok(Expression::AtTimeZone(Box::new(
18399 crate::expressions::AtTimeZone {
18400 this: Expression::CurrentTimestamp(
18401 crate::expressions::CurrentTimestamp {
18402 precision: None,
18403 sysdate: false,
18404 },
18405 ),
18406 zone: Expression::Literal(Box::new(
18407 Literal::String("UTC".to_string()),
18408 )),
18409 },
18410 )))
18411 }
18412 _ => Ok(Expression::CurrentTimestamp(
18413 crate::expressions::CurrentTimestamp {
18414 precision: None,
18415 sysdate: true,
18416 },
18417 )),
18418 }
18419 }
18420 // LOGICAL_OR(x) -> BOOL_OR(x)
18421 "LOGICAL_OR" if f.args.len() == 1 => {
18422 let name = match target {
18423 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
18424 _ => &f.name,
18425 };
18426 Ok(Expression::Function(Box::new(Function::new(
18427 name.to_string(),
18428 f.args,
18429 ))))
18430 }
18431 // LOGICAL_AND(x) -> BOOL_AND(x)
18432 "LOGICAL_AND" if f.args.len() == 1 => {
18433 let name = match target {
18434 DialectType::Spark | DialectType::Databricks => "BOOL_AND",
18435 _ => &f.name,
18436 };
18437 Ok(Expression::Function(Box::new(Function::new(
18438 name.to_string(),
18439 f.args,
18440 ))))
18441 }
18442 // MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
18443 "MONTHS_ADD" if f.args.len() == 2 => match target {
18444 DialectType::Oracle => Ok(Expression::Function(Box::new(
18445 Function::new("ADD_MONTHS".to_string(), f.args),
18446 ))),
18447 _ => Ok(Expression::Function(f)),
18448 },
18449 // ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
18450 "ARRAY_JOIN" if f.args.len() >= 2 => {
18451 match target {
18452 DialectType::Spark | DialectType::Databricks => {
18453 // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
18454 Ok(Expression::Function(f))
18455 }
18456 DialectType::Hive => {
18457 // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
18458 let mut args = f.args;
18459 let arr = args.remove(0);
18460 let sep = args.remove(0);
18461 // Drop any remaining args (null_replacement)
18462 Ok(Expression::Function(Box::new(Function::new(
18463 "CONCAT_WS".to_string(),
18464 vec![sep, arr],
18465 ))))
18466 }
18467 DialectType::Presto | DialectType::Trino => {
18468 Ok(Expression::Function(f))
18469 }
18470 _ => Ok(Expression::Function(f)),
18471 }
18472 }
18473 // LOCATE(substr, str, pos) 3-arg -> target-specific
18474 // For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
18475 "LOCATE"
18476 if f.args.len() == 3
18477 && matches!(
18478 target,
18479 DialectType::Presto
18480 | DialectType::Trino
18481 | DialectType::Athena
18482 | DialectType::DuckDB
18483 ) =>
18484 {
18485 let mut args = f.args;
18486 let substr = args.remove(0);
18487 let string = args.remove(0);
18488 let pos = args.remove(0);
18489 // STRPOS(SUBSTRING(string, pos), substr)
18490 let substring_call = Expression::Function(Box::new(Function::new(
18491 "SUBSTRING".to_string(),
18492 vec![string.clone(), pos.clone()],
18493 )));
18494 let strpos_call = Expression::Function(Box::new(Function::new(
18495 "STRPOS".to_string(),
18496 vec![substring_call, substr.clone()],
18497 )));
18498 // STRPOS(...) + pos - 1
18499 let pos_adjusted =
18500 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
18501 Expression::Add(Box::new(
18502 crate::expressions::BinaryOp::new(
18503 strpos_call.clone(),
18504 pos.clone(),
18505 ),
18506 )),
18507 Expression::number(1),
18508 )));
18509 // STRPOS(...) = 0
18510 let is_zero =
18511 Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
18512 strpos_call.clone(),
18513 Expression::number(0),
18514 )));
18515
18516 match target {
18517 DialectType::Presto
18518 | DialectType::Trino
18519 | DialectType::Athena => {
18520 // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
18521 Ok(Expression::Function(Box::new(Function::new(
18522 "IF".to_string(),
18523 vec![is_zero, Expression::number(0), pos_adjusted],
18524 ))))
18525 }
18526 DialectType::DuckDB => {
18527 // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
18528 Ok(Expression::Case(Box::new(crate::expressions::Case {
18529 operand: None,
18530 whens: vec![(is_zero, Expression::number(0))],
18531 else_: Some(pos_adjusted),
18532 comments: Vec::new(),
18533 inferred_type: None,
18534 })))
18535 }
18536 _ => Ok(Expression::Function(Box::new(Function::new(
18537 "LOCATE".to_string(),
18538 vec![substr, string, pos],
18539 )))),
18540 }
18541 }
18542 // STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
18543 "STRPOS"
18544 if f.args.len() == 3
18545 && matches!(
18546 target,
18547 DialectType::BigQuery
18548 | DialectType::Oracle
18549 | DialectType::Teradata
18550 ) =>
18551 {
18552 let mut args = f.args;
18553 let haystack = args.remove(0);
18554 let needle = args.remove(0);
18555 let occurrence = args.remove(0);
18556 Ok(Expression::Function(Box::new(Function::new(
18557 "INSTR".to_string(),
18558 vec![haystack, needle, Expression::number(1), occurrence],
18559 ))))
18560 }
18561 // SCHEMA_NAME(id) -> target-specific
18562 "SCHEMA_NAME" if f.args.len() <= 1 => match target {
18563 DialectType::MySQL | DialectType::SingleStore => {
18564 Ok(Expression::Function(Box::new(Function::new(
18565 "SCHEMA".to_string(),
18566 vec![],
18567 ))))
18568 }
18569 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
18570 crate::expressions::CurrentSchema { this: None },
18571 ))),
18572 DialectType::SQLite => Ok(Expression::string("main")),
18573 _ => Ok(Expression::Function(f)),
18574 },
18575 // STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
18576 "STRTOL" if f.args.len() == 2 => match target {
18577 DialectType::Presto | DialectType::Trino => {
18578 Ok(Expression::Function(Box::new(Function::new(
18579 "FROM_BASE".to_string(),
18580 f.args,
18581 ))))
18582 }
18583 _ => Ok(Expression::Function(f)),
18584 },
18585 // EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
18586 "EDITDIST3" if f.args.len() == 2 => match target {
18587 DialectType::Spark | DialectType::Databricks => {
18588 Ok(Expression::Function(Box::new(Function::new(
18589 "LEVENSHTEIN".to_string(),
18590 f.args,
18591 ))))
18592 }
18593 _ => Ok(Expression::Function(f)),
18594 },
18595 // FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
18596 "FORMAT"
18597 if f.args.len() == 2
18598 && matches!(
18599 source,
18600 DialectType::MySQL | DialectType::SingleStore
18601 )
18602 && matches!(target, DialectType::DuckDB) =>
18603 {
18604 let mut args = f.args;
18605 let num_expr = args.remove(0);
18606 let decimals_expr = args.remove(0);
18607 // Extract decimal count
18608 let dec_count = match &decimals_expr {
18609 Expression::Literal(lit)
18610 if matches!(lit.as_ref(), Literal::Number(_)) =>
18611 {
18612 let Literal::Number(n) = lit.as_ref() else {
18613 unreachable!()
18614 };
18615 n.clone()
18616 }
18617 _ => "0".to_string(),
18618 };
18619 let fmt_str = format!("{{:,.{}f}}", dec_count);
18620 Ok(Expression::Function(Box::new(Function::new(
18621 "FORMAT".to_string(),
18622 vec![Expression::string(&fmt_str), num_expr],
18623 ))))
18624 }
18625 // FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
18626 "FORMAT"
18627 if f.args.len() == 2
18628 && matches!(
18629 source,
18630 DialectType::TSQL | DialectType::Fabric
18631 ) =>
18632 {
18633 let val_expr = f.args[0].clone();
18634 let fmt_expr = f.args[1].clone();
18635 // Expand unambiguous .NET single-char date format shortcodes to full patterns.
18636 // Only expand shortcodes that are NOT also valid numeric format specifiers.
18637 // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
18638 // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
18639 let (expanded_fmt, is_shortcode) = match &fmt_expr {
18640 Expression::Literal(lit)
18641 if matches!(
18642 lit.as_ref(),
18643 crate::expressions::Literal::String(_)
18644 ) =>
18645 {
18646 let crate::expressions::Literal::String(s) = lit.as_ref()
18647 else {
18648 unreachable!()
18649 };
18650 match s.as_str() {
18651 "m" | "M" => (Expression::string("MMMM d"), true),
18652 "t" => (Expression::string("h:mm tt"), true),
18653 "T" => (Expression::string("h:mm:ss tt"), true),
18654 "y" | "Y" => (Expression::string("MMMM yyyy"), true),
18655 _ => (fmt_expr.clone(), false),
18656 }
18657 }
18658 _ => (fmt_expr.clone(), false),
18659 };
18660 // Check if the format looks like a date format
18661 let is_date_format = is_shortcode
18662 || match &expanded_fmt {
18663 Expression::Literal(lit)
18664 if matches!(
18665 lit.as_ref(),
18666 crate::expressions::Literal::String(_)
18667 ) =>
18668 {
18669 let crate::expressions::Literal::String(s) =
18670 lit.as_ref()
18671 else {
18672 unreachable!()
18673 };
18674 // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
18675 s.contains("yyyy")
18676 || s.contains("YYYY")
18677 || s.contains("MM")
18678 || s.contains("dd")
18679 || s.contains("MMMM")
18680 || s.contains("HH")
18681 || s.contains("hh")
18682 || s.contains("ss")
18683 }
18684 _ => false,
18685 };
18686 match target {
18687 DialectType::Spark | DialectType::Databricks => {
18688 let func_name = if is_date_format {
18689 "DATE_FORMAT"
18690 } else {
18691 "FORMAT_NUMBER"
18692 };
18693 Ok(Expression::Function(Box::new(Function::new(
18694 func_name.to_string(),
18695 vec![val_expr, expanded_fmt],
18696 ))))
18697 }
18698 _ => {
18699 // For TSQL and other targets, expand shortcodes but keep FORMAT
18700 if is_shortcode {
18701 Ok(Expression::Function(Box::new(Function::new(
18702 "FORMAT".to_string(),
18703 vec![val_expr, expanded_fmt],
18704 ))))
18705 } else {
18706 Ok(Expression::Function(f))
18707 }
18708 }
18709 }
18710 }
18711 // FORMAT('%s', x) from Trino/Presto -> target-specific
18712 "FORMAT"
18713 if f.args.len() >= 2
18714 && matches!(
18715 source,
18716 DialectType::Trino
18717 | DialectType::Presto
18718 | DialectType::Athena
18719 ) =>
18720 {
18721 let fmt_expr = f.args[0].clone();
18722 let value_args: Vec<Expression> = f.args[1..].to_vec();
18723 match target {
18724 // DuckDB: replace %s with {} in format string
18725 DialectType::DuckDB => {
18726 let new_fmt = match &fmt_expr {
18727 Expression::Literal(lit)
18728 if matches!(lit.as_ref(), Literal::String(_)) =>
18729 {
18730 let Literal::String(s) = lit.as_ref() else {
18731 unreachable!()
18732 };
18733 Expression::Literal(Box::new(Literal::String(
18734 s.replace("%s", "{}"),
18735 )))
18736 }
18737 _ => fmt_expr,
18738 };
18739 let mut args = vec![new_fmt];
18740 args.extend(value_args);
18741 Ok(Expression::Function(Box::new(Function::new(
18742 "FORMAT".to_string(),
18743 args,
18744 ))))
18745 }
18746 // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
18747 DialectType::Snowflake => match &fmt_expr {
18748 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "%s" && value_args.len() == 1) =>
18749 {
18750 let Literal::String(_) = lit.as_ref() else {
18751 unreachable!()
18752 };
18753 Ok(Expression::Function(Box::new(Function::new(
18754 "TO_CHAR".to_string(),
18755 value_args,
18756 ))))
18757 }
18758 _ => Ok(Expression::Function(f)),
18759 },
18760 // Default: keep FORMAT as-is
18761 _ => Ok(Expression::Function(f)),
18762 }
18763 }
18764 // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
18765 "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
18766 if f.args.len() == 2 =>
18767 {
18768 // When coming from Snowflake source: ARRAY_CONTAINS(value, array)
18769 // args[0]=value, args[1]=array. For DuckDB target, swap and add NULL-aware CASE.
18770 if matches!(target, DialectType::DuckDB)
18771 && matches!(source, DialectType::Snowflake)
18772 && f.name.eq_ignore_ascii_case("ARRAY_CONTAINS")
18773 {
18774 let value = f.args[0].clone();
18775 let array = f.args[1].clone();
18776
18777 // value IS NULL
18778 let value_is_null =
18779 Expression::IsNull(Box::new(crate::expressions::IsNull {
18780 this: value.clone(),
18781 not: false,
18782 postfix_form: false,
18783 }));
18784
18785 // ARRAY_LENGTH(array)
18786 let array_length =
18787 Expression::Function(Box::new(Function::new(
18788 "ARRAY_LENGTH".to_string(),
18789 vec![array.clone()],
18790 )));
18791 // LIST_COUNT(array)
18792 let list_count = Expression::Function(Box::new(Function::new(
18793 "LIST_COUNT".to_string(),
18794 vec![array.clone()],
18795 )));
18796 // ARRAY_LENGTH(array) <> LIST_COUNT(array)
18797 let neq =
18798 Expression::Neq(Box::new(crate::expressions::BinaryOp {
18799 left: array_length,
18800 right: list_count,
18801 left_comments: vec![],
18802 operator_comments: vec![],
18803 trailing_comments: vec![],
18804 inferred_type: None,
18805 }));
18806 // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
18807 let nullif =
18808 Expression::Nullif(Box::new(crate::expressions::Nullif {
18809 this: Box::new(neq),
18810 expression: Box::new(Expression::Boolean(
18811 crate::expressions::BooleanLiteral { value: false },
18812 )),
18813 }));
18814
18815 // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
18816 let array_contains =
18817 Expression::Function(Box::new(Function::new(
18818 "ARRAY_CONTAINS".to_string(),
18819 vec![array, value],
18820 )));
18821
18822 // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
18823 return Ok(Expression::Case(Box::new(Case {
18824 operand: None,
18825 whens: vec![(value_is_null, nullif)],
18826 else_: Some(array_contains),
18827 comments: Vec::new(),
18828 inferred_type: None,
18829 })));
18830 }
18831 match target {
18832 DialectType::PostgreSQL | DialectType::Redshift => {
18833 // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
18834 let arr = f.args[0].clone();
18835 let needle = f.args[1].clone();
18836 // Convert [] to ARRAY[] for PostgreSQL
18837 let pg_arr = match arr {
18838 Expression::Array(a) => Expression::ArrayFunc(
18839 Box::new(crate::expressions::ArrayConstructor {
18840 expressions: a.expressions,
18841 bracket_notation: false,
18842 use_list_keyword: false,
18843 }),
18844 ),
18845 _ => arr,
18846 };
18847 // needle = ANY(arr) using the Any quantified expression
18848 let any_expr = Expression::Any(Box::new(
18849 crate::expressions::QuantifiedExpr {
18850 this: needle.clone(),
18851 subquery: pg_arr,
18852 op: Some(crate::expressions::QuantifiedOp::Eq),
18853 },
18854 ));
18855 let coalesce = Expression::Coalesce(Box::new(
18856 crate::expressions::VarArgFunc {
18857 expressions: vec![
18858 any_expr,
18859 Expression::Boolean(
18860 crate::expressions::BooleanLiteral {
18861 value: false,
18862 },
18863 ),
18864 ],
18865 original_name: None,
18866 inferred_type: None,
18867 },
18868 ));
18869 let is_null_check = Expression::IsNull(Box::new(
18870 crate::expressions::IsNull {
18871 this: needle,
18872 not: false,
18873 postfix_form: false,
18874 },
18875 ));
18876 Ok(Expression::Case(Box::new(Case {
18877 operand: None,
18878 whens: vec![(
18879 is_null_check,
18880 Expression::Null(crate::expressions::Null),
18881 )],
18882 else_: Some(coalesce),
18883 comments: Vec::new(),
18884 inferred_type: None,
18885 })))
18886 }
18887 _ => Ok(Expression::Function(Box::new(Function::new(
18888 "ARRAY_CONTAINS".to_string(),
18889 f.args,
18890 )))),
18891 }
18892 }
18893 // LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
18894 "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
18895 match target {
18896 DialectType::PostgreSQL | DialectType::Redshift => {
18897 // arr1 && arr2 with ARRAY[] syntax
18898 let mut args = f.args;
18899 let arr1 = args.remove(0);
18900 let arr2 = args.remove(0);
18901 let pg_arr1 = match arr1 {
18902 Expression::Array(a) => Expression::ArrayFunc(
18903 Box::new(crate::expressions::ArrayConstructor {
18904 expressions: a.expressions,
18905 bracket_notation: false,
18906 use_list_keyword: false,
18907 }),
18908 ),
18909 _ => arr1,
18910 };
18911 let pg_arr2 = match arr2 {
18912 Expression::Array(a) => Expression::ArrayFunc(
18913 Box::new(crate::expressions::ArrayConstructor {
18914 expressions: a.expressions,
18915 bracket_notation: false,
18916 use_list_keyword: false,
18917 }),
18918 ),
18919 _ => arr2,
18920 };
18921 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
18922 pg_arr1, pg_arr2,
18923 ))))
18924 }
18925 DialectType::DuckDB => {
18926 // DuckDB: arr1 && arr2 (native support)
18927 let mut args = f.args;
18928 let arr1 = args.remove(0);
18929 let arr2 = args.remove(0);
18930 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
18931 arr1, arr2,
18932 ))))
18933 }
18934 _ => Ok(Expression::Function(Box::new(Function::new(
18935 "LIST_HAS_ANY".to_string(),
18936 f.args,
18937 )))),
18938 }
18939 }
18940 // APPROX_QUANTILE(x, q) -> target-specific
18941 "APPROX_QUANTILE" if f.args.len() == 2 => match target {
18942 DialectType::Snowflake => Ok(Expression::Function(Box::new(
18943 Function::new("APPROX_PERCENTILE".to_string(), f.args),
18944 ))),
18945 DialectType::DuckDB => Ok(Expression::Function(f)),
18946 _ => Ok(Expression::Function(f)),
18947 },
18948 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
18949 "MAKE_DATE" if f.args.len() == 3 => match target {
18950 DialectType::BigQuery => Ok(Expression::Function(Box::new(
18951 Function::new("DATE".to_string(), f.args),
18952 ))),
18953 _ => Ok(Expression::Function(f)),
18954 },
18955 // RANGE(start, end[, step]) -> target-specific
18956 "RANGE"
18957 if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
18958 {
18959 let start = f.args[0].clone();
18960 let end = f.args[1].clone();
18961 let step = f.args.get(2).cloned();
18962 match target {
18963 // Snowflake ARRAY_GENERATE_RANGE uses exclusive end (same as DuckDB RANGE),
18964 // so just rename without adjusting the end argument.
18965 DialectType::Snowflake => {
18966 let mut args = vec![start, end];
18967 if let Some(s) = step {
18968 args.push(s);
18969 }
18970 Ok(Expression::Function(Box::new(Function::new(
18971 "ARRAY_GENERATE_RANGE".to_string(),
18972 args,
18973 ))))
18974 }
18975 DialectType::Spark | DialectType::Databricks => {
18976 // RANGE(start, end) -> SEQUENCE(start, end-1)
18977 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
18978 // RANGE(start, start) -> ARRAY() (empty)
18979 // RANGE(start, end, 0) -> ARRAY() (empty)
18980 // When start or end is not a constant: IF((end - 1) < start, ARRAY(), SEQUENCE(start, (end - 1)))
18981
18982 // Check for constant args
18983 fn extract_i64(e: &Expression) -> Option<i64> {
18984 match e {
18985 Expression::Literal(lit)
18986 if matches!(
18987 lit.as_ref(),
18988 Literal::Number(_)
18989 ) =>
18990 {
18991 let Literal::Number(n) = lit.as_ref() else {
18992 unreachable!()
18993 };
18994 n.parse::<i64>().ok()
18995 }
18996 Expression::Neg(u) => {
18997 if let Expression::Literal(lit) = &u.this {
18998 if let Literal::Number(n) = lit.as_ref() {
18999 n.parse::<i64>().ok().map(|v| -v)
19000 } else {
19001 None
19002 }
19003 } else {
19004 None
19005 }
19006 }
19007 _ => None,
19008 }
19009 }
19010 let start_val = extract_i64(&start);
19011 let end_val = extract_i64(&end);
19012 let step_val = step.as_ref().and_then(|s| extract_i64(s));
19013
19014 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
19015 if step_val == Some(0) {
19016 return Ok(Expression::Function(Box::new(
19017 Function::new("ARRAY".to_string(), vec![]),
19018 )));
19019 }
19020 if let (Some(s), Some(e_val)) = (start_val, end_val) {
19021 if s == e_val {
19022 return Ok(Expression::Function(Box::new(
19023 Function::new("ARRAY".to_string(), vec![]),
19024 )));
19025 }
19026 }
19027
19028 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
19029 // All constants - compute new end = end - step (if step provided) or end - 1
19030 match step_val {
19031 Some(st) if st < 0 => {
19032 // Negative step: SEQUENCE(start, end - step, step)
19033 let new_end = e_val - st; // end - step (= end + |step|)
19034 let mut args =
19035 vec![start, Expression::number(new_end)];
19036 if let Some(s) = step {
19037 args.push(s);
19038 }
19039 Ok(Expression::Function(Box::new(
19040 Function::new("SEQUENCE".to_string(), args),
19041 )))
19042 }
19043 Some(st) => {
19044 let new_end = e_val - st;
19045 let mut args =
19046 vec![start, Expression::number(new_end)];
19047 if let Some(s) = step {
19048 args.push(s);
19049 }
19050 Ok(Expression::Function(Box::new(
19051 Function::new("SEQUENCE".to_string(), args),
19052 )))
19053 }
19054 None => {
19055 // No step: SEQUENCE(start, end - 1)
19056 let new_end = e_val - 1;
19057 Ok(Expression::Function(Box::new(
19058 Function::new(
19059 "SEQUENCE".to_string(),
19060 vec![
19061 start,
19062 Expression::number(new_end),
19063 ],
19064 ),
19065 )))
19066 }
19067 }
19068 } else {
19069 // Variable end: IF((end - 1) < start, ARRAY(), SEQUENCE(start, (end - 1)))
19070 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
19071 end.clone(),
19072 Expression::number(1),
19073 )));
19074 let cond = Expression::Lt(Box::new(BinaryOp::new(
19075 Expression::Paren(Box::new(Paren {
19076 this: end_m1.clone(),
19077 trailing_comments: Vec::new(),
19078 })),
19079 start.clone(),
19080 )));
19081 let empty = Expression::Function(Box::new(
19082 Function::new("ARRAY".to_string(), vec![]),
19083 ));
19084 let mut seq_args = vec![
19085 start,
19086 Expression::Paren(Box::new(Paren {
19087 this: end_m1,
19088 trailing_comments: Vec::new(),
19089 })),
19090 ];
19091 if let Some(s) = step {
19092 seq_args.push(s);
19093 }
19094 let seq = Expression::Function(Box::new(
19095 Function::new("SEQUENCE".to_string(), seq_args),
19096 ));
19097 Ok(Expression::IfFunc(Box::new(
19098 crate::expressions::IfFunc {
19099 condition: cond,
19100 true_value: empty,
19101 false_value: Some(seq),
19102 original_name: None,
19103 inferred_type: None,
19104 },
19105 )))
19106 }
19107 }
19108 DialectType::SQLite => {
19109 // RANGE(start, end) -> GENERATE_SERIES(start, end)
19110 // The subquery wrapping is handled at the Alias level
19111 let mut args = vec![start, end];
19112 if let Some(s) = step {
19113 args.push(s);
19114 }
19115 Ok(Expression::Function(Box::new(Function::new(
19116 "GENERATE_SERIES".to_string(),
19117 args,
19118 ))))
19119 }
19120 _ => Ok(Expression::Function(f)),
19121 }
19122 }
19123 // ARRAY_REVERSE_SORT -> target-specific
19124 // (handled above as well, but also need DuckDB self-normalization)
19125 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
19126 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
19127 DialectType::Snowflake => Ok(Expression::Function(Box::new(
19128 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
19129 ))),
19130 DialectType::Spark | DialectType::Databricks => {
19131 Ok(Expression::Function(Box::new(Function::new(
19132 "MAP_FROM_ARRAYS".to_string(),
19133 f.args,
19134 ))))
19135 }
19136 _ => Ok(Expression::Function(Box::new(Function::new(
19137 "MAP".to_string(),
19138 f.args,
19139 )))),
19140 },
19141 // VARIANCE(x) -> varSamp(x) for ClickHouse
19142 "VARIANCE" if f.args.len() == 1 => match target {
19143 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
19144 Function::new("varSamp".to_string(), f.args),
19145 ))),
19146 _ => Ok(Expression::Function(f)),
19147 },
19148 // STDDEV(x) -> stddevSamp(x) for ClickHouse
19149 "STDDEV" if f.args.len() == 1 => match target {
19150 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
19151 Function::new("stddevSamp".to_string(), f.args),
19152 ))),
19153 _ => Ok(Expression::Function(f)),
19154 },
19155 // ISINF(x) -> IS_INF(x) for BigQuery
19156 "ISINF" if f.args.len() == 1 => match target {
19157 DialectType::BigQuery => Ok(Expression::Function(Box::new(
19158 Function::new("IS_INF".to_string(), f.args),
19159 ))),
19160 _ => Ok(Expression::Function(f)),
19161 },
19162 // CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
19163 "CONTAINS" if f.args.len() == 2 => match target {
19164 DialectType::Spark
19165 | DialectType::Databricks
19166 | DialectType::Hive => Ok(Expression::Function(Box::new(
19167 Function::new("ARRAY_CONTAINS".to_string(), f.args),
19168 ))),
19169 _ => Ok(Expression::Function(f)),
19170 },
19171 // ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto
19172 "ARRAY_CONTAINS" if f.args.len() == 2 => match target {
19173 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
19174 Ok(Expression::Function(Box::new(Function::new(
19175 "CONTAINS".to_string(),
19176 f.args,
19177 ))))
19178 }
19179 DialectType::DuckDB => Ok(Expression::Function(Box::new(
19180 Function::new("ARRAY_CONTAINS".to_string(), f.args),
19181 ))),
19182 _ => Ok(Expression::Function(f)),
19183 },
19184 // TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
19185 "TO_UNIXTIME" if f.args.len() == 1 => match target {
19186 DialectType::Hive
19187 | DialectType::Spark
19188 | DialectType::Databricks => Ok(Expression::Function(Box::new(
19189 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
19190 ))),
19191 _ => Ok(Expression::Function(f)),
19192 },
19193 // FROM_UNIXTIME(x) -> target-specific
19194 "FROM_UNIXTIME" if f.args.len() == 1 => {
19195 match target {
19196 DialectType::Hive
19197 | DialectType::Spark
19198 | DialectType::Databricks
19199 | DialectType::Presto
19200 | DialectType::Trino => Ok(Expression::Function(f)),
19201 DialectType::DuckDB => {
19202 // DuckDB: TO_TIMESTAMP(x)
19203 let arg = f.args.into_iter().next().unwrap();
19204 Ok(Expression::Function(Box::new(Function::new(
19205 "TO_TIMESTAMP".to_string(),
19206 vec![arg],
19207 ))))
19208 }
19209 DialectType::PostgreSQL => {
19210 // PG: TO_TIMESTAMP(col)
19211 let arg = f.args.into_iter().next().unwrap();
19212 Ok(Expression::Function(Box::new(Function::new(
19213 "TO_TIMESTAMP".to_string(),
19214 vec![arg],
19215 ))))
19216 }
19217 DialectType::Redshift => {
19218 // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
19219 let arg = f.args.into_iter().next().unwrap();
19220 let epoch_ts = Expression::Literal(Box::new(
19221 Literal::Timestamp("epoch".to_string()),
19222 ));
19223 let interval = Expression::Interval(Box::new(
19224 crate::expressions::Interval {
19225 this: Some(Expression::string("1 SECOND")),
19226 unit: None,
19227 },
19228 ));
19229 let mul =
19230 Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
19231 let add =
19232 Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
19233 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
19234 this: add,
19235 trailing_comments: Vec::new(),
19236 })))
19237 }
19238 _ => Ok(Expression::Function(f)),
19239 }
19240 }
19241 // FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
19242 "FROM_UNIXTIME"
19243 if f.args.len() == 2
19244 && matches!(
19245 source,
19246 DialectType::Hive
19247 | DialectType::Spark
19248 | DialectType::Databricks
19249 ) =>
19250 {
19251 let mut args = f.args;
19252 let unix_ts = args.remove(0);
19253 let fmt_expr = args.remove(0);
19254 match target {
19255 DialectType::DuckDB => {
19256 // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
19257 let to_ts = Expression::Function(Box::new(Function::new(
19258 "TO_TIMESTAMP".to_string(),
19259 vec![unix_ts],
19260 )));
19261 if let Expression::Literal(lit) = &fmt_expr {
19262 if let crate::expressions::Literal::String(s) =
19263 lit.as_ref()
19264 {
19265 let c_fmt = Self::hive_format_to_c_format(s);
19266 Ok(Expression::Function(Box::new(Function::new(
19267 "STRFTIME".to_string(),
19268 vec![to_ts, Expression::string(&c_fmt)],
19269 ))))
19270 } else {
19271 Ok(Expression::Function(Box::new(Function::new(
19272 "STRFTIME".to_string(),
19273 vec![to_ts, fmt_expr],
19274 ))))
19275 }
19276 } else {
19277 Ok(Expression::Function(Box::new(Function::new(
19278 "STRFTIME".to_string(),
19279 vec![to_ts, fmt_expr],
19280 ))))
19281 }
19282 }
19283 DialectType::Presto
19284 | DialectType::Trino
19285 | DialectType::Athena => {
19286 // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
19287 let from_unix =
19288 Expression::Function(Box::new(Function::new(
19289 "FROM_UNIXTIME".to_string(),
19290 vec![unix_ts],
19291 )));
19292 if let Expression::Literal(lit) = &fmt_expr {
19293 if let crate::expressions::Literal::String(s) =
19294 lit.as_ref()
19295 {
19296 let p_fmt = Self::hive_format_to_presto_format(s);
19297 Ok(Expression::Function(Box::new(Function::new(
19298 "DATE_FORMAT".to_string(),
19299 vec![from_unix, Expression::string(&p_fmt)],
19300 ))))
19301 } else {
19302 Ok(Expression::Function(Box::new(Function::new(
19303 "DATE_FORMAT".to_string(),
19304 vec![from_unix, fmt_expr],
19305 ))))
19306 }
19307 } else {
19308 Ok(Expression::Function(Box::new(Function::new(
19309 "DATE_FORMAT".to_string(),
19310 vec![from_unix, fmt_expr],
19311 ))))
19312 }
19313 }
19314 _ => {
19315 // Keep as FROM_UNIXTIME(x, fmt) for other targets
19316 Ok(Expression::Function(Box::new(Function::new(
19317 "FROM_UNIXTIME".to_string(),
19318 vec![unix_ts, fmt_expr],
19319 ))))
19320 }
19321 }
19322 }
19323 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
19324 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
19325 let unit_str = Self::get_unit_str_static(&f.args[0]);
19326 // Get the raw unit text preserving original case
19327 let raw_unit = match &f.args[0] {
19328 Expression::Identifier(id) => id.name.clone(),
19329 Expression::Var(v) => v.this.clone(),
19330 Expression::Literal(lit)
19331 if matches!(
19332 lit.as_ref(),
19333 crate::expressions::Literal::String(_)
19334 ) =>
19335 {
19336 let crate::expressions::Literal::String(s) = lit.as_ref()
19337 else {
19338 unreachable!()
19339 };
19340 s.clone()
19341 }
19342 Expression::Column(col) => col.name.name.clone(),
19343 _ => unit_str.clone(),
19344 };
19345 match target {
19346 DialectType::TSQL | DialectType::Fabric => {
19347 // Preserve original case of unit for TSQL
19348 let unit_name = match unit_str.as_str() {
19349 "YY" | "YYYY" => "YEAR".to_string(),
19350 "QQ" | "Q" => "QUARTER".to_string(),
19351 "MM" | "M" => "MONTH".to_string(),
19352 "WK" | "WW" => "WEEK".to_string(),
19353 "DD" | "D" | "DY" => "DAY".to_string(),
19354 "HH" => "HOUR".to_string(),
19355 "MI" | "N" => "MINUTE".to_string(),
19356 "SS" | "S" => "SECOND".to_string(),
19357 _ => raw_unit.clone(), // preserve original case
19358 };
19359 let mut args = f.args;
19360 args[0] =
19361 Expression::Identifier(Identifier::new(&unit_name));
19362 Ok(Expression::Function(Box::new(Function::new(
19363 "DATEPART".to_string(),
19364 args,
19365 ))))
19366 }
19367 DialectType::Spark | DialectType::Databricks => {
19368 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
19369 // Preserve original case for non-abbreviation units
19370 let unit = match unit_str.as_str() {
19371 "YY" | "YYYY" => "YEAR".to_string(),
19372 "QQ" | "Q" => "QUARTER".to_string(),
19373 "MM" | "M" => "MONTH".to_string(),
19374 "WK" | "WW" => "WEEK".to_string(),
19375 "DD" | "D" | "DY" => "DAY".to_string(),
19376 "HH" => "HOUR".to_string(),
19377 "MI" | "N" => "MINUTE".to_string(),
19378 "SS" | "S" => "SECOND".to_string(),
19379 _ => raw_unit, // preserve original case
19380 };
19381 Ok(Expression::Extract(Box::new(
19382 crate::expressions::ExtractFunc {
19383 this: f.args[1].clone(),
19384 field: crate::expressions::DateTimeField::Custom(
19385 unit,
19386 ),
19387 },
19388 )))
19389 }
19390 _ => Ok(Expression::Function(Box::new(Function::new(
19391 "DATE_PART".to_string(),
19392 f.args,
19393 )))),
19394 }
19395 }
19396 // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
19397 // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
19398 // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
19399 // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
19400 "DATENAME" if f.args.len() == 2 => {
19401 let unit_str = Self::get_unit_str_static(&f.args[0]);
19402 let date_expr = f.args[1].clone();
19403 match unit_str.as_str() {
19404 "MM" | "M" | "MONTH" => match target {
19405 DialectType::TSQL => {
19406 let cast_date = Expression::Cast(Box::new(
19407 crate::expressions::Cast {
19408 this: date_expr,
19409 to: DataType::Custom {
19410 name: "DATETIME2".to_string(),
19411 },
19412 trailing_comments: Vec::new(),
19413 double_colon_syntax: false,
19414 format: None,
19415 default: None,
19416 inferred_type: None,
19417 },
19418 ));
19419 Ok(Expression::Function(Box::new(Function::new(
19420 "FORMAT".to_string(),
19421 vec![cast_date, Expression::string("MMMM")],
19422 ))))
19423 }
19424 DialectType::Spark | DialectType::Databricks => {
19425 let cast_date = Expression::Cast(Box::new(
19426 crate::expressions::Cast {
19427 this: date_expr,
19428 to: DataType::Timestamp {
19429 timezone: false,
19430 precision: None,
19431 },
19432 trailing_comments: Vec::new(),
19433 double_colon_syntax: false,
19434 format: None,
19435 default: None,
19436 inferred_type: None,
19437 },
19438 ));
19439 Ok(Expression::Function(Box::new(Function::new(
19440 "DATE_FORMAT".to_string(),
19441 vec![cast_date, Expression::string("MMMM")],
19442 ))))
19443 }
19444 _ => Ok(Expression::Function(f)),
19445 },
19446 "DW" | "WEEKDAY" => match target {
19447 DialectType::TSQL => {
19448 let cast_date = Expression::Cast(Box::new(
19449 crate::expressions::Cast {
19450 this: date_expr,
19451 to: DataType::Custom {
19452 name: "DATETIME2".to_string(),
19453 },
19454 trailing_comments: Vec::new(),
19455 double_colon_syntax: false,
19456 format: None,
19457 default: None,
19458 inferred_type: None,
19459 },
19460 ));
19461 Ok(Expression::Function(Box::new(Function::new(
19462 "FORMAT".to_string(),
19463 vec![cast_date, Expression::string("dddd")],
19464 ))))
19465 }
19466 DialectType::Spark | DialectType::Databricks => {
19467 let cast_date = Expression::Cast(Box::new(
19468 crate::expressions::Cast {
19469 this: date_expr,
19470 to: DataType::Timestamp {
19471 timezone: false,
19472 precision: None,
19473 },
19474 trailing_comments: Vec::new(),
19475 double_colon_syntax: false,
19476 format: None,
19477 default: None,
19478 inferred_type: None,
19479 },
19480 ));
19481 Ok(Expression::Function(Box::new(Function::new(
19482 "DATE_FORMAT".to_string(),
19483 vec![cast_date, Expression::string("EEEE")],
19484 ))))
19485 }
19486 _ => Ok(Expression::Function(f)),
19487 },
19488 _ => Ok(Expression::Function(f)),
19489 }
19490 }
19491 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
19492 "STRING_AGG" if f.args.len() >= 2 => {
19493 let x = f.args[0].clone();
19494 let sep = f.args[1].clone();
19495 match target {
19496 DialectType::MySQL
19497 | DialectType::SingleStore
19498 | DialectType::Doris
19499 | DialectType::StarRocks => Ok(Expression::GroupConcat(
19500 Box::new(crate::expressions::GroupConcatFunc {
19501 this: x,
19502 separator: Some(sep),
19503 order_by: None,
19504 distinct: false,
19505 filter: None,
19506 limit: None,
19507 inferred_type: None,
19508 }),
19509 )),
19510 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
19511 crate::expressions::GroupConcatFunc {
19512 this: x,
19513 separator: Some(sep),
19514 order_by: None,
19515 distinct: false,
19516 filter: None,
19517 limit: None,
19518 inferred_type: None,
19519 },
19520 ))),
19521 DialectType::PostgreSQL | DialectType::Redshift => {
19522 Ok(Expression::StringAgg(Box::new(
19523 crate::expressions::StringAggFunc {
19524 this: x,
19525 separator: Some(sep),
19526 order_by: None,
19527 distinct: false,
19528 filter: None,
19529 limit: None,
19530 inferred_type: None,
19531 },
19532 )))
19533 }
19534 _ => Ok(Expression::Function(f)),
19535 }
19536 }
19537 "TRY_DIVIDE" if f.args.len() == 2 => {
19538 let mut args = f.args;
19539 let x = args.remove(0);
19540 let y = args.remove(0);
19541 match target {
19542 DialectType::Spark | DialectType::Databricks => {
19543 Ok(Expression::Function(Box::new(Function::new(
19544 "TRY_DIVIDE".to_string(),
19545 vec![x, y],
19546 ))))
19547 }
19548 DialectType::Snowflake => {
19549 let y_ref = match &y {
19550 Expression::Column(_)
19551 | Expression::Literal(_)
19552 | Expression::Identifier(_) => y.clone(),
19553 _ => Expression::Paren(Box::new(Paren {
19554 this: y.clone(),
19555 trailing_comments: vec![],
19556 })),
19557 };
19558 let x_ref = match &x {
19559 Expression::Column(_)
19560 | Expression::Literal(_)
19561 | Expression::Identifier(_) => x.clone(),
19562 _ => Expression::Paren(Box::new(Paren {
19563 this: x.clone(),
19564 trailing_comments: vec![],
19565 })),
19566 };
19567 let condition = Expression::Neq(Box::new(
19568 crate::expressions::BinaryOp::new(
19569 y_ref.clone(),
19570 Expression::number(0),
19571 ),
19572 ));
19573 let div_expr = Expression::Div(Box::new(
19574 crate::expressions::BinaryOp::new(x_ref, y_ref),
19575 ));
19576 Ok(Expression::IfFunc(Box::new(
19577 crate::expressions::IfFunc {
19578 condition,
19579 true_value: div_expr,
19580 false_value: Some(Expression::Null(Null)),
19581 original_name: Some("IFF".to_string()),
19582 inferred_type: None,
19583 },
19584 )))
19585 }
19586 DialectType::DuckDB => {
19587 let y_ref = match &y {
19588 Expression::Column(_)
19589 | Expression::Literal(_)
19590 | Expression::Identifier(_) => y.clone(),
19591 _ => Expression::Paren(Box::new(Paren {
19592 this: y.clone(),
19593 trailing_comments: vec![],
19594 })),
19595 };
19596 let x_ref = match &x {
19597 Expression::Column(_)
19598 | Expression::Literal(_)
19599 | Expression::Identifier(_) => x.clone(),
19600 _ => Expression::Paren(Box::new(Paren {
19601 this: x.clone(),
19602 trailing_comments: vec![],
19603 })),
19604 };
19605 let condition = Expression::Neq(Box::new(
19606 crate::expressions::BinaryOp::new(
19607 y_ref.clone(),
19608 Expression::number(0),
19609 ),
19610 ));
19611 let div_expr = Expression::Div(Box::new(
19612 crate::expressions::BinaryOp::new(x_ref, y_ref),
19613 ));
19614 Ok(Expression::Case(Box::new(Case {
19615 operand: None,
19616 whens: vec![(condition, div_expr)],
19617 else_: Some(Expression::Null(Null)),
19618 comments: Vec::new(),
19619 inferred_type: None,
19620 })))
19621 }
19622 _ => Ok(Expression::Function(Box::new(Function::new(
19623 "TRY_DIVIDE".to_string(),
19624 vec![x, y],
19625 )))),
19626 }
19627 }
19628 // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
19629 "JSON_ARRAYAGG" => match target {
19630 DialectType::PostgreSQL => {
19631 Ok(Expression::Function(Box::new(Function {
19632 name: "JSON_AGG".to_string(),
19633 ..(*f)
19634 })))
19635 }
19636 _ => Ok(Expression::Function(f)),
19637 },
19638 // SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
19639 "SCHEMA_NAME" => match target {
19640 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
19641 crate::expressions::CurrentSchema { this: None },
19642 ))),
19643 DialectType::SQLite => Ok(Expression::string("main")),
19644 _ => Ok(Expression::Function(f)),
19645 },
19646 // TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
19647 "TO_TIMESTAMP"
19648 if f.args.len() == 2
19649 && matches!(
19650 source,
19651 DialectType::Spark
19652 | DialectType::Databricks
19653 | DialectType::Hive
19654 )
19655 && matches!(target, DialectType::DuckDB) =>
19656 {
19657 let mut args = f.args;
19658 let val = args.remove(0);
19659 let fmt_expr = args.remove(0);
19660 if let Expression::Literal(ref lit) = fmt_expr {
19661 if let Literal::String(ref s) = lit.as_ref() {
19662 // Convert Java/Spark format to C strptime format
/// Translates a Java/Spark datetime pattern into a C-style `strptime` format.
///
/// The pattern is scanned once, left to right, so text emitted for one token
/// can never be re-matched by another. (Chained `str::replace` calls turned
/// `"MMmm"` into `"%%Mm"`, because the `m` of the freshly written `%m` joined
/// the following `m` to look like `mm`.)
///
/// Recognized tokens:
/// `yyyy`→`%Y`, `yy`→`%y`, `MM`→`%m`, `dd`→`%d`, `HH`→`%H`, `mm`→`%M`,
/// `ss`→`%S`, `SSSSSS`→`%f`, `EEEE`→`%A` (full weekday name; `%W` would be
/// the week number), `z`→`%Z`, `Z`→`%z`.
///
/// A literal `%` in the input keeps the character after it untouched (so an
/// input of `"%z"` stays `"%z"`), unless a multi-character token starts right
/// after it, in which case that token is still translated. All other
/// characters are copied verbatim. Java single-quoted literal sections are
/// not interpreted.
fn java_to_c_fmt(fmt: &str) -> String {
    // Multi-character tokens. For a given letter the longer token is listed
    // first so that `yyyy` wins over `yy`.
    const TOKENS: [(&str, &str); 9] = [
        ("yyyy", "%Y"),
        ("SSSSSS", "%f"),
        ("EEEE", "%A"),
        ("MM", "%m"),
        ("dd", "%d"),
        ("HH", "%H"),
        ("mm", "%M"),
        ("ss", "%S"),
        ("yy", "%y"),
    ];

    // Returns the (java, c) pair whose Java token starts `rest`, if any.
    fn token_at(rest: &str) -> Option<(&'static str, &'static str)> {
        TOKENS
            .iter()
            .copied()
            .find(|&(java, _)| rest.starts_with(java))
    }

    let mut out = String::with_capacity(fmt.len() + 8);
    let mut rest = fmt;
    while let Some(ch) = rest.chars().next() {
        if let Some((java, c)) = token_at(rest) {
            out.push_str(c);
            rest = &rest[java.len()..];
            continue;
        }
        match ch {
            // Java `z` is a zone name and `Z` a numeric offset; C swaps the case.
            'z' => {
                out.push_str("%Z");
                rest = &rest[1..];
            }
            'Z' => {
                out.push_str("%z");
                rest = &rest[1..];
            }
            '%' => {
                out.push('%');
                rest = &rest[1..];
                // Keep the character after a literal '%' as-is, unless a
                // multi-character token begins there (handled next iteration).
                if token_at(rest).is_none() {
                    if let Some(next) = rest.chars().next() {
                        out.push(next);
                        rest = &rest[next.len_utf8()..];
                    }
                }
            }
            other => {
                out.push(other);
                rest = &rest[other.len_utf8()..];
            }
        }
    }
    out
}
19695 let c_fmt = java_to_c_fmt(s);
19696 Ok(Expression::Function(Box::new(Function::new(
19697 "STRPTIME".to_string(),
19698 vec![val, Expression::string(&c_fmt)],
19699 ))))
19700 } else {
19701 Ok(Expression::Function(Box::new(Function::new(
19702 "STRPTIME".to_string(),
19703 vec![val, fmt_expr],
19704 ))))
19705 }
19706 } else {
19707 Ok(Expression::Function(Box::new(Function::new(
19708 "STRPTIME".to_string(),
19709 vec![val, fmt_expr],
19710 ))))
19711 }
19712 }
19713 // TO_DATE(x) 1-arg from Doris: date conversion
19714 "TO_DATE"
19715 if f.args.len() == 1
19716 && matches!(
19717 source,
19718 DialectType::Doris | DialectType::StarRocks
19719 ) =>
19720 {
19721 let arg = f.args.into_iter().next().unwrap();
19722 match target {
19723 DialectType::Oracle
19724 | DialectType::DuckDB
19725 | DialectType::TSQL => {
19726 // CAST(x AS DATE)
19727 Ok(Expression::Cast(Box::new(Cast {
19728 this: arg,
19729 to: DataType::Date,
19730 double_colon_syntax: false,
19731 trailing_comments: vec![],
19732 format: None,
19733 default: None,
19734 inferred_type: None,
19735 })))
19736 }
19737 DialectType::MySQL | DialectType::SingleStore => {
19738 // DATE(x)
19739 Ok(Expression::Function(Box::new(Function::new(
19740 "DATE".to_string(),
19741 vec![arg],
19742 ))))
19743 }
19744 _ => {
19745 // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
19746 Ok(Expression::Function(Box::new(Function::new(
19747 "TO_DATE".to_string(),
19748 vec![arg],
19749 ))))
19750 }
19751 }
19752 }
19753 // TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
19754 "TO_DATE"
19755 if f.args.len() == 1
19756 && matches!(
19757 source,
19758 DialectType::Spark
19759 | DialectType::Databricks
19760 | DialectType::Hive
19761 ) =>
19762 {
19763 let arg = f.args.into_iter().next().unwrap();
19764 match target {
19765 DialectType::DuckDB => {
19766 // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
19767 Ok(Expression::TryCast(Box::new(Cast {
19768 this: arg,
19769 to: DataType::Date,
19770 double_colon_syntax: false,
19771 trailing_comments: vec![],
19772 format: None,
19773 default: None,
19774 inferred_type: None,
19775 })))
19776 }
19777 DialectType::Presto
19778 | DialectType::Trino
19779 | DialectType::Athena => {
19780 // CAST(CAST(x AS TIMESTAMP) AS DATE)
19781 Ok(Self::double_cast_timestamp_date(arg))
19782 }
19783 DialectType::Snowflake => {
19784 // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
19785 // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
19786 Ok(Expression::Function(Box::new(Function::new(
19787 "TRY_TO_DATE".to_string(),
19788 vec![arg, Expression::string("yyyy-mm-DD")],
19789 ))))
19790 }
19791 _ => {
19792 // Default: keep as TO_DATE(x)
19793 Ok(Expression::Function(Box::new(Function::new(
19794 "TO_DATE".to_string(),
19795 vec![arg],
19796 ))))
19797 }
19798 }
19799 }
19800 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
19801 "TO_DATE"
19802 if f.args.len() == 2
19803 && matches!(
19804 source,
19805 DialectType::Spark
19806 | DialectType::Databricks
19807 | DialectType::Hive
19808 ) =>
19809 {
19810 let mut args = f.args;
19811 let val = args.remove(0);
19812 let fmt_expr = args.remove(0);
19813 let is_default_format = matches!(&fmt_expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "yyyy-MM-dd"));
19814
19815 if is_default_format {
19816 // Default format: same as 1-arg form
19817 match target {
19818 DialectType::DuckDB => {
19819 Ok(Expression::TryCast(Box::new(Cast {
19820 this: val,
19821 to: DataType::Date,
19822 double_colon_syntax: false,
19823 trailing_comments: vec![],
19824 format: None,
19825 default: None,
19826 inferred_type: None,
19827 })))
19828 }
19829 DialectType::Presto
19830 | DialectType::Trino
19831 | DialectType::Athena => {
19832 Ok(Self::double_cast_timestamp_date(val))
19833 }
19834 DialectType::Snowflake => {
19835 // TRY_TO_DATE(x, format) with Snowflake format mapping
19836 let sf_fmt = "yyyy-MM-dd"
19837 .replace("yyyy", "yyyy")
19838 .replace("MM", "mm")
19839 .replace("dd", "DD");
19840 Ok(Expression::Function(Box::new(Function::new(
19841 "TRY_TO_DATE".to_string(),
19842 vec![val, Expression::string(&sf_fmt)],
19843 ))))
19844 }
19845 _ => Ok(Expression::Function(Box::new(Function::new(
19846 "TO_DATE".to_string(),
19847 vec![val],
19848 )))),
19849 }
19850 } else {
19851 // Non-default format: use format-based parsing
19852 if let Expression::Literal(ref lit) = fmt_expr {
19853 if let Literal::String(ref s) = lit.as_ref() {
19854 match target {
19855 DialectType::DuckDB => {
19856 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
/// Converts the Java/Spark datetime pattern given to `TO_DATE(x, fmt)` into
/// the C `strptime` format string passed to DuckDB's `TRY_STRPTIME`.
///
/// The pattern is walked once from left to right:
///
/// * `%` followed by any character is passed through untouched, so formats
///   that are already in strptime syntax survive unchanged;
/// * the Java tokens `yyyy`, `yy`, `MM`, `dd`, `HH`, `mm`, `ss`, `SSSSSS` and
///   `EEEE` become `%Y`, `%y`, `%m`, `%d`, `%H`, `%M`, `%S`, `%f` and `%A`;
/// * a standalone `z` becomes `%Z` and a standalone `Z` becomes `%z`;
/// * every other character is copied as-is.
///
/// A single pass is used instead of chained `str::replace` calls because the
/// chained form re-scanned its own output: the `m` produced for `MM` could be
/// consumed by the later `mm` rule (`"MMmm"` came out as `"%%Mm"`).
///
/// `EEEE` (full weekday name) maps to `%A`; `%W` would mean "week number of
/// the year" to strptime.
fn java_to_c_fmt_todate(fmt: &str) -> String {
    // Longer tokens precede their own prefixes (`yyyy` before `yy`).
    const TOKENS: &[(&str, &str)] = &[
        ("SSSSSS", "%f"),
        ("yyyy", "%Y"),
        ("EEEE", "%A"),
        ("MM", "%m"),
        ("dd", "%d"),
        ("HH", "%H"),
        ("mm", "%M"),
        ("ss", "%S"),
        ("yy", "%y"),
    ];

    let mut out = String::with_capacity(fmt.len() + 8);
    let mut rest = fmt;
    while let Some(ch) = rest.chars().next() {
        if ch == '%' {
            // Pre-existing directive: copy `%` and the next char verbatim.
            let mut pair = rest.chars();
            pair.next();
            out.push('%');
            if let Some(directive) = pair.next() {
                out.push(directive);
            }
            rest = pair.as_str();
            continue;
        }

        let hit = TOKENS.iter().find(|pair| rest.starts_with(pair.0));
        if let Some(&(java, c)) = hit {
            out.push_str(c);
            rest = &rest[java.len()..];
            continue;
        }

        match ch {
            // Zone name / zone offset use opposite casing in Java and C.
            'z' => out.push_str("%Z"),
            'Z' => out.push_str("%z"),
            other => out.push(other),
        }
        rest = &rest[ch.len_utf8()..];
    }
    out
}
19892 let c_fmt = java_to_c_fmt_todate(s);
19893 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
19894 let try_strptime = Expression::Function(
19895 Box::new(Function::new(
19896 "TRY_STRPTIME".to_string(),
19897 vec![val, Expression::string(&c_fmt)],
19898 )),
19899 );
19900 let cast_ts =
19901 Expression::Cast(Box::new(Cast {
19902 this: try_strptime,
19903 to: DataType::Timestamp {
19904 precision: None,
19905 timezone: false,
19906 },
19907 double_colon_syntax: false,
19908 trailing_comments: vec![],
19909 format: None,
19910 default: None,
19911 inferred_type: None,
19912 }));
19913 Ok(Expression::Cast(Box::new(Cast {
19914 this: cast_ts,
19915 to: DataType::Date,
19916 double_colon_syntax: false,
19917 trailing_comments: vec![],
19918 format: None,
19919 default: None,
19920 inferred_type: None,
19921 })))
19922 }
19923 DialectType::Presto
19924 | DialectType::Trino
19925 | DialectType::Athena => {
19926 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
19927 let p_fmt = s
19928 .replace("yyyy", "%Y")
19929 .replace("SSSSSS", "%f")
19930 .replace("MM", "%m")
19931 .replace("dd", "%d")
19932 .replace("HH", "%H")
19933 .replace("mm", "%M")
19934 .replace("ss", "%S")
19935 .replace("yy", "%y");
19936 let date_parse = Expression::Function(
19937 Box::new(Function::new(
19938 "DATE_PARSE".to_string(),
19939 vec![val, Expression::string(&p_fmt)],
19940 )),
19941 );
19942 Ok(Expression::Cast(Box::new(Cast {
19943 this: date_parse,
19944 to: DataType::Date,
19945 double_colon_syntax: false,
19946 trailing_comments: vec![],
19947 format: None,
19948 default: None,
19949 inferred_type: None,
19950 })))
19951 }
19952 DialectType::Snowflake => {
19953 // TRY_TO_DATE(x, snowflake_fmt)
19954 Ok(Expression::Function(Box::new(
19955 Function::new(
19956 "TRY_TO_DATE".to_string(),
19957 vec![val, Expression::string(s)],
19958 ),
19959 )))
19960 }
19961 _ => Ok(Expression::Function(Box::new(
19962 Function::new(
19963 "TO_DATE".to_string(),
19964 vec![val, fmt_expr],
19965 ),
19966 ))),
19967 }
19968 } else {
19969 Ok(Expression::Function(Box::new(Function::new(
19970 "TO_DATE".to_string(),
19971 vec![val, fmt_expr],
19972 ))))
19973 }
19974 } else {
19975 Ok(Expression::Function(Box::new(Function::new(
19976 "TO_DATE".to_string(),
19977 vec![val, fmt_expr],
19978 ))))
19979 }
19980 }
19981 }
19982 // TO_TIMESTAMP(x) 1-arg: epoch conversion
19983 "TO_TIMESTAMP"
19984 if f.args.len() == 1
19985 && matches!(source, DialectType::DuckDB)
19986 && matches!(
19987 target,
19988 DialectType::BigQuery
19989 | DialectType::Presto
19990 | DialectType::Trino
19991 | DialectType::Hive
19992 | DialectType::Spark
19993 | DialectType::Databricks
19994 | DialectType::Athena
19995 ) =>
19996 {
19997 let arg = f.args.into_iter().next().unwrap();
19998 let func_name = match target {
19999 DialectType::BigQuery => "TIMESTAMP_SECONDS",
20000 DialectType::Presto
20001 | DialectType::Trino
20002 | DialectType::Athena
20003 | DialectType::Hive
20004 | DialectType::Spark
20005 | DialectType::Databricks => "FROM_UNIXTIME",
20006 _ => "TO_TIMESTAMP",
20007 };
20008 Ok(Expression::Function(Box::new(Function::new(
20009 func_name.to_string(),
20010 vec![arg],
20011 ))))
20012 }
20013 // CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
20014 "CONCAT" if f.args.len() == 1 => {
20015 let arg = f.args.into_iter().next().unwrap();
20016 match target {
20017 DialectType::Presto
20018 | DialectType::Trino
20019 | DialectType::Athena => {
20020 // CONCAT(a) -> CAST(a AS VARCHAR)
20021 Ok(Expression::Cast(Box::new(Cast {
20022 this: arg,
20023 to: DataType::VarChar {
20024 length: None,
20025 parenthesized_length: false,
20026 },
20027 trailing_comments: vec![],
20028 double_colon_syntax: false,
20029 format: None,
20030 default: None,
20031 inferred_type: None,
20032 })))
20033 }
20034 DialectType::TSQL => {
20035 // CONCAT(a) -> a
20036 Ok(arg)
20037 }
20038 DialectType::DuckDB => {
20039 // Keep CONCAT(a) for DuckDB (native support)
20040 Ok(Expression::Function(Box::new(Function::new(
20041 "CONCAT".to_string(),
20042 vec![arg],
20043 ))))
20044 }
20045 DialectType::Spark | DialectType::Databricks => {
20046 let coalesced = Expression::Coalesce(Box::new(
20047 crate::expressions::VarArgFunc {
20048 expressions: vec![arg, Expression::string("")],
20049 original_name: None,
20050 inferred_type: None,
20051 },
20052 ));
20053 Ok(Expression::Function(Box::new(Function::new(
20054 "CONCAT".to_string(),
20055 vec![coalesced],
20056 ))))
20057 }
20058 _ => Ok(Expression::Function(Box::new(Function::new(
20059 "CONCAT".to_string(),
20060 vec![arg],
20061 )))),
20062 }
20063 }
// REGEXP_EXTRACT(a, p, group) 3-arg for BigQuery: drop the group argument when it is the literal 0 (BigQuery's default group is 0, so no 3rd arg is needed)
20065 "REGEXP_EXTRACT"
20066 if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
20067 {
20068 // If group_index is 0, drop it
20069 let drop_group = match &f.args[2] {
20070 Expression::Literal(lit)
20071 if matches!(lit.as_ref(), Literal::Number(_)) =>
20072 {
20073 let Literal::Number(n) = lit.as_ref() else {
20074 unreachable!()
20075 };
20076 n == "0"
20077 }
20078 _ => false,
20079 };
20080 if drop_group {
20081 let mut args = f.args;
20082 args.truncate(2);
20083 Ok(Expression::Function(Box::new(Function::new(
20084 "REGEXP_EXTRACT".to_string(),
20085 args,
20086 ))))
20087 } else {
20088 Ok(Expression::Function(f))
20089 }
20090 }
20091 // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
20092 "REGEXP_EXTRACT"
20093 if f.args.len() == 4
20094 && matches!(target, DialectType::Snowflake) =>
20095 {
20096 // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
20097 let mut args = f.args;
20098 let this = args.remove(0);
20099 let pattern = args.remove(0);
20100 let group = args.remove(0);
20101 let flags = args.remove(0);
20102 Ok(Expression::Function(Box::new(Function::new(
20103 "REGEXP_SUBSTR".to_string(),
20104 vec![
20105 this,
20106 pattern,
20107 Expression::number(1),
20108 Expression::number(1),
20109 flags,
20110 group,
20111 ],
20112 ))))
20113 }
20114 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
20115 "REGEXP_SUBSTR"
20116 if f.args.len() == 3
20117 && matches!(
20118 target,
20119 DialectType::DuckDB
20120 | DialectType::Presto
20121 | DialectType::Trino
20122 | DialectType::Spark
20123 | DialectType::Databricks
20124 ) =>
20125 {
20126 let mut args = f.args;
20127 let this = args.remove(0);
20128 let pattern = args.remove(0);
20129 let position = args.remove(0);
20130 // Wrap subject in SUBSTRING(this, position) to apply the offset
20131 let substring_expr = Expression::Function(Box::new(Function::new(
20132 "SUBSTRING".to_string(),
20133 vec![this, position],
20134 )));
20135 let target_name = match target {
20136 DialectType::DuckDB => "REGEXP_EXTRACT",
20137 _ => "REGEXP_EXTRACT",
20138 };
20139 Ok(Expression::Function(Box::new(Function::new(
20140 target_name.to_string(),
20141 vec![substring_expr, pattern],
20142 ))))
20143 }
20144 // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
20145 "TO_DAYS" if f.args.len() == 1 => {
20146 let x = f.args.into_iter().next().unwrap();
20147 let epoch = Expression::string("0000-01-01");
20148 // Build the final target-specific expression directly
20149 let datediff_expr = match target {
20150 DialectType::MySQL | DialectType::SingleStore => {
20151 // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
20152 Expression::Function(Box::new(Function::new(
20153 "DATEDIFF".to_string(),
20154 vec![x, epoch],
20155 )))
20156 }
20157 DialectType::DuckDB => {
20158 // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
20159 let cast_epoch = Expression::Cast(Box::new(Cast {
20160 this: epoch,
20161 to: DataType::Date,
20162 trailing_comments: Vec::new(),
20163 double_colon_syntax: false,
20164 format: None,
20165 default: None,
20166 inferred_type: None,
20167 }));
20168 let cast_x = Expression::Cast(Box::new(Cast {
20169 this: x,
20170 to: DataType::Date,
20171 trailing_comments: Vec::new(),
20172 double_colon_syntax: false,
20173 format: None,
20174 default: None,
20175 inferred_type: None,
20176 }));
20177 Expression::Function(Box::new(Function::new(
20178 "DATE_DIFF".to_string(),
20179 vec![Expression::string("DAY"), cast_epoch, cast_x],
20180 )))
20181 }
20182 DialectType::Presto
20183 | DialectType::Trino
20184 | DialectType::Athena => {
20185 // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
20186 let cast_epoch = Self::double_cast_timestamp_date(epoch);
20187 let cast_x = Self::double_cast_timestamp_date(x);
20188 Expression::Function(Box::new(Function::new(
20189 "DATE_DIFF".to_string(),
20190 vec![Expression::string("DAY"), cast_epoch, cast_x],
20191 )))
20192 }
20193 _ => {
20194 // Default: (DATEDIFF(x, '0000-01-01') + 1)
20195 Expression::Function(Box::new(Function::new(
20196 "DATEDIFF".to_string(),
20197 vec![x, epoch],
20198 )))
20199 }
20200 };
20201 let add_one = Expression::Add(Box::new(BinaryOp::new(
20202 datediff_expr,
20203 Expression::number(1),
20204 )));
20205 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
20206 this: add_one,
20207 trailing_comments: Vec::new(),
20208 })))
20209 }
20210 // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
20211 "STR_TO_DATE"
20212 if f.args.len() == 2
20213 && matches!(
20214 target,
20215 DialectType::Presto | DialectType::Trino
20216 ) =>
20217 {
20218 let mut args = f.args;
20219 let x = args.remove(0);
20220 let format_expr = args.remove(0);
20221 // Check if the format contains time components
20222 let has_time = if let Expression::Literal(ref lit) = format_expr {
20223 if let Literal::String(ref fmt) = lit.as_ref() {
20224 fmt.contains("%H")
20225 || fmt.contains("%T")
20226 || fmt.contains("%M")
20227 || fmt.contains("%S")
20228 || fmt.contains("%I")
20229 || fmt.contains("%p")
20230 } else {
20231 false
20232 }
20233 } else {
20234 false
20235 };
20236 let date_parse = Expression::Function(Box::new(Function::new(
20237 "DATE_PARSE".to_string(),
20238 vec![x, format_expr],
20239 )));
20240 if has_time {
20241 // Has time components: just DATE_PARSE
20242 Ok(date_parse)
20243 } else {
20244 // Date-only: CAST(DATE_PARSE(...) AS DATE)
20245 Ok(Expression::Cast(Box::new(Cast {
20246 this: date_parse,
20247 to: DataType::Date,
20248 trailing_comments: Vec::new(),
20249 double_colon_syntax: false,
20250 format: None,
20251 default: None,
20252 inferred_type: None,
20253 })))
20254 }
20255 }
20256 "STR_TO_DATE"
20257 if f.args.len() == 2
20258 && matches!(
20259 target,
20260 DialectType::PostgreSQL | DialectType::Redshift
20261 ) =>
20262 {
20263 let mut args = f.args;
20264 let x = args.remove(0);
20265 let fmt = args.remove(0);
20266 let pg_fmt = match fmt {
20267 Expression::Literal(lit)
20268 if matches!(lit.as_ref(), Literal::String(_)) =>
20269 {
20270 let Literal::String(s) = lit.as_ref() else {
20271 unreachable!()
20272 };
20273 Expression::string(
20274 &s.replace("%Y", "YYYY")
20275 .replace("%m", "MM")
20276 .replace("%d", "DD")
20277 .replace("%H", "HH24")
20278 .replace("%M", "MI")
20279 .replace("%S", "SS"),
20280 )
20281 }
20282 other => other,
20283 };
20284 let to_date = Expression::Function(Box::new(Function::new(
20285 "TO_DATE".to_string(),
20286 vec![x, pg_fmt],
20287 )));
20288 Ok(Expression::Cast(Box::new(Cast {
20289 this: to_date,
20290 to: DataType::Timestamp {
20291 timezone: false,
20292 precision: None,
20293 },
20294 trailing_comments: Vec::new(),
20295 double_colon_syntax: false,
20296 format: None,
20297 default: None,
20298 inferred_type: None,
20299 })))
20300 }
20301 // RANGE(start, end) -> GENERATE_SERIES for SQLite
20302 "RANGE"
20303 if (f.args.len() == 1 || f.args.len() == 2)
20304 && matches!(target, DialectType::SQLite) =>
20305 {
20306 if f.args.len() == 2 {
20307 // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
20308 // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
20309 let mut args = f.args;
20310 let start = args.remove(0);
20311 let end = args.remove(0);
20312 Ok(Expression::Function(Box::new(Function::new(
20313 "GENERATE_SERIES".to_string(),
20314 vec![start, end],
20315 ))))
20316 } else {
20317 Ok(Expression::Function(f))
20318 }
20319 }
20320 // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
20321 // When source is Snowflake, keep as-is (args already in correct form)
20322 "UNIFORM"
20323 if matches!(target, DialectType::Snowflake)
20324 && (f.args.len() == 2 || f.args.len() == 3) =>
20325 {
20326 if matches!(source, DialectType::Snowflake) {
20327 // Snowflake -> Snowflake: keep as-is
20328 Ok(Expression::Function(f))
20329 } else {
20330 let mut args = f.args;
20331 let low = args.remove(0);
20332 let high = args.remove(0);
20333 let random = if !args.is_empty() {
20334 let seed = args.remove(0);
20335 Expression::Function(Box::new(Function::new(
20336 "RANDOM".to_string(),
20337 vec![seed],
20338 )))
20339 } else {
20340 Expression::Function(Box::new(Function::new(
20341 "RANDOM".to_string(),
20342 vec![],
20343 )))
20344 };
20345 Ok(Expression::Function(Box::new(Function::new(
20346 "UNIFORM".to_string(),
20347 vec![low, high, random],
20348 ))))
20349 }
20350 }
20351 // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
20352 "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
20353 let mut args = f.args;
20354 let ts_arg = args.remove(0);
20355 let tz_arg = args.remove(0);
20356 // Cast string literal to TIMESTAMP for all targets
20357 let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
20358 {
20359 Expression::Cast(Box::new(Cast {
20360 this: ts_arg,
20361 to: DataType::Timestamp {
20362 timezone: false,
20363 precision: None,
20364 },
20365 trailing_comments: vec![],
20366 double_colon_syntax: false,
20367 format: None,
20368 default: None,
20369 inferred_type: None,
20370 }))
20371 } else {
20372 ts_arg
20373 };
20374 match target {
20375 DialectType::Spark | DialectType::Databricks => {
20376 Ok(Expression::Function(Box::new(Function::new(
20377 "TO_UTC_TIMESTAMP".to_string(),
20378 vec![ts_cast, tz_arg],
20379 ))))
20380 }
20381 DialectType::Snowflake => {
20382 // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
20383 Ok(Expression::Function(Box::new(Function::new(
20384 "CONVERT_TIMEZONE".to_string(),
20385 vec![tz_arg, Expression::string("UTC"), ts_cast],
20386 ))))
20387 }
20388 DialectType::Presto
20389 | DialectType::Trino
20390 | DialectType::Athena => {
20391 // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
20392 let wtz = Expression::Function(Box::new(Function::new(
20393 "WITH_TIMEZONE".to_string(),
20394 vec![ts_cast, tz_arg],
20395 )));
20396 Ok(Expression::AtTimeZone(Box::new(
20397 crate::expressions::AtTimeZone {
20398 this: wtz,
20399 zone: Expression::string("UTC"),
20400 },
20401 )))
20402 }
20403 DialectType::BigQuery => {
20404 // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
20405 let cast_dt = Expression::Cast(Box::new(Cast {
20406 this: if let Expression::Cast(c) = ts_cast {
20407 c.this
20408 } else {
20409 ts_cast.clone()
20410 },
20411 to: DataType::Custom {
20412 name: "DATETIME".to_string(),
20413 },
20414 trailing_comments: vec![],
20415 double_colon_syntax: false,
20416 format: None,
20417 default: None,
20418 inferred_type: None,
20419 }));
20420 let ts_func =
20421 Expression::Function(Box::new(Function::new(
20422 "TIMESTAMP".to_string(),
20423 vec![cast_dt, tz_arg],
20424 )));
20425 Ok(Expression::Function(Box::new(Function::new(
20426 "DATETIME".to_string(),
20427 vec![ts_func, Expression::string("UTC")],
20428 ))))
20429 }
20430 _ => {
20431 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
20432 let atz1 = Expression::AtTimeZone(Box::new(
20433 crate::expressions::AtTimeZone {
20434 this: ts_cast,
20435 zone: tz_arg,
20436 },
20437 ));
20438 Ok(Expression::AtTimeZone(Box::new(
20439 crate::expressions::AtTimeZone {
20440 this: atz1,
20441 zone: Expression::string("UTC"),
20442 },
20443 )))
20444 }
20445 }
20446 }
20447 // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
20448 "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
20449 let mut args = f.args;
20450 let ts_arg = args.remove(0);
20451 let tz_arg = args.remove(0);
20452 // Cast string literal to TIMESTAMP
20453 let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
20454 {
20455 Expression::Cast(Box::new(Cast {
20456 this: ts_arg,
20457 to: DataType::Timestamp {
20458 timezone: false,
20459 precision: None,
20460 },
20461 trailing_comments: vec![],
20462 double_colon_syntax: false,
20463 format: None,
20464 default: None,
20465 inferred_type: None,
20466 }))
20467 } else {
20468 ts_arg
20469 };
20470 match target {
20471 DialectType::Spark | DialectType::Databricks => {
20472 Ok(Expression::Function(Box::new(Function::new(
20473 "FROM_UTC_TIMESTAMP".to_string(),
20474 vec![ts_cast, tz_arg],
20475 ))))
20476 }
20477 DialectType::Presto
20478 | DialectType::Trino
20479 | DialectType::Athena => {
20480 // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
20481 Ok(Expression::Function(Box::new(Function::new(
20482 "AT_TIMEZONE".to_string(),
20483 vec![ts_cast, tz_arg],
20484 ))))
20485 }
20486 DialectType::Snowflake => {
20487 // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
20488 Ok(Expression::Function(Box::new(Function::new(
20489 "CONVERT_TIMEZONE".to_string(),
20490 vec![Expression::string("UTC"), tz_arg, ts_cast],
20491 ))))
20492 }
20493 _ => {
20494 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
20495 Ok(Expression::AtTimeZone(Box::new(
20496 crate::expressions::AtTimeZone {
20497 this: ts_cast,
20498 zone: tz_arg,
20499 },
20500 )))
20501 }
20502 }
20503 }
20504 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
20505 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
20506 let name = match target {
20507 DialectType::Snowflake => "OBJECT_CONSTRUCT",
20508 _ => "MAP",
20509 };
20510 Ok(Expression::Function(Box::new(Function::new(
20511 name.to_string(),
20512 f.args,
20513 ))))
20514 }
20515 // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
20516 "STR_TO_MAP" if f.args.len() >= 1 => match target {
20517 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20518 Ok(Expression::Function(Box::new(Function::new(
20519 "SPLIT_TO_MAP".to_string(),
20520 f.args,
20521 ))))
20522 }
20523 _ => Ok(Expression::Function(f)),
20524 },
20525 // TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
20526 "TIME_TO_STR" if f.args.len() == 2 => {
20527 let mut args = f.args;
20528 let this = args.remove(0);
20529 let fmt_expr = args.remove(0);
20530 let format = if let Expression::Literal(lit) = fmt_expr {
20531 if let Literal::String(s) = lit.as_ref() {
20532 s.clone()
20533 } else {
20534 String::new()
20535 }
20536 } else {
20537 "%Y-%m-%d %H:%M:%S".to_string()
20538 };
20539 Ok(Expression::TimeToStr(Box::new(
20540 crate::expressions::TimeToStr {
20541 this: Box::new(this),
20542 format,
20543 culture: None,
20544 zone: None,
20545 },
20546 )))
20547 }
20548 // STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation
20549 "STR_TO_TIME" if f.args.len() == 2 => {
20550 let mut args = f.args;
20551 let this = args.remove(0);
20552 let fmt_expr = args.remove(0);
20553 let format = if let Expression::Literal(lit) = fmt_expr {
20554 if let Literal::String(s) = lit.as_ref() {
20555 s.clone()
20556 } else {
20557 String::new()
20558 }
20559 } else {
20560 "%Y-%m-%d %H:%M:%S".to_string()
20561 };
20562 Ok(Expression::StrToTime(Box::new(
20563 crate::expressions::StrToTime {
20564 this: Box::new(this),
20565 format,
20566 zone: None,
20567 safe: None,
20568 target_type: None,
20569 },
20570 )))
20571 }
20572 // STR_TO_UNIX(x, fmt) -> Expression::StrToUnix for proper generation
20573 "STR_TO_UNIX" if f.args.len() >= 1 => {
20574 let mut args = f.args;
20575 let this = args.remove(0);
20576 let format = if !args.is_empty() {
20577 if let Expression::Literal(lit) = args.remove(0) {
20578 if let Literal::String(s) = lit.as_ref() {
20579 Some(s.clone())
20580 } else {
20581 None
20582 }
20583 } else {
20584 None
20585 }
20586 } else {
20587 None
20588 };
20589 Ok(Expression::StrToUnix(Box::new(
20590 crate::expressions::StrToUnix {
20591 this: Some(Box::new(this)),
20592 format,
20593 },
20594 )))
20595 }
20596 // TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation
20597 "TIME_TO_UNIX" if f.args.len() == 1 => {
20598 let mut args = f.args;
20599 let this = args.remove(0);
20600 Ok(Expression::TimeToUnix(Box::new(
20601 crate::expressions::UnaryFunc {
20602 this,
20603 original_name: None,
20604 inferred_type: None,
20605 },
20606 )))
20607 }
20608 // UNIX_TO_STR(x, fmt) -> Expression::UnixToStr for proper generation
20609 "UNIX_TO_STR" if f.args.len() >= 1 => {
20610 let mut args = f.args;
20611 let this = args.remove(0);
20612 let format = if !args.is_empty() {
20613 if let Expression::Literal(lit) = args.remove(0) {
20614 if let Literal::String(s) = lit.as_ref() {
20615 Some(s.clone())
20616 } else {
20617 None
20618 }
20619 } else {
20620 None
20621 }
20622 } else {
20623 None
20624 };
20625 Ok(Expression::UnixToStr(Box::new(
20626 crate::expressions::UnixToStr {
20627 this: Box::new(this),
20628 format,
20629 },
20630 )))
20631 }
20632 // UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation
20633 "UNIX_TO_TIME" if f.args.len() == 1 => {
20634 let mut args = f.args;
20635 let this = args.remove(0);
20636 Ok(Expression::UnixToTime(Box::new(
20637 crate::expressions::UnixToTime {
20638 this: Box::new(this),
20639 scale: None,
20640 zone: None,
20641 hours: None,
20642 minutes: None,
20643 format: None,
20644 target_type: None,
20645 },
20646 )))
20647 }
20648 // TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation
20649 "TIME_STR_TO_DATE" if f.args.len() == 1 => {
20650 let mut args = f.args;
20651 let this = args.remove(0);
20652 Ok(Expression::TimeStrToDate(Box::new(
20653 crate::expressions::UnaryFunc {
20654 this,
20655 original_name: None,
20656 inferred_type: None,
20657 },
20658 )))
20659 }
20660 // TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation
20661 "TIME_STR_TO_TIME" if f.args.len() == 1 => {
20662 let mut args = f.args;
20663 let this = args.remove(0);
20664 Ok(Expression::TimeStrToTime(Box::new(
20665 crate::expressions::TimeStrToTime {
20666 this: Box::new(this),
20667 zone: None,
20668 },
20669 )))
20670 }
20671 // MONTHS_BETWEEN(end, start) -> DuckDB complex expansion
20672 "MONTHS_BETWEEN" if f.args.len() == 2 => {
20673 match target {
20674 DialectType::DuckDB => {
20675 let mut args = f.args;
20676 let end_date = args.remove(0);
20677 let start_date = args.remove(0);
20678 let cast_end = Self::ensure_cast_date(end_date);
20679 let cast_start = Self::ensure_cast_date(start_date);
20680 // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
20681 let dd = Expression::Function(Box::new(Function::new(
20682 "DATE_DIFF".to_string(),
20683 vec![
20684 Expression::string("MONTH"),
20685 cast_start.clone(),
20686 cast_end.clone(),
20687 ],
20688 )));
20689 let day_end =
20690 Expression::Function(Box::new(Function::new(
20691 "DAY".to_string(),
20692 vec![cast_end.clone()],
20693 )));
20694 let day_start =
20695 Expression::Function(Box::new(Function::new(
20696 "DAY".to_string(),
20697 vec![cast_start.clone()],
20698 )));
20699 let last_day_end =
20700 Expression::Function(Box::new(Function::new(
20701 "LAST_DAY".to_string(),
20702 vec![cast_end.clone()],
20703 )));
20704 let last_day_start =
20705 Expression::Function(Box::new(Function::new(
20706 "LAST_DAY".to_string(),
20707 vec![cast_start.clone()],
20708 )));
20709 let day_last_end = Expression::Function(Box::new(
20710 Function::new("DAY".to_string(), vec![last_day_end]),
20711 ));
20712 let day_last_start = Expression::Function(Box::new(
20713 Function::new("DAY".to_string(), vec![last_day_start]),
20714 ));
20715 let cond1 = Expression::Eq(Box::new(BinaryOp::new(
20716 day_end.clone(),
20717 day_last_end,
20718 )));
20719 let cond2 = Expression::Eq(Box::new(BinaryOp::new(
20720 day_start.clone(),
20721 day_last_start,
20722 )));
20723 let both_cond =
20724 Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
20725 let day_diff = Expression::Sub(Box::new(BinaryOp::new(
20726 day_end, day_start,
20727 )));
20728 let day_diff_paren = Expression::Paren(Box::new(
20729 crate::expressions::Paren {
20730 this: day_diff,
20731 trailing_comments: Vec::new(),
20732 },
20733 ));
20734 let frac = Expression::Div(Box::new(BinaryOp::new(
20735 day_diff_paren,
20736 Expression::Literal(Box::new(Literal::Number(
20737 "31.0".to_string(),
20738 ))),
20739 )));
20740 let case_expr = Expression::Case(Box::new(Case {
20741 operand: None,
20742 whens: vec![(both_cond, Expression::number(0))],
20743 else_: Some(frac),
20744 comments: Vec::new(),
20745 inferred_type: None,
20746 }));
20747 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
20748 }
20749 DialectType::Snowflake | DialectType::Redshift => {
20750 let mut args = f.args;
20751 let end_date = args.remove(0);
20752 let start_date = args.remove(0);
20753 let unit = Expression::Identifier(Identifier::new("MONTH"));
20754 Ok(Expression::Function(Box::new(Function::new(
20755 "DATEDIFF".to_string(),
20756 vec![unit, start_date, end_date],
20757 ))))
20758 }
20759 DialectType::Presto
20760 | DialectType::Trino
20761 | DialectType::Athena => {
20762 let mut args = f.args;
20763 let end_date = args.remove(0);
20764 let start_date = args.remove(0);
20765 Ok(Expression::Function(Box::new(Function::new(
20766 "DATE_DIFF".to_string(),
20767 vec![Expression::string("MONTH"), start_date, end_date],
20768 ))))
20769 }
20770 _ => Ok(Expression::Function(f)),
20771 }
20772 }
20773 // MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
20774 // Drop the roundOff arg for non-Spark targets, keep it for Spark
20775 "MONTHS_BETWEEN" if f.args.len() == 3 => {
20776 match target {
20777 DialectType::Spark | DialectType::Databricks => {
20778 Ok(Expression::Function(f))
20779 }
20780 _ => {
20781 // Drop the 3rd arg and delegate to the 2-arg logic
20782 let mut args = f.args;
20783 let end_date = args.remove(0);
20784 let start_date = args.remove(0);
20785 // Re-create as 2-arg and process
20786 let f2 = Function::new(
20787 "MONTHS_BETWEEN".to_string(),
20788 vec![end_date, start_date],
20789 );
20790 let e2 = Expression::Function(Box::new(f2));
20791 Self::cross_dialect_normalize(e2, source, target)
20792 }
20793 }
20794 }
20795 // TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP) for most targets
20796 "TO_TIMESTAMP"
20797 if f.args.len() == 1
20798 && matches!(
20799 source,
20800 DialectType::Spark
20801 | DialectType::Databricks
20802 | DialectType::Hive
20803 ) =>
20804 {
20805 let arg = f.args.into_iter().next().unwrap();
20806 Ok(Expression::Cast(Box::new(Cast {
20807 this: arg,
20808 to: DataType::Timestamp {
20809 timezone: false,
20810 precision: None,
20811 },
20812 trailing_comments: vec![],
20813 double_colon_syntax: false,
20814 format: None,
20815 default: None,
20816 inferred_type: None,
20817 })))
20818 }
// STRING(x) from Spark/Databricks -> CAST(x AS STRING) for Spark/Databricks/Hive targets, CAST to DataType::Text for every other target
20820 "STRING"
20821 if f.args.len() == 1
20822 && matches!(
20823 source,
20824 DialectType::Spark | DialectType::Databricks
20825 ) =>
20826 {
20827 let arg = f.args.into_iter().next().unwrap();
20828 let dt = match target {
20829 DialectType::Spark
20830 | DialectType::Databricks
20831 | DialectType::Hive => DataType::Custom {
20832 name: "STRING".to_string(),
20833 },
20834 _ => DataType::Text,
20835 };
20836 Ok(Expression::Cast(Box::new(Cast {
20837 this: arg,
20838 to: dt,
20839 trailing_comments: vec![],
20840 double_colon_syntax: false,
20841 format: None,
20842 default: None,
20843 inferred_type: None,
20844 })))
20845 }
20846 // LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
20847 "LOGICAL_OR" if f.args.len() == 1 => {
20848 let name = match target {
20849 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
20850 _ => "LOGICAL_OR",
20851 };
20852 Ok(Expression::Function(Box::new(Function::new(
20853 name.to_string(),
20854 f.args,
20855 ))))
20856 }
20857 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
20858 "SPLIT"
20859 if f.args.len() == 2
20860 && matches!(
20861 source,
20862 DialectType::Spark
20863 | DialectType::Databricks
20864 | DialectType::Hive
20865 ) =>
20866 {
20867 let name = match target {
20868 DialectType::DuckDB => "STR_SPLIT_REGEX",
20869 DialectType::Presto
20870 | DialectType::Trino
20871 | DialectType::Athena => "REGEXP_SPLIT",
20872 DialectType::Spark
20873 | DialectType::Databricks
20874 | DialectType::Hive => "SPLIT",
20875 _ => "SPLIT",
20876 };
20877 Ok(Expression::Function(Box::new(Function::new(
20878 name.to_string(),
20879 f.args,
20880 ))))
20881 }
20882 // TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
20883 "TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
20884 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20885 Ok(Expression::Function(Box::new(Function::new(
20886 "ELEMENT_AT".to_string(),
20887 f.args,
20888 ))))
20889 }
20890 DialectType::DuckDB => {
20891 let mut args = f.args;
20892 let arr = args.remove(0);
20893 let idx = args.remove(0);
20894 Ok(Expression::Subscript(Box::new(
20895 crate::expressions::Subscript {
20896 this: arr,
20897 index: idx,
20898 },
20899 )))
20900 }
20901 _ => Ok(Expression::Function(f)),
20902 },
// ARRAY_FILTER(arr, lambda) -> LIST_FILTER for DuckDB, kept as ARRAY_FILTER for StarRocks, FILTER for every other target
20904 "ARRAY_FILTER" if f.args.len() == 2 => {
20905 let name = match target {
20906 DialectType::DuckDB => "LIST_FILTER",
20907 DialectType::StarRocks => "ARRAY_FILTER",
20908 _ => "FILTER",
20909 };
20910 Ok(Expression::Function(Box::new(Function::new(
20911 name.to_string(),
20912 f.args,
20913 ))))
20914 }
20915 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
20916 "FILTER" if f.args.len() == 2 => {
20917 let name = match target {
20918 DialectType::DuckDB => "LIST_FILTER",
20919 DialectType::StarRocks => "ARRAY_FILTER",
20920 _ => "FILTER",
20921 };
20922 Ok(Expression::Function(Box::new(Function::new(
20923 name.to_string(),
20924 f.args,
20925 ))))
20926 }
20927 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
20928 "REDUCE" if f.args.len() >= 3 => {
20929 let name = match target {
20930 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
20931 _ => "REDUCE",
20932 };
20933 Ok(Expression::Function(Box::new(Function::new(
20934 name.to_string(),
20935 f.args,
20936 ))))
20937 }
20938 // CURRENT_SCHEMA() -> dialect-specific
20939 "CURRENT_SCHEMA" => {
20940 match target {
20941 DialectType::PostgreSQL => {
20942 // PostgreSQL: CURRENT_SCHEMA (no parens)
20943 Ok(Expression::Function(Box::new(Function {
20944 name: "CURRENT_SCHEMA".to_string(),
20945 args: vec![],
20946 distinct: false,
20947 trailing_comments: vec![],
20948 use_bracket_syntax: false,
20949 no_parens: true,
20950 quoted: false,
20951 span: None,
20952 inferred_type: None,
20953 })))
20954 }
20955 DialectType::MySQL
20956 | DialectType::Doris
20957 | DialectType::StarRocks => Ok(Expression::Function(Box::new(
20958 Function::new("SCHEMA".to_string(), vec![]),
20959 ))),
20960 DialectType::TSQL => Ok(Expression::Function(Box::new(
20961 Function::new("SCHEMA_NAME".to_string(), vec![]),
20962 ))),
20963 DialectType::SQLite => Ok(Expression::Literal(Box::new(
20964 Literal::String("main".to_string()),
20965 ))),
20966 _ => Ok(Expression::Function(f)),
20967 }
20968 }
20969 // LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for Spark/Hive/Databricks/ClickHouse
20970 "LTRIM" if f.args.len() == 2 => match target {
20971 DialectType::Spark
20972 | DialectType::Hive
20973 | DialectType::Databricks
20974 | DialectType::ClickHouse => {
20975 let mut args = f.args;
20976 let str_expr = args.remove(0);
20977 let chars = args.remove(0);
20978 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
20979 this: str_expr,
20980 characters: Some(chars),
20981 position: crate::expressions::TrimPosition::Leading,
20982 sql_standard_syntax: true,
20983 position_explicit: true,
20984 })))
20985 }
20986 _ => Ok(Expression::Function(f)),
20987 },
20988 // RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str) for Spark/Hive/Databricks/ClickHouse
20989 "RTRIM" if f.args.len() == 2 => match target {
20990 DialectType::Spark
20991 | DialectType::Hive
20992 | DialectType::Databricks
20993 | DialectType::ClickHouse => {
20994 let mut args = f.args;
20995 let str_expr = args.remove(0);
20996 let chars = args.remove(0);
20997 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
20998 this: str_expr,
20999 characters: Some(chars),
21000 position: crate::expressions::TrimPosition::Trailing,
21001 sql_standard_syntax: true,
21002 position_explicit: true,
21003 })))
21004 }
21005 _ => Ok(Expression::Function(f)),
21006 },
21007 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
21008 "ARRAY_REVERSE" if f.args.len() == 1 => match target {
21009 DialectType::ClickHouse => {
21010 let mut new_f = *f;
21011 new_f.name = "arrayReverse".to_string();
21012 Ok(Expression::Function(Box::new(new_f)))
21013 }
21014 _ => Ok(Expression::Function(f)),
21015 },
// UUID() -> NEWID() for TSQL/Fabric
21017 "UUID" if f.args.is_empty() => match target {
21018 DialectType::TSQL | DialectType::Fabric => {
21019 Ok(Expression::Function(Box::new(Function::new(
21020 "NEWID".to_string(),
21021 vec![],
21022 ))))
21023 }
21024 _ => Ok(Expression::Function(f)),
21025 },
21026 // FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse, FARMFINGERPRINT64(x) for Redshift
21027 "FARM_FINGERPRINT" if f.args.len() == 1 => match target {
21028 DialectType::ClickHouse => {
21029 let mut new_f = *f;
21030 new_f.name = "farmFingerprint64".to_string();
21031 Ok(Expression::Function(Box::new(new_f)))
21032 }
21033 DialectType::Redshift => {
21034 let mut new_f = *f;
21035 new_f.name = "FARMFINGERPRINT64".to_string();
21036 Ok(Expression::Function(Box::new(new_f)))
21037 }
21038 _ => Ok(Expression::Function(f)),
21039 },
21040 // JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark, OBJECT_KEYS(x) for Snowflake
21041 "JSON_KEYS" => match target {
21042 DialectType::Databricks | DialectType::Spark => {
21043 let mut new_f = *f;
21044 new_f.name = "JSON_OBJECT_KEYS".to_string();
21045 Ok(Expression::Function(Box::new(new_f)))
21046 }
21047 DialectType::Snowflake => {
21048 let mut new_f = *f;
21049 new_f.name = "OBJECT_KEYS".to_string();
21050 Ok(Expression::Function(Box::new(new_f)))
21051 }
21052 _ => Ok(Expression::Function(f)),
21053 },
21054 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake
21055 "WEEKOFYEAR" => match target {
21056 DialectType::Snowflake => {
21057 let mut new_f = *f;
21058 new_f.name = "WEEKISO".to_string();
21059 Ok(Expression::Function(Box::new(new_f)))
21060 }
21061 _ => Ok(Expression::Function(f)),
21062 },
21063 // FORMAT(fmt, args...) -> FORMAT_STRING(fmt, args...) for Databricks
21064 "FORMAT"
21065 if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
21066 {
21067 match target {
21068 DialectType::Databricks | DialectType::Spark => {
21069 let mut new_f = *f;
21070 new_f.name = "FORMAT_STRING".to_string();
21071 Ok(Expression::Function(Box::new(new_f)))
21072 }
21073 _ => Ok(Expression::Function(f)),
21074 }
21075 }
21076 // CONCAT_WS from Generic is null-propagating in SQLGlot fixtures.
21077 // Trino also requires non-separator arguments cast to VARCHAR.
21078 "CONCAT_WS" if f.args.len() >= 2 => {
21079 fn concat_ws_null_case(
21080 args: Vec<Expression>,
21081 else_expr: Expression,
21082 ) -> Expression {
21083 let mut null_checks = args.iter().cloned().map(|arg| {
21084 Expression::IsNull(Box::new(crate::expressions::IsNull {
21085 this: arg,
21086 not: false,
21087 postfix_form: false,
21088 }))
21089 });
21090 let first_null_check = null_checks
21091 .next()
21092 .expect("CONCAT_WS with >= 2 args must yield a null check");
21093 let null_check =
21094 null_checks.fold(first_null_check, |left, right| {
21095 Expression::Or(Box::new(BinaryOp {
21096 left,
21097 right,
21098 left_comments: Vec::new(),
21099 operator_comments: Vec::new(),
21100 trailing_comments: Vec::new(),
21101 inferred_type: None,
21102 }))
21103 });
21104 Expression::Case(Box::new(Case {
21105 operand: None,
21106 whens: vec![(null_check, Expression::Null(Null))],
21107 else_: Some(else_expr),
21108 comments: vec![],
21109 inferred_type: None,
21110 }))
21111 }
21112
21113 match target {
21114 DialectType::Trino
21115 if matches!(source, DialectType::Generic) =>
21116 {
21117 let original_args = f.args.clone();
21118 let mut args = f.args;
21119 let sep = args.remove(0);
21120 let cast_args: Vec<Expression> = args
21121 .into_iter()
21122 .map(|a| {
21123 Expression::Cast(Box::new(Cast {
21124 this: a,
21125 to: DataType::VarChar {
21126 length: None,
21127 parenthesized_length: false,
21128 },
21129 double_colon_syntax: false,
21130 trailing_comments: Vec::new(),
21131 format: None,
21132 default: None,
21133 inferred_type: None,
21134 }))
21135 })
21136 .collect();
21137 let mut new_args = vec![sep];
21138 new_args.extend(cast_args);
21139 let else_expr = Expression::Function(Box::new(
21140 Function::new("CONCAT_WS".to_string(), new_args),
21141 ));
21142 Ok(concat_ws_null_case(original_args, else_expr))
21143 }
21144 DialectType::Presto
21145 | DialectType::Trino
21146 | DialectType::Athena => {
21147 let mut args = f.args;
21148 let sep = args.remove(0);
21149 let cast_args: Vec<Expression> = args
21150 .into_iter()
21151 .map(|a| {
21152 Expression::Cast(Box::new(Cast {
21153 this: a,
21154 to: DataType::VarChar {
21155 length: None,
21156 parenthesized_length: false,
21157 },
21158 double_colon_syntax: false,
21159 trailing_comments: Vec::new(),
21160 format: None,
21161 default: None,
21162 inferred_type: None,
21163 }))
21164 })
21165 .collect();
21166 let mut new_args = vec![sep];
21167 new_args.extend(cast_args);
21168 Ok(Expression::Function(Box::new(Function::new(
21169 "CONCAT_WS".to_string(),
21170 new_args,
21171 ))))
21172 }
21173 DialectType::Spark
21174 | DialectType::Hive
21175 | DialectType::DuckDB
21176 if matches!(source, DialectType::Generic) =>
21177 {
21178 let args = f.args;
21179 let else_expr = Expression::Function(Box::new(
21180 Function::new("CONCAT_WS".to_string(), args.clone()),
21181 ));
21182 Ok(concat_ws_null_case(args, else_expr))
21183 }
21184 _ => Ok(Expression::Function(f)),
21185 }
21186 }
// ARRAY_SLICE(x, start[, end]) -> SLICE for Presto/Trino/Athena/Databricks/Spark, arraySlice for ClickHouse;
// 3-arg Snowflake -> DuckDB keeps the name but shifts the start/end indices
21188 "ARRAY_SLICE" if f.args.len() >= 2 => match target {
21189 DialectType::DuckDB
21190 if f.args.len() == 3
21191 && matches!(source, DialectType::Snowflake) =>
21192 {
21193 // Snowflake ARRAY_SLICE (0-indexed, exclusive end)
21194 // -> DuckDB ARRAY_SLICE (1-indexed, inclusive end)
21195 let mut args = f.args;
21196 let arr = args.remove(0);
21197 let start = args.remove(0);
21198 let end = args.remove(0);
21199
21200 // CASE WHEN start >= 0 THEN start + 1 ELSE start END
21201 let adjusted_start = Expression::Case(Box::new(Case {
21202 operand: None,
21203 whens: vec![(
21204 Expression::Gte(Box::new(BinaryOp {
21205 left: start.clone(),
21206 right: Expression::number(0),
21207 left_comments: vec![],
21208 operator_comments: vec![],
21209 trailing_comments: vec![],
21210 inferred_type: None,
21211 })),
21212 Expression::Add(Box::new(BinaryOp {
21213 left: start.clone(),
21214 right: Expression::number(1),
21215 left_comments: vec![],
21216 operator_comments: vec![],
21217 trailing_comments: vec![],
21218 inferred_type: None,
21219 })),
21220 )],
21221 else_: Some(start),
21222 comments: vec![],
21223 inferred_type: None,
21224 }));
21225
21226 // CASE WHEN end < 0 THEN end - 1 ELSE end END
21227 let adjusted_end = Expression::Case(Box::new(Case {
21228 operand: None,
21229 whens: vec![(
21230 Expression::Lt(Box::new(BinaryOp {
21231 left: end.clone(),
21232 right: Expression::number(0),
21233 left_comments: vec![],
21234 operator_comments: vec![],
21235 trailing_comments: vec![],
21236 inferred_type: None,
21237 })),
21238 Expression::Sub(Box::new(BinaryOp {
21239 left: end.clone(),
21240 right: Expression::number(1),
21241 left_comments: vec![],
21242 operator_comments: vec![],
21243 trailing_comments: vec![],
21244 inferred_type: None,
21245 })),
21246 )],
21247 else_: Some(end),
21248 comments: vec![],
21249 inferred_type: None,
21250 }));
21251
21252 Ok(Expression::Function(Box::new(Function::new(
21253 "ARRAY_SLICE".to_string(),
21254 vec![arr, adjusted_start, adjusted_end],
21255 ))))
21256 }
21257 DialectType::Presto
21258 | DialectType::Trino
21259 | DialectType::Athena
21260 | DialectType::Databricks
21261 | DialectType::Spark => {
21262 let mut new_f = *f;
21263 new_f.name = "SLICE".to_string();
21264 Ok(Expression::Function(Box::new(new_f)))
21265 }
21266 DialectType::ClickHouse => {
21267 let mut new_f = *f;
21268 new_f.name = "arraySlice".to_string();
21269 Ok(Expression::Function(Box::new(new_f)))
21270 }
21271 _ => Ok(Expression::Function(f)),
21272 },
21273 // ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB (swap args)
21274 "ARRAY_PREPEND" if f.args.len() == 2 => match target {
21275 DialectType::DuckDB => {
21276 let mut args = f.args;
21277 let arr = args.remove(0);
21278 let val = args.remove(0);
21279 Ok(Expression::Function(Box::new(Function::new(
21280 "LIST_PREPEND".to_string(),
21281 vec![val, arr],
21282 ))))
21283 }
21284 _ => Ok(Expression::Function(f)),
21285 },
21286 // ARRAY_REMOVE(arr, target) -> dialect-specific
21287 "ARRAY_REMOVE" if f.args.len() == 2 => {
21288 match target {
21289 DialectType::DuckDB => {
21290 let mut args = f.args;
21291 let arr = args.remove(0);
21292 let target_val = args.remove(0);
21293 let u_id = crate::expressions::Identifier::new("_u");
21294 // LIST_FILTER(arr, _u -> _u <> target)
21295 let lambda = Expression::Lambda(Box::new(
21296 crate::expressions::LambdaExpr {
21297 parameters: vec![u_id.clone()],
21298 body: Expression::Neq(Box::new(BinaryOp {
21299 left: Expression::Identifier(u_id),
21300 right: target_val,
21301 left_comments: Vec::new(),
21302 operator_comments: Vec::new(),
21303 trailing_comments: Vec::new(),
21304 inferred_type: None,
21305 })),
21306 colon: false,
21307 parameter_types: Vec::new(),
21308 },
21309 ));
21310 Ok(Expression::Function(Box::new(Function::new(
21311 "LIST_FILTER".to_string(),
21312 vec![arr, lambda],
21313 ))))
21314 }
21315 DialectType::ClickHouse => {
21316 let mut args = f.args;
21317 let arr = args.remove(0);
21318 let target_val = args.remove(0);
21319 let u_id = crate::expressions::Identifier::new("_u");
21320 // arrayFilter(_u -> _u <> target, arr)
21321 let lambda = Expression::Lambda(Box::new(
21322 crate::expressions::LambdaExpr {
21323 parameters: vec![u_id.clone()],
21324 body: Expression::Neq(Box::new(BinaryOp {
21325 left: Expression::Identifier(u_id),
21326 right: target_val,
21327 left_comments: Vec::new(),
21328 operator_comments: Vec::new(),
21329 trailing_comments: Vec::new(),
21330 inferred_type: None,
21331 })),
21332 colon: false,
21333 parameter_types: Vec::new(),
21334 },
21335 ));
21336 Ok(Expression::Function(Box::new(Function::new(
21337 "arrayFilter".to_string(),
21338 vec![lambda, arr],
21339 ))))
21340 }
21341 DialectType::BigQuery => {
21342 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
21343 let mut args = f.args;
21344 let arr = args.remove(0);
21345 let target_val = args.remove(0);
21346 let u_id = crate::expressions::Identifier::new("_u");
21347 let u_col = Expression::Column(Box::new(
21348 crate::expressions::Column {
21349 name: u_id.clone(),
21350 table: None,
21351 join_mark: false,
21352 trailing_comments: Vec::new(),
21353 span: None,
21354 inferred_type: None,
21355 },
21356 ));
21357 // UNNEST(the_array) AS _u
21358 let unnest_expr = Expression::Unnest(Box::new(
21359 crate::expressions::UnnestFunc {
21360 this: arr,
21361 expressions: Vec::new(),
21362 with_ordinality: false,
21363 alias: None,
21364 offset_alias: None,
21365 },
21366 ));
21367 let aliased_unnest = Expression::Alias(Box::new(
21368 crate::expressions::Alias {
21369 this: unnest_expr,
21370 alias: u_id.clone(),
21371 column_aliases: Vec::new(),
21372 alias_explicit_as: false,
21373 alias_keyword: None,
21374 pre_alias_comments: Vec::new(),
21375 trailing_comments: Vec::new(),
21376 inferred_type: None,
21377 },
21378 ));
21379 // _u <> target
21380 let where_cond = Expression::Neq(Box::new(BinaryOp {
21381 left: u_col.clone(),
21382 right: target_val,
21383 left_comments: Vec::new(),
21384 operator_comments: Vec::new(),
21385 trailing_comments: Vec::new(),
21386 inferred_type: None,
21387 }));
21388 // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
21389 let subquery = Expression::Select(Box::new(
21390 crate::expressions::Select::new()
21391 .column(u_col)
21392 .from(aliased_unnest)
21393 .where_(where_cond),
21394 ));
21395 // ARRAY(subquery) -- use ArrayFunc with subquery as single element
21396 Ok(Expression::ArrayFunc(Box::new(
21397 crate::expressions::ArrayConstructor {
21398 expressions: vec![subquery],
21399 bracket_notation: false,
21400 use_list_keyword: false,
21401 },
21402 )))
21403 }
21404 _ => Ok(Expression::Function(f)),
21405 }
21406 }
// PARSE_JSON(x) -> unwrapped to its bare argument for SQLite/Doris/MySQL/StarRocks
21408 "PARSE_JSON" if f.args.len() == 1 => {
21409 match target {
21410 DialectType::SQLite
21411 | DialectType::Doris
21412 | DialectType::MySQL
21413 | DialectType::StarRocks => {
21414 // Strip PARSE_JSON, return the inner argument
21415 Ok(f.args.into_iter().next().unwrap())
21416 }
21417 _ => Ok(Expression::Function(f)),
21418 }
21419 }
21420 // JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON
21421 // This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through
21422 "JSON_REMOVE" => Ok(Expression::Function(f)),
21423 // JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON
21424 // This is handled by PARSE_JSON stripping above; JSON_SET is passed through
21425 "JSON_SET" => Ok(Expression::Function(f)),
21426 // DECODE(x, search1, result1, ..., default) -> CASE WHEN
21427 // Behavior per search value type:
21428 // NULL literal -> CASE WHEN x IS NULL THEN result
21429 // Literal (number, string, bool) -> CASE WHEN x = literal THEN result
21430 // Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
21431 "DECODE" if f.args.len() >= 3 => {
21432 // Keep as DECODE for targets that support it natively
21433 let keep_as_decode = matches!(
21434 target,
21435 DialectType::Oracle
21436 | DialectType::Snowflake
21437 | DialectType::Redshift
21438 | DialectType::Teradata
21439 | DialectType::Spark
21440 | DialectType::Databricks
21441 );
21442 if keep_as_decode {
21443 return Ok(Expression::Function(f));
21444 }
21445
21446 let mut args = f.args;
21447 let this_expr = args.remove(0);
21448 let mut pairs = Vec::new();
21449 let mut default = None;
21450 let mut i = 0;
21451 while i + 1 < args.len() {
21452 pairs.push((args[i].clone(), args[i + 1].clone()));
21453 i += 2;
21454 }
21455 if i < args.len() {
21456 default = Some(args[i].clone());
21457 }
21458 // Helper: check if expression is a literal value
21459 fn is_literal(e: &Expression) -> bool {
21460 matches!(
21461 e,
21462 Expression::Literal(_)
21463 | Expression::Boolean(_)
21464 | Expression::Neg(_)
21465 )
21466 }
21467 let whens: Vec<(Expression, Expression)> = pairs
21468 .into_iter()
21469 .map(|(search, result)| {
21470 if matches!(&search, Expression::Null(_)) {
21471 // NULL search -> IS NULL
21472 let condition = Expression::Is(Box::new(BinaryOp {
21473 left: this_expr.clone(),
21474 right: Expression::Null(crate::expressions::Null),
21475 left_comments: Vec::new(),
21476 operator_comments: Vec::new(),
21477 trailing_comments: Vec::new(),
21478 inferred_type: None,
21479 }));
21480 (condition, result)
21481 } else if is_literal(&search) {
21482 // Literal search -> simple equality
21483 let eq = Expression::Eq(Box::new(BinaryOp {
21484 left: this_expr.clone(),
21485 right: search,
21486 left_comments: Vec::new(),
21487 operator_comments: Vec::new(),
21488 trailing_comments: Vec::new(),
21489 inferred_type: None,
21490 }));
21491 (eq, result)
21492 } else {
21493 // Non-literal (column ref, expression) -> null-safe comparison
21494 let needs_paren = matches!(
21495 &search,
21496 Expression::Eq(_)
21497 | Expression::Neq(_)
21498 | Expression::Gt(_)
21499 | Expression::Gte(_)
21500 | Expression::Lt(_)
21501 | Expression::Lte(_)
21502 );
21503 let search_for_eq = if needs_paren {
21504 Expression::Paren(Box::new(
21505 crate::expressions::Paren {
21506 this: search.clone(),
21507 trailing_comments: Vec::new(),
21508 },
21509 ))
21510 } else {
21511 search.clone()
21512 };
21513 let eq = Expression::Eq(Box::new(BinaryOp {
21514 left: this_expr.clone(),
21515 right: search_for_eq,
21516 left_comments: Vec::new(),
21517 operator_comments: Vec::new(),
21518 trailing_comments: Vec::new(),
21519 inferred_type: None,
21520 }));
21521 let search_for_null = if needs_paren {
21522 Expression::Paren(Box::new(
21523 crate::expressions::Paren {
21524 this: search.clone(),
21525 trailing_comments: Vec::new(),
21526 },
21527 ))
21528 } else {
21529 search.clone()
21530 };
21531 let x_is_null = Expression::Is(Box::new(BinaryOp {
21532 left: this_expr.clone(),
21533 right: Expression::Null(crate::expressions::Null),
21534 left_comments: Vec::new(),
21535 operator_comments: Vec::new(),
21536 trailing_comments: Vec::new(),
21537 inferred_type: None,
21538 }));
21539 let s_is_null = Expression::Is(Box::new(BinaryOp {
21540 left: search_for_null,
21541 right: Expression::Null(crate::expressions::Null),
21542 left_comments: Vec::new(),
21543 operator_comments: Vec::new(),
21544 trailing_comments: Vec::new(),
21545 inferred_type: None,
21546 }));
21547 let both_null = Expression::And(Box::new(BinaryOp {
21548 left: x_is_null,
21549 right: s_is_null,
21550 left_comments: Vec::new(),
21551 operator_comments: Vec::new(),
21552 trailing_comments: Vec::new(),
21553 inferred_type: None,
21554 }));
21555 let condition = Expression::Or(Box::new(BinaryOp {
21556 left: eq,
21557 right: Expression::Paren(Box::new(
21558 crate::expressions::Paren {
21559 this: both_null,
21560 trailing_comments: Vec::new(),
21561 },
21562 )),
21563 left_comments: Vec::new(),
21564 operator_comments: Vec::new(),
21565 trailing_comments: Vec::new(),
21566 inferred_type: None,
21567 }));
21568 (condition, result)
21569 }
21570 })
21571 .collect();
21572 Ok(Expression::Case(Box::new(Case {
21573 operand: None,
21574 whens,
21575 else_: default,
21576 comments: Vec::new(),
21577 inferred_type: None,
21578 })))
21579 }
21580 // LEVENSHTEIN(a, b, ...) -> dialect-specific
21581 "LEVENSHTEIN" => {
21582 match target {
21583 DialectType::BigQuery => {
21584 let mut new_f = *f;
21585 new_f.name = "EDIT_DISTANCE".to_string();
21586 Ok(Expression::Function(Box::new(new_f)))
21587 }
21588 DialectType::Drill => {
21589 let mut new_f = *f;
21590 new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
21591 Ok(Expression::Function(Box::new(new_f)))
21592 }
21593 DialectType::PostgreSQL if f.args.len() == 6 => {
21594 // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
21595 // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
21596 let mut new_f = *f;
21597 new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
21598 Ok(Expression::Function(Box::new(new_f)))
21599 }
21600 _ => Ok(Expression::Function(f)),
21601 }
21602 }
21603 // ARRAY_MAX(x) -> arrayMax(x) for ClickHouse, LIST_MAX(x) for DuckDB
21604 "ARRAY_MAX" => {
21605 let name = match target {
21606 DialectType::ClickHouse => "arrayMax",
21607 DialectType::DuckDB => "LIST_MAX",
21608 _ => "ARRAY_MAX",
21609 };
21610 let mut new_f = *f;
21611 new_f.name = name.to_string();
21612 Ok(Expression::Function(Box::new(new_f)))
21613 }
21614 // ARRAY_MIN(x) -> arrayMin(x) for ClickHouse, LIST_MIN(x) for DuckDB
21615 "ARRAY_MIN" => {
21616 let name = match target {
21617 DialectType::ClickHouse => "arrayMin",
21618 DialectType::DuckDB => "LIST_MIN",
21619 _ => "ARRAY_MIN",
21620 };
21621 let mut new_f = *f;
21622 new_f.name = name.to_string();
21623 Ok(Expression::Function(Box::new(new_f)))
21624 }
// JAROWINKLER_SIMILARITY(a, b) -> jaroWinklerSimilarity(UPPER(a), UPPER(b)) for ClickHouse
// -> CAST(JARO_WINKLER_SIMILARITY(UPPER(a), UPPER(b)) * 100 AS INT) for DuckDB
21627 "JAROWINKLER_SIMILARITY" if f.args.len() == 2 => {
21628 let mut args = f.args;
21629 let b = args.pop().unwrap();
21630 let a = args.pop().unwrap();
21631 match target {
21632 DialectType::ClickHouse => {
21633 let upper_a = Expression::Upper(Box::new(
21634 crate::expressions::UnaryFunc::new(a),
21635 ));
21636 let upper_b = Expression::Upper(Box::new(
21637 crate::expressions::UnaryFunc::new(b),
21638 ));
21639 Ok(Expression::Function(Box::new(Function::new(
21640 "jaroWinklerSimilarity".to_string(),
21641 vec![upper_a, upper_b],
21642 ))))
21643 }
21644 DialectType::DuckDB => {
21645 let upper_a = Expression::Upper(Box::new(
21646 crate::expressions::UnaryFunc::new(a),
21647 ));
21648 let upper_b = Expression::Upper(Box::new(
21649 crate::expressions::UnaryFunc::new(b),
21650 ));
21651 let score = Expression::Function(Box::new(Function::new(
21652 "JARO_WINKLER_SIMILARITY".to_string(),
21653 vec![upper_a, upper_b],
21654 )));
21655 let scaled = Expression::Mul(Box::new(BinaryOp {
21656 left: score,
21657 right: Expression::number(100),
21658 left_comments: Vec::new(),
21659 operator_comments: Vec::new(),
21660 trailing_comments: Vec::new(),
21661 inferred_type: None,
21662 }));
21663 Ok(Expression::Cast(Box::new(Cast {
21664 this: scaled,
21665 to: DataType::Int {
21666 length: None,
21667 integer_spelling: false,
21668 },
21669 trailing_comments: Vec::new(),
21670 double_colon_syntax: false,
21671 format: None,
21672 default: None,
21673 inferred_type: None,
21674 })))
21675 }
21676 _ => Ok(Expression::Function(Box::new(Function::new(
21677 "JAROWINKLER_SIMILARITY".to_string(),
21678 vec![a, b],
21679 )))),
21680 }
21681 }
21682 // CURRENT_SCHEMAS(x) -> CURRENT_SCHEMAS() for Snowflake (drop arg)
21683 "CURRENT_SCHEMAS" => match target {
21684 DialectType::Snowflake => Ok(Expression::Function(Box::new(
21685 Function::new("CURRENT_SCHEMAS".to_string(), vec![]),
21686 ))),
21687 _ => Ok(Expression::Function(f)),
21688 },
21689 // TRUNC/TRUNCATE (numeric) -> dialect-specific
21690 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 => {
21691 match target {
21692 DialectType::TSQL | DialectType::Fabric => {
21693 // ROUND(x, decimals, 1) - the 1 flag means truncation
21694 let mut args = f.args;
21695 let this = if args.is_empty() {
21696 return Ok(Expression::Function(Box::new(
21697 Function::new("TRUNC".to_string(), args),
21698 )));
21699 } else {
21700 args.remove(0)
21701 };
21702 let decimals = if args.is_empty() {
21703 Expression::Literal(Box::new(Literal::Number(
21704 "0".to_string(),
21705 )))
21706 } else {
21707 args.remove(0)
21708 };
21709 Ok(Expression::Function(Box::new(Function::new(
21710 "ROUND".to_string(),
21711 vec![
21712 this,
21713 decimals,
21714 Expression::Literal(Box::new(Literal::Number(
21715 "1".to_string(),
21716 ))),
21717 ],
21718 ))))
21719 }
21720 DialectType::Presto
21721 | DialectType::Trino
21722 | DialectType::Athena => {
21723 // TRUNCATE(x, decimals)
21724 let mut new_f = *f;
21725 new_f.name = "TRUNCATE".to_string();
21726 Ok(Expression::Function(Box::new(new_f)))
21727 }
21728 DialectType::MySQL
21729 | DialectType::SingleStore
21730 | DialectType::TiDB => {
21731 // TRUNCATE(x, decimals)
21732 let mut new_f = *f;
21733 new_f.name = "TRUNCATE".to_string();
21734 Ok(Expression::Function(Box::new(new_f)))
21735 }
21736 DialectType::DuckDB => {
21737 // DuckDB supports TRUNC(x, decimals) — preserve both args
21738 let mut args = f.args;
21739 // Snowflake fractions_supported: wrap non-INT decimals in CAST(... AS INT)
21740 if args.len() == 2
21741 && matches!(source, DialectType::Snowflake)
21742 {
21743 let decimals = args.remove(1);
21744 let is_int = matches!(&decimals, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)))
21745 || matches!(&decimals, Expression::Cast(c) if matches!(c.to, DataType::Int { .. } | DataType::SmallInt { .. } | DataType::BigInt { .. } | DataType::TinyInt { .. }));
21746 let wrapped = if !is_int {
21747 Expression::Cast(Box::new(
21748 crate::expressions::Cast {
21749 this: decimals,
21750 to: DataType::Int {
21751 length: None,
21752 integer_spelling: false,
21753 },
21754 double_colon_syntax: false,
21755 trailing_comments: Vec::new(),
21756 format: None,
21757 default: None,
21758 inferred_type: None,
21759 },
21760 ))
21761 } else {
21762 decimals
21763 };
21764 args.push(wrapped);
21765 }
21766 Ok(Expression::Function(Box::new(Function::new(
21767 "TRUNC".to_string(),
21768 args,
21769 ))))
21770 }
21771 DialectType::ClickHouse => {
21772 // trunc(x, decimals) - lowercase
21773 let mut new_f = *f;
21774 new_f.name = "trunc".to_string();
21775 Ok(Expression::Function(Box::new(new_f)))
21776 }
21777 DialectType::Spark | DialectType::Databricks => {
21778 // Spark: TRUNC is date-only; numeric TRUNC → CAST(x AS BIGINT)
21779 let this = f.args.into_iter().next().unwrap_or(
21780 Expression::Literal(Box::new(Literal::Number(
21781 "0".to_string(),
21782 ))),
21783 );
21784 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
21785 this,
21786 to: crate::expressions::DataType::BigInt {
21787 length: None,
21788 },
21789 double_colon_syntax: false,
21790 trailing_comments: Vec::new(),
21791 format: None,
21792 default: None,
21793 inferred_type: None,
21794 })))
21795 }
21796 _ => {
21797 // TRUNC(x, decimals) for PostgreSQL, Oracle, Snowflake, etc.
21798 let mut new_f = *f;
21799 new_f.name = "TRUNC".to_string();
21800 Ok(Expression::Function(Box::new(new_f)))
21801 }
21802 }
21803 }
21804 // CURRENT_VERSION() -> VERSION() for most dialects
21805 "CURRENT_VERSION" => match target {
21806 DialectType::Snowflake
21807 | DialectType::Databricks
21808 | DialectType::StarRocks => Ok(Expression::Function(f)),
21809 DialectType::SQLite => {
21810 let mut new_f = *f;
21811 new_f.name = "SQLITE_VERSION".to_string();
21812 Ok(Expression::Function(Box::new(new_f)))
21813 }
21814 _ => {
21815 let mut new_f = *f;
21816 new_f.name = "VERSION".to_string();
21817 Ok(Expression::Function(Box::new(new_f)))
21818 }
21819 },
21820 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
21821 "ARRAY_REVERSE" => match target {
21822 DialectType::ClickHouse => {
21823 let mut new_f = *f;
21824 new_f.name = "arrayReverse".to_string();
21825 Ok(Expression::Function(Box::new(new_f)))
21826 }
21827 _ => Ok(Expression::Function(f)),
21828 },
21829 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
21830 "GENERATE_DATE_ARRAY" => {
21831 let mut args = f.args;
21832 if matches!(target, DialectType::BigQuery) {
21833 // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
21834 if args.len() == 2 {
21835 let default_interval = Expression::Interval(Box::new(
21836 crate::expressions::Interval {
21837 this: Some(Expression::Literal(Box::new(
21838 Literal::String("1".to_string()),
21839 ))),
21840 unit: Some(
21841 crate::expressions::IntervalUnitSpec::Simple {
21842 unit: crate::expressions::IntervalUnit::Day,
21843 use_plural: false,
21844 },
21845 ),
21846 },
21847 ));
21848 args.push(default_interval);
21849 }
21850 Ok(Expression::Function(Box::new(Function::new(
21851 "GENERATE_DATE_ARRAY".to_string(),
21852 args,
21853 ))))
21854 } else if matches!(target, DialectType::DuckDB) {
21855 // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
21856 let start = args.get(0).cloned();
21857 let end = args.get(1).cloned();
21858 let step = args.get(2).cloned().or_else(|| {
21859 Some(Expression::Interval(Box::new(
21860 crate::expressions::Interval {
21861 this: Some(Expression::Literal(Box::new(
21862 Literal::String("1".to_string()),
21863 ))),
21864 unit: Some(
21865 crate::expressions::IntervalUnitSpec::Simple {
21866 unit: crate::expressions::IntervalUnit::Day,
21867 use_plural: false,
21868 },
21869 ),
21870 },
21871 )))
21872 });
21873 let gen_series = Expression::GenerateSeries(Box::new(
21874 crate::expressions::GenerateSeries {
21875 start: start.map(Box::new),
21876 end: end.map(Box::new),
21877 step: step.map(Box::new),
21878 is_end_exclusive: None,
21879 },
21880 ));
21881 Ok(Expression::Cast(Box::new(Cast {
21882 this: gen_series,
21883 to: DataType::Array {
21884 element_type: Box::new(DataType::Date),
21885 dimension: None,
21886 },
21887 trailing_comments: vec![],
21888 double_colon_syntax: false,
21889 format: None,
21890 default: None,
21891 inferred_type: None,
21892 })))
21893 } else if matches!(
21894 target,
21895 DialectType::Presto | DialectType::Trino | DialectType::Athena
21896 ) {
21897 // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
21898 let start = args.get(0).cloned();
21899 let end = args.get(1).cloned();
21900 let step = args.get(2).cloned().or_else(|| {
21901 Some(Expression::Interval(Box::new(
21902 crate::expressions::Interval {
21903 this: Some(Expression::Literal(Box::new(
21904 Literal::String("1".to_string()),
21905 ))),
21906 unit: Some(
21907 crate::expressions::IntervalUnitSpec::Simple {
21908 unit: crate::expressions::IntervalUnit::Day,
21909 use_plural: false,
21910 },
21911 ),
21912 },
21913 )))
21914 });
21915 let gen_series = Expression::GenerateSeries(Box::new(
21916 crate::expressions::GenerateSeries {
21917 start: start.map(Box::new),
21918 end: end.map(Box::new),
21919 step: step.map(Box::new),
21920 is_end_exclusive: None,
21921 },
21922 ));
21923 Ok(gen_series)
21924 } else if matches!(
21925 target,
21926 DialectType::Spark | DialectType::Databricks
21927 ) {
21928 // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
21929 let start = args.get(0).cloned();
21930 let end = args.get(1).cloned();
21931 let step = args.get(2).cloned().or_else(|| {
21932 Some(Expression::Interval(Box::new(
21933 crate::expressions::Interval {
21934 this: Some(Expression::Literal(Box::new(
21935 Literal::String("1".to_string()),
21936 ))),
21937 unit: Some(
21938 crate::expressions::IntervalUnitSpec::Simple {
21939 unit: crate::expressions::IntervalUnit::Day,
21940 use_plural: false,
21941 },
21942 ),
21943 },
21944 )))
21945 });
21946 let gen_series = Expression::GenerateSeries(Box::new(
21947 crate::expressions::GenerateSeries {
21948 start: start.map(Box::new),
21949 end: end.map(Box::new),
21950 step: step.map(Box::new),
21951 is_end_exclusive: None,
21952 },
21953 ));
21954 Ok(gen_series)
21955 } else if matches!(target, DialectType::Snowflake) {
21956 // Snowflake: keep as GENERATE_DATE_ARRAY for later transform
21957 if args.len() == 2 {
21958 let default_interval = Expression::Interval(Box::new(
21959 crate::expressions::Interval {
21960 this: Some(Expression::Literal(Box::new(
21961 Literal::String("1".to_string()),
21962 ))),
21963 unit: Some(
21964 crate::expressions::IntervalUnitSpec::Simple {
21965 unit: crate::expressions::IntervalUnit::Day,
21966 use_plural: false,
21967 },
21968 ),
21969 },
21970 ));
21971 args.push(default_interval);
21972 }
21973 Ok(Expression::Function(Box::new(Function::new(
21974 "GENERATE_DATE_ARRAY".to_string(),
21975 args,
21976 ))))
21977 } else if matches!(
21978 target,
21979 DialectType::MySQL
21980 | DialectType::TSQL
21981 | DialectType::Fabric
21982 | DialectType::Redshift
21983 ) {
21984 // MySQL/TSQL/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
21985 // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
21986 Ok(Expression::Function(Box::new(Function::new(
21987 "GENERATE_DATE_ARRAY".to_string(),
21988 args,
21989 ))))
21990 } else {
21991 // PostgreSQL/others: convert to GenerateSeries
21992 let start = args.get(0).cloned();
21993 let end = args.get(1).cloned();
21994 let step = args.get(2).cloned().or_else(|| {
21995 Some(Expression::Interval(Box::new(
21996 crate::expressions::Interval {
21997 this: Some(Expression::Literal(Box::new(
21998 Literal::String("1".to_string()),
21999 ))),
22000 unit: Some(
22001 crate::expressions::IntervalUnitSpec::Simple {
22002 unit: crate::expressions::IntervalUnit::Day,
22003 use_plural: false,
22004 },
22005 ),
22006 },
22007 )))
22008 });
22009 Ok(Expression::GenerateSeries(Box::new(
22010 crate::expressions::GenerateSeries {
22011 start: start.map(Box::new),
22012 end: end.map(Box::new),
22013 step: step.map(Box::new),
22014 is_end_exclusive: None,
22015 },
22016 )))
22017 }
22018 }
22019 // ARRAYS_OVERLAP(arr1, arr2) from Snowflake -> DuckDB:
22020 // (arr1 && arr2) OR (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
22021 "ARRAYS_OVERLAP"
22022 if f.args.len() == 2
22023 && matches!(source, DialectType::Snowflake)
22024 && matches!(target, DialectType::DuckDB) =>
22025 {
22026 let mut args = f.args;
22027 let arr1 = args.remove(0);
22028 let arr2 = args.remove(0);
22029
22030 // (arr1 && arr2)
22031 let overlap = Expression::Paren(Box::new(Paren {
22032 this: Expression::ArrayOverlaps(Box::new(BinaryOp {
22033 left: arr1.clone(),
22034 right: arr2.clone(),
22035 left_comments: vec![],
22036 operator_comments: vec![],
22037 trailing_comments: vec![],
22038 inferred_type: None,
22039 })),
22040 trailing_comments: vec![],
22041 }));
22042
22043 // ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1)
22044 let arr1_has_null = Expression::Neq(Box::new(BinaryOp {
22045 left: Expression::Function(Box::new(Function::new(
22046 "ARRAY_LENGTH".to_string(),
22047 vec![arr1.clone()],
22048 ))),
22049 right: Expression::Function(Box::new(Function::new(
22050 "LIST_COUNT".to_string(),
22051 vec![arr1],
22052 ))),
22053 left_comments: vec![],
22054 operator_comments: vec![],
22055 trailing_comments: vec![],
22056 inferred_type: None,
22057 }));
22058
22059 // ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2)
22060 let arr2_has_null = Expression::Neq(Box::new(BinaryOp {
22061 left: Expression::Function(Box::new(Function::new(
22062 "ARRAY_LENGTH".to_string(),
22063 vec![arr2.clone()],
22064 ))),
22065 right: Expression::Function(Box::new(Function::new(
22066 "LIST_COUNT".to_string(),
22067 vec![arr2],
22068 ))),
22069 left_comments: vec![],
22070 operator_comments: vec![],
22071 trailing_comments: vec![],
22072 inferred_type: None,
22073 }));
22074
22075 // (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
22076 let null_check = Expression::Paren(Box::new(Paren {
22077 this: Expression::And(Box::new(BinaryOp {
22078 left: arr1_has_null,
22079 right: arr2_has_null,
22080 left_comments: vec![],
22081 operator_comments: vec![],
22082 trailing_comments: vec![],
22083 inferred_type: None,
22084 })),
22085 trailing_comments: vec![],
22086 }));
22087
22088 // (arr1 && arr2) OR (null_check)
22089 Ok(Expression::Or(Box::new(BinaryOp {
22090 left: overlap,
22091 right: null_check,
22092 left_comments: vec![],
22093 operator_comments: vec![],
22094 trailing_comments: vec![],
22095 inferred_type: None,
22096 })))
22097 }
22098 // ARRAY_INTERSECTION([1, 2], [2, 3]) from Snowflake -> DuckDB:
22099 // Bag semantics using LIST_TRANSFORM/LIST_FILTER with GENERATE_SERIES
22100 "ARRAY_INTERSECTION"
22101 if f.args.len() == 2
22102 && matches!(source, DialectType::Snowflake)
22103 && matches!(target, DialectType::DuckDB) =>
22104 {
22105 let mut args = f.args;
22106 let arr1 = args.remove(0);
22107 let arr2 = args.remove(0);
22108
22109 // Build: arr1 IS NULL
22110 let arr1_is_null = Expression::IsNull(Box::new(IsNull {
22111 this: arr1.clone(),
22112 not: false,
22113 postfix_form: false,
22114 }));
22115 let arr2_is_null = Expression::IsNull(Box::new(IsNull {
22116 this: arr2.clone(),
22117 not: false,
22118 postfix_form: false,
22119 }));
22120 let null_check = Expression::Or(Box::new(BinaryOp {
22121 left: arr1_is_null,
22122 right: arr2_is_null,
22123 left_comments: vec![],
22124 operator_comments: vec![],
22125 trailing_comments: vec![],
22126 inferred_type: None,
22127 }));
22128
22129 // GENERATE_SERIES(1, LENGTH(arr1))
22130 let gen_series = Expression::Function(Box::new(Function::new(
22131 "GENERATE_SERIES".to_string(),
22132 vec![
22133 Expression::number(1),
22134 Expression::Function(Box::new(Function::new(
22135 "LENGTH".to_string(),
22136 vec![arr1.clone()],
22137 ))),
22138 ],
22139 )));
22140
22141 // LIST_ZIP(arr1, GENERATE_SERIES(1, LENGTH(arr1)))
22142 let list_zip = Expression::Function(Box::new(Function::new(
22143 "LIST_ZIP".to_string(),
22144 vec![arr1.clone(), gen_series],
22145 )));
22146
22147 // pair[1] and pair[2]
22148 let pair_col = Expression::column("pair");
22149 let pair_1 = Expression::Subscript(Box::new(
22150 crate::expressions::Subscript {
22151 this: pair_col.clone(),
22152 index: Expression::number(1),
22153 },
22154 ));
22155 let pair_2 = Expression::Subscript(Box::new(
22156 crate::expressions::Subscript {
22157 this: pair_col.clone(),
22158 index: Expression::number(2),
22159 },
22160 ));
22161
22162 // arr1[1:pair[2]]
22163 let arr1_slice = Expression::ArraySlice(Box::new(
22164 crate::expressions::ArraySlice {
22165 this: arr1.clone(),
22166 start: Some(Expression::number(1)),
22167 end: Some(pair_2),
22168 },
22169 ));
22170
22171 // e IS NOT DISTINCT FROM pair[1]
22172 let e_col = Expression::column("e");
22173 let is_not_distinct = Expression::NullSafeEq(Box::new(BinaryOp {
22174 left: e_col.clone(),
22175 right: pair_1.clone(),
22176 left_comments: vec![],
22177 operator_comments: vec![],
22178 trailing_comments: vec![],
22179 inferred_type: None,
22180 }));
22181
22182 // e -> e IS NOT DISTINCT FROM pair[1]
22183 let inner_lambda1 =
22184 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22185 parameters: vec![crate::expressions::Identifier::new("e")],
22186 body: is_not_distinct,
22187 colon: false,
22188 parameter_types: vec![],
22189 }));
22190
22191 // LIST_FILTER(arr1[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
22192 let inner_filter1 = Expression::Function(Box::new(Function::new(
22193 "LIST_FILTER".to_string(),
22194 vec![arr1_slice, inner_lambda1],
22195 )));
22196
22197 // LENGTH(LIST_FILTER(arr1[1:pair[2]], ...))
22198 let len1 = Expression::Function(Box::new(Function::new(
22199 "LENGTH".to_string(),
22200 vec![inner_filter1],
22201 )));
22202
22203 // e -> e IS NOT DISTINCT FROM pair[1]
22204 let inner_lambda2 =
22205 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22206 parameters: vec![crate::expressions::Identifier::new("e")],
22207 body: Expression::NullSafeEq(Box::new(BinaryOp {
22208 left: e_col,
22209 right: pair_1.clone(),
22210 left_comments: vec![],
22211 operator_comments: vec![],
22212 trailing_comments: vec![],
22213 inferred_type: None,
22214 })),
22215 colon: false,
22216 parameter_types: vec![],
22217 }));
22218
22219 // LIST_FILTER(arr2, e -> e IS NOT DISTINCT FROM pair[1])
22220 let inner_filter2 = Expression::Function(Box::new(Function::new(
22221 "LIST_FILTER".to_string(),
22222 vec![arr2.clone(), inner_lambda2],
22223 )));
22224
22225 // LENGTH(LIST_FILTER(arr2, ...))
22226 let len2 = Expression::Function(Box::new(Function::new(
22227 "LENGTH".to_string(),
22228 vec![inner_filter2],
22229 )));
22230
22231 // LENGTH(...) <= LENGTH(...)
22232 let cond = Expression::Paren(Box::new(Paren {
22233 this: Expression::Lte(Box::new(BinaryOp {
22234 left: len1,
22235 right: len2,
22236 left_comments: vec![],
22237 operator_comments: vec![],
22238 trailing_comments: vec![],
22239 inferred_type: None,
22240 })),
22241 trailing_comments: vec![],
22242 }));
22243
22244 // pair -> (condition)
22245 let filter_lambda =
22246 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22247 parameters: vec![crate::expressions::Identifier::new(
22248 "pair",
22249 )],
22250 body: cond,
22251 colon: false,
22252 parameter_types: vec![],
22253 }));
22254
22255 // LIST_FILTER(LIST_ZIP(...), pair -> ...)
22256 let outer_filter = Expression::Function(Box::new(Function::new(
22257 "LIST_FILTER".to_string(),
22258 vec![list_zip, filter_lambda],
22259 )));
22260
22261 // pair -> pair[1]
22262 let transform_lambda =
22263 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
22264 parameters: vec![crate::expressions::Identifier::new(
22265 "pair",
22266 )],
22267 body: pair_1,
22268 colon: false,
22269 parameter_types: vec![],
22270 }));
22271
22272 // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
22273 let list_transform = Expression::Function(Box::new(Function::new(
22274 "LIST_TRANSFORM".to_string(),
22275 vec![outer_filter, transform_lambda],
22276 )));
22277
22278 // CASE WHEN arr1 IS NULL OR arr2 IS NULL THEN NULL
22279 // ELSE LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
22280 // END
22281 Ok(Expression::Case(Box::new(Case {
22282 operand: None,
22283 whens: vec![(null_check, Expression::Null(Null))],
22284 else_: Some(list_transform),
22285 comments: vec![],
22286 inferred_type: None,
22287 })))
22288 }
22289 // ARRAY_CONSTRUCT(args) -> Expression::Array for all targets
22290 "ARRAY_CONSTRUCT" => {
22291 if matches!(target, DialectType::Snowflake) {
22292 Ok(Expression::Function(f))
22293 } else {
22294 Ok(Expression::Array(Box::new(crate::expressions::Array {
22295 expressions: f.args,
22296 })))
22297 }
22298 }
22299 // ARRAY(args) function -> Expression::Array for DuckDB/Snowflake/Presto/Trino/Athena
22300 "ARRAY"
22301 if !f.args.iter().any(|a| {
22302 matches!(a, Expression::Select(_) | Expression::Subquery(_))
22303 }) =>
22304 {
22305 match target {
22306 DialectType::DuckDB
22307 | DialectType::Snowflake
22308 | DialectType::Presto
22309 | DialectType::Trino
22310 | DialectType::Athena => {
22311 Ok(Expression::Array(Box::new(crate::expressions::Array {
22312 expressions: f.args,
22313 })))
22314 }
22315 _ => Ok(Expression::Function(f)),
22316 }
22317 }
22318 _ => Ok(Expression::Function(f)),
22319 }
22320 } else if let Expression::AggregateFunction(mut af) = e {
22321 let name = af.name.to_ascii_uppercase();
22322 match name.as_str() {
22323 "ARBITRARY" if af.args.len() == 1 => {
22324 let arg = af.args.into_iter().next().unwrap();
22325 Ok(convert_arbitrary(arg, target))
22326 }
22327 "JSON_ARRAYAGG" => {
22328 match target {
22329 DialectType::PostgreSQL => {
22330 af.name = "JSON_AGG".to_string();
22331 // Add NULLS FIRST to ORDER BY items for PostgreSQL
22332 for ordered in af.order_by.iter_mut() {
22333 if ordered.nulls_first.is_none() {
22334 ordered.nulls_first = Some(true);
22335 }
22336 }
22337 Ok(Expression::AggregateFunction(af))
22338 }
22339 _ => Ok(Expression::AggregateFunction(af)),
22340 }
22341 }
22342 _ => Ok(Expression::AggregateFunction(af)),
22343 }
22344 } else if let Expression::JSONArrayAgg(ja) = e {
22345 // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
22346 match target {
22347 DialectType::PostgreSQL => {
22348 let mut order_by = Vec::new();
22349 if let Some(order_expr) = ja.order {
22350 if let Expression::OrderBy(ob) = *order_expr {
22351 for mut ordered in ob.expressions {
22352 if ordered.nulls_first.is_none() {
22353 ordered.nulls_first = Some(true);
22354 }
22355 order_by.push(ordered);
22356 }
22357 }
22358 }
22359 Ok(Expression::AggregateFunction(Box::new(
22360 crate::expressions::AggregateFunction {
22361 name: "JSON_AGG".to_string(),
22362 args: vec![*ja.this],
22363 distinct: false,
22364 filter: None,
22365 order_by,
22366 limit: None,
22367 ignore_nulls: None,
22368 inferred_type: None,
22369 },
22370 )))
22371 }
22372 _ => Ok(Expression::JSONArrayAgg(ja)),
22373 }
22374 } else if let Expression::JSONArray(ja) = e {
22375 match target {
22376 DialectType::Snowflake
22377 if ja.null_handling.is_none()
22378 && ja.return_type.is_none()
22379 && ja.strict.is_none() =>
22380 {
22381 let array_construct = Expression::ArrayFunc(Box::new(
22382 crate::expressions::ArrayConstructor {
22383 expressions: ja.expressions,
22384 bracket_notation: false,
22385 use_list_keyword: false,
22386 },
22387 ));
22388 Ok(Expression::Function(Box::new(Function::new(
22389 "TO_VARIANT".to_string(),
22390 vec![array_construct],
22391 ))))
22392 }
22393 _ => Ok(Expression::JSONArray(ja)),
22394 }
22395 } else if let Expression::JsonArray(f) = e {
22396 match target {
22397 DialectType::Snowflake => {
22398 let array_construct = Expression::ArrayFunc(Box::new(
22399 crate::expressions::ArrayConstructor {
22400 expressions: f.expressions,
22401 bracket_notation: false,
22402 use_list_keyword: false,
22403 },
22404 ));
22405 Ok(Expression::Function(Box::new(Function::new(
22406 "TO_VARIANT".to_string(),
22407 vec![array_construct],
22408 ))))
22409 }
22410 _ => Ok(Expression::JsonArray(f)),
22411 }
22412 } else if let Expression::CombinedParameterizedAgg(cpa) = e {
22413 let function_name = match cpa.this.as_ref() {
22414 Expression::Identifier(ident) => Some(ident.name.as_str()),
22415 _ => None,
22416 };
22417 match function_name {
22418 Some(name)
22419 if name.eq_ignore_ascii_case("groupConcat")
22420 && cpa.expressions.len() == 1 =>
22421 {
22422 match target {
22423 DialectType::MySQL | DialectType::SingleStore => {
22424 let this = cpa.expressions[0].clone();
22425 let separator = cpa.params.first().cloned();
22426 Ok(Expression::GroupConcat(Box::new(
22427 crate::expressions::GroupConcatFunc {
22428 this,
22429 separator,
22430 order_by: None,
22431 distinct: false,
22432 filter: None,
22433 limit: None,
22434 inferred_type: None,
22435 },
22436 )))
22437 }
22438 DialectType::DuckDB => Ok(Expression::ListAgg(Box::new({
22439 let this = cpa.expressions[0].clone();
22440 let separator = cpa.params.first().cloned();
22441 crate::expressions::ListAggFunc {
22442 this,
22443 separator,
22444 on_overflow: None,
22445 order_by: None,
22446 distinct: false,
22447 filter: None,
22448 inferred_type: None,
22449 }
22450 }))),
22451 _ => Ok(Expression::CombinedParameterizedAgg(cpa)),
22452 }
22453 }
22454 _ => Ok(Expression::CombinedParameterizedAgg(cpa)),
22455 }
22456 } else if let Expression::ToNumber(tn) = e {
22457 // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
22458 let arg = *tn.this;
22459 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
22460 this: arg,
22461 to: crate::expressions::DataType::Double {
22462 precision: None,
22463 scale: None,
22464 },
22465 double_colon_syntax: false,
22466 trailing_comments: Vec::new(),
22467 format: None,
22468 default: None,
22469 inferred_type: None,
22470 })))
22471 } else {
22472 Ok(e)
22473 }
22474 }
22475
22476 Action::RegexpLikeToDuckDB => {
22477 if let Expression::RegexpLike(f) = e {
22478 let mut args = vec![f.this, f.pattern];
22479 if let Some(flags) = f.flags {
22480 args.push(flags);
22481 }
22482 Ok(Expression::Function(Box::new(Function::new(
22483 "REGEXP_MATCHES".to_string(),
22484 args,
22485 ))))
22486 } else {
22487 Ok(e)
22488 }
22489 }
22490 Action::EpochConvert => {
22491 if let Expression::Epoch(f) = e {
22492 let arg = f.this;
22493 let name = match target {
22494 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
22495 "UNIX_TIMESTAMP"
22496 }
22497 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
22498 DialectType::BigQuery => "TIME_TO_UNIX",
22499 _ => "EPOCH",
22500 };
22501 Ok(Expression::Function(Box::new(Function::new(
22502 name.to_string(),
22503 vec![arg],
22504 ))))
22505 } else {
22506 Ok(e)
22507 }
22508 }
22509 Action::EpochMsConvert => {
22510 use crate::expressions::{BinaryOp, Cast};
22511 if let Expression::EpochMs(f) = e {
22512 let arg = f.this;
22513 match target {
22514 DialectType::Spark | DialectType::Databricks => {
22515 Ok(Expression::Function(Box::new(Function::new(
22516 "TIMESTAMP_MILLIS".to_string(),
22517 vec![arg],
22518 ))))
22519 }
22520 DialectType::BigQuery => Ok(Expression::Function(Box::new(
22521 Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
22522 ))),
22523 DialectType::Presto | DialectType::Trino => {
22524 // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
22525 let cast_arg = Expression::Cast(Box::new(Cast {
22526 this: arg,
22527 to: DataType::Double {
22528 precision: None,
22529 scale: None,
22530 },
22531 trailing_comments: Vec::new(),
22532 double_colon_syntax: false,
22533 format: None,
22534 default: None,
22535 inferred_type: None,
22536 }));
22537 let div = Expression::Div(Box::new(BinaryOp::new(
22538 cast_arg,
22539 Expression::Function(Box::new(Function::new(
22540 "POW".to_string(),
22541 vec![Expression::number(10), Expression::number(3)],
22542 ))),
22543 )));
22544 Ok(Expression::Function(Box::new(Function::new(
22545 "FROM_UNIXTIME".to_string(),
22546 vec![div],
22547 ))))
22548 }
22549 DialectType::MySQL => {
22550 // FROM_UNIXTIME(x / POWER(10, 3))
22551 let div = Expression::Div(Box::new(BinaryOp::new(
22552 arg,
22553 Expression::Function(Box::new(Function::new(
22554 "POWER".to_string(),
22555 vec![Expression::number(10), Expression::number(3)],
22556 ))),
22557 )));
22558 Ok(Expression::Function(Box::new(Function::new(
22559 "FROM_UNIXTIME".to_string(),
22560 vec![div],
22561 ))))
22562 }
22563 DialectType::PostgreSQL | DialectType::Redshift => {
22564 // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
22565 let cast_arg = Expression::Cast(Box::new(Cast {
22566 this: arg,
22567 to: DataType::Custom {
22568 name: "DOUBLE PRECISION".to_string(),
22569 },
22570 trailing_comments: Vec::new(),
22571 double_colon_syntax: false,
22572 format: None,
22573 default: None,
22574 inferred_type: None,
22575 }));
22576 let div = Expression::Div(Box::new(BinaryOp::new(
22577 cast_arg,
22578 Expression::Function(Box::new(Function::new(
22579 "POWER".to_string(),
22580 vec![Expression::number(10), Expression::number(3)],
22581 ))),
22582 )));
22583 Ok(Expression::Function(Box::new(Function::new(
22584 "TO_TIMESTAMP".to_string(),
22585 vec![div],
22586 ))))
22587 }
22588 DialectType::ClickHouse => {
22589 // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
22590 let cast_arg = Expression::Cast(Box::new(Cast {
22591 this: arg,
22592 to: DataType::Nullable {
22593 inner: Box::new(DataType::BigInt { length: None }),
22594 },
22595 trailing_comments: Vec::new(),
22596 double_colon_syntax: false,
22597 format: None,
22598 default: None,
22599 inferred_type: None,
22600 }));
22601 Ok(Expression::Function(Box::new(Function::new(
22602 "fromUnixTimestamp64Milli".to_string(),
22603 vec![cast_arg],
22604 ))))
22605 }
22606 _ => Ok(Expression::Function(Box::new(Function::new(
22607 "EPOCH_MS".to_string(),
22608 vec![arg],
22609 )))),
22610 }
22611 } else {
22612 Ok(e)
22613 }
22614 }
22615 Action::TSQLTypeNormalize => {
22616 if let Expression::DataType(dt) = e {
22617 let new_dt = match &dt {
22618 DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
22619 DataType::Decimal {
22620 precision: Some(15),
22621 scale: Some(4),
22622 }
22623 }
22624 DataType::Custom { name }
22625 if name.eq_ignore_ascii_case("SMALLMONEY") =>
22626 {
22627 DataType::Decimal {
22628 precision: Some(6),
22629 scale: Some(4),
22630 }
22631 }
22632 DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
22633 DataType::Timestamp {
22634 timezone: false,
22635 precision: None,
22636 }
22637 }
22638 DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
22639 DataType::Float {
22640 precision: None,
22641 scale: None,
22642 real_spelling: false,
22643 }
22644 }
22645 DataType::Float {
22646 real_spelling: true,
22647 ..
22648 } => DataType::Float {
22649 precision: None,
22650 scale: None,
22651 real_spelling: false,
22652 },
22653 DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
22654 DataType::Custom {
22655 name: "BLOB".to_string(),
22656 }
22657 }
22658 DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
22659 DataType::Boolean
22660 }
22661 DataType::Custom { name }
22662 if name.eq_ignore_ascii_case("ROWVERSION") =>
22663 {
22664 DataType::Custom {
22665 name: "BINARY".to_string(),
22666 }
22667 }
22668 DataType::Custom { name }
22669 if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
22670 {
22671 match target {
22672 DialectType::Spark
22673 | DialectType::Databricks
22674 | DialectType::Hive => DataType::Custom {
22675 name: "STRING".to_string(),
22676 },
22677 _ => DataType::VarChar {
22678 length: Some(36),
22679 parenthesized_length: true,
22680 },
22681 }
22682 }
22683 DataType::Custom { name }
22684 if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
22685 {
22686 match target {
22687 DialectType::Spark
22688 | DialectType::Databricks
22689 | DialectType::Hive => DataType::Timestamp {
22690 timezone: false,
22691 precision: None,
22692 },
22693 _ => DataType::Timestamp {
22694 timezone: true,
22695 precision: None,
22696 },
22697 }
22698 }
22699 DataType::Custom { ref name }
22700 if name.len() >= 10
22701 && name[..10].eq_ignore_ascii_case("DATETIME2(") =>
22702 {
22703 // DATETIME2(n) -> TIMESTAMP
22704 DataType::Timestamp {
22705 timezone: false,
22706 precision: None,
22707 }
22708 }
22709 DataType::Custom { ref name }
22710 if name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME(") =>
22711 {
22712 // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
22713 match target {
22714 DialectType::Spark
22715 | DialectType::Databricks
22716 | DialectType::Hive => DataType::Timestamp {
22717 timezone: false,
22718 precision: None,
22719 },
22720 _ => return Ok(Expression::DataType(dt)),
22721 }
22722 }
22723 DataType::Custom { ref name }
22724 if name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC") =>
22725 {
22726 // Parse NUMERIC(p,s) back to Decimal(p,s)
22727 let upper = name.to_ascii_uppercase();
22728 if let Some(inner) = upper
22729 .strip_prefix("NUMERIC(")
22730 .and_then(|s| s.strip_suffix(')'))
22731 {
22732 let parts: Vec<&str> = inner.split(',').collect();
22733 let precision =
22734 parts.first().and_then(|s| s.trim().parse::<u32>().ok());
22735 let scale =
22736 parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
22737 DataType::Decimal { precision, scale }
22738 } else if upper == "NUMERIC" {
22739 DataType::Decimal {
22740 precision: None,
22741 scale: None,
22742 }
22743 } else {
22744 return Ok(Expression::DataType(dt));
22745 }
22746 }
22747 DataType::Float {
22748 precision: Some(p), ..
22749 } => {
22750 // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
22751 // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
22752 let boundary = match target {
22753 DialectType::Hive
22754 | DialectType::Spark
22755 | DialectType::Databricks => 32,
22756 _ => 24,
22757 };
22758 if *p <= boundary {
22759 DataType::Float {
22760 precision: None,
22761 scale: None,
22762 real_spelling: false,
22763 }
22764 } else {
22765 DataType::Double {
22766 precision: None,
22767 scale: None,
22768 }
22769 }
22770 }
22771 DataType::TinyInt { .. } => match target {
22772 DialectType::DuckDB => DataType::Custom {
22773 name: "UTINYINT".to_string(),
22774 },
22775 DialectType::Hive
22776 | DialectType::Spark
22777 | DialectType::Databricks => DataType::SmallInt { length: None },
22778 _ => return Ok(Expression::DataType(dt)),
22779 },
22780 // INTEGER -> INT for Spark/Databricks
22781 DataType::Int {
22782 length,
22783 integer_spelling: true,
22784 } => DataType::Int {
22785 length: *length,
22786 integer_spelling: false,
22787 },
22788 _ => return Ok(Expression::DataType(dt)),
22789 };
22790 Ok(Expression::DataType(new_dt))
22791 } else {
22792 Ok(e)
22793 }
22794 }
22795 Action::MySQLSafeDivide => {
22796 use crate::expressions::{BinaryOp, Cast};
22797 if let Expression::Div(op) = e {
22798 let left = op.left;
22799 let right = op.right;
22800 // For SQLite: CAST left as REAL but NO NULLIF wrapping
22801 if matches!(target, DialectType::SQLite) {
22802 let new_left = Expression::Cast(Box::new(Cast {
22803 this: left,
22804 to: DataType::Float {
22805 precision: None,
22806 scale: None,
22807 real_spelling: true,
22808 },
22809 trailing_comments: Vec::new(),
22810 double_colon_syntax: false,
22811 format: None,
22812 default: None,
22813 inferred_type: None,
22814 }));
22815 return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
22816 }
22817 // Wrap right in NULLIF(right, 0)
22818 let nullif_right = Expression::Function(Box::new(Function::new(
22819 "NULLIF".to_string(),
22820 vec![right, Expression::number(0)],
22821 )));
22822 // For some dialects, also CAST the left side
22823 let new_left = match target {
22824 DialectType::PostgreSQL
22825 | DialectType::Redshift
22826 | DialectType::Teradata
22827 | DialectType::Materialize
22828 | DialectType::RisingWave => Expression::Cast(Box::new(Cast {
22829 this: left,
22830 to: DataType::Custom {
22831 name: "DOUBLE PRECISION".to_string(),
22832 },
22833 trailing_comments: Vec::new(),
22834 double_colon_syntax: false,
22835 format: None,
22836 default: None,
22837 inferred_type: None,
22838 })),
22839 DialectType::Drill
22840 | DialectType::Trino
22841 | DialectType::Presto
22842 | DialectType::Athena => Expression::Cast(Box::new(Cast {
22843 this: left,
22844 to: DataType::Double {
22845 precision: None,
22846 scale: None,
22847 },
22848 trailing_comments: Vec::new(),
22849 double_colon_syntax: false,
22850 format: None,
22851 default: None,
22852 inferred_type: None,
22853 })),
22854 DialectType::TSQL => Expression::Cast(Box::new(Cast {
22855 this: left,
22856 to: DataType::Float {
22857 precision: None,
22858 scale: None,
22859 real_spelling: false,
22860 },
22861 trailing_comments: Vec::new(),
22862 double_colon_syntax: false,
22863 format: None,
22864 default: None,
22865 inferred_type: None,
22866 })),
22867 _ => left,
22868 };
22869 Ok(Expression::Div(Box::new(BinaryOp::new(
22870 new_left,
22871 nullif_right,
22872 ))))
22873 } else {
22874 Ok(e)
22875 }
22876 }
22877 Action::AlterTableRenameStripSchema => {
22878 if let Expression::AlterTable(mut at) = e {
22879 if let Some(crate::expressions::AlterTableAction::RenameTable(
22880 ref mut new_tbl,
22881 )) = at.actions.first_mut()
22882 {
22883 new_tbl.schema = None;
22884 new_tbl.catalog = None;
22885 }
22886 Ok(Expression::AlterTable(at))
22887 } else {
22888 Ok(e)
22889 }
22890 }
22891 Action::NullsOrdering => {
22892 // Fill in the source dialect's implied null ordering default.
22893 // This makes implicit null ordering explicit so the target generator
22894 // can correctly strip or keep it.
22895 //
22896 // Dialect null ordering categories:
22897 // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
22898 // ASC -> NULLS LAST, DESC -> NULLS FIRST
22899 // nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, ClickHouse, etc.):
22900 // ASC -> NULLS FIRST, DESC -> NULLS LAST
22901 // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena):
22902 // NULLS LAST always (both ASC and DESC)
22903 if let Expression::Ordered(mut o) = e {
22904 let is_asc = !o.desc;
22905
22906 let is_source_nulls_large = matches!(
22907 source,
22908 DialectType::Oracle
22909 | DialectType::PostgreSQL
22910 | DialectType::Redshift
22911 | DialectType::Snowflake
22912 );
22913 let is_source_nulls_last = matches!(
22914 source,
22915 DialectType::DuckDB
22916 | DialectType::Presto
22917 | DialectType::Trino
22918 | DialectType::Dremio
22919 | DialectType::Athena
22920 | DialectType::ClickHouse
22921 | DialectType::Drill
22922 | DialectType::Exasol
22923 | DialectType::DataFusion
22924 );
22925
22926 // Determine target category to check if default matches
22927 let is_target_nulls_large = matches!(
22928 target,
22929 DialectType::Oracle
22930 | DialectType::PostgreSQL
22931 | DialectType::Redshift
22932 | DialectType::Snowflake
22933 );
22934 let is_target_nulls_last = matches!(
22935 target,
22936 DialectType::DuckDB
22937 | DialectType::Presto
22938 | DialectType::Trino
22939 | DialectType::Dremio
22940 | DialectType::Athena
22941 | DialectType::ClickHouse
22942 | DialectType::Drill
22943 | DialectType::Exasol
22944 | DialectType::DataFusion
22945 );
22946
22947 // Compute the implied nulls_first for source
22948 let source_nulls_first = if is_source_nulls_large {
22949 !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
22950 } else if is_source_nulls_last {
22951 false // NULLS LAST always
22952 } else {
22953 is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
22954 };
22955
22956 // Compute the target's default
22957 let target_nulls_first = if is_target_nulls_large {
22958 !is_asc
22959 } else if is_target_nulls_last {
22960 false
22961 } else {
22962 is_asc
22963 };
22964
22965 // Only add explicit nulls ordering if source and target defaults differ
22966 if source_nulls_first != target_nulls_first {
22967 o.nulls_first = Some(source_nulls_first);
22968 }
22969 // If they match, leave nulls_first as None so the generator won't output it
22970
22971 Ok(Expression::Ordered(o))
22972 } else {
22973 Ok(e)
22974 }
22975 }
22976 Action::StringAggConvert => {
22977 match e {
22978 Expression::WithinGroup(wg) => {
22979 // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
22980 // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
22981 let (x_opt, sep_opt, distinct) = match wg.this {
22982 Expression::AggregateFunction(ref af)
22983 if af.name.eq_ignore_ascii_case("STRING_AGG")
22984 && af.args.len() >= 2 =>
22985 {
22986 (
22987 Some(af.args[0].clone()),
22988 Some(af.args[1].clone()),
22989 af.distinct,
22990 )
22991 }
22992 Expression::Function(ref f)
22993 if f.name.eq_ignore_ascii_case("STRING_AGG")
22994 && f.args.len() >= 2 =>
22995 {
22996 (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
22997 }
22998 Expression::StringAgg(ref sa) => {
22999 (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
23000 }
23001 _ => (None, None, false),
23002 };
23003 if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
23004 let order_by = wg.order_by;
23005
23006 match target {
23007 DialectType::TSQL | DialectType::Fabric => {
23008 // Keep as WithinGroup(StringAgg) for TSQL
23009 Ok(Expression::WithinGroup(Box::new(
23010 crate::expressions::WithinGroup {
23011 this: Expression::StringAgg(Box::new(
23012 crate::expressions::StringAggFunc {
23013 this: x,
23014 separator: Some(sep),
23015 order_by: None, // order_by goes in WithinGroup, not StringAgg
23016 distinct,
23017 filter: None,
23018 limit: None,
23019 inferred_type: None,
23020 },
23021 )),
23022 order_by,
23023 },
23024 )))
23025 }
23026 DialectType::MySQL
23027 | DialectType::SingleStore
23028 | DialectType::Doris
23029 | DialectType::StarRocks => {
23030 // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
23031 Ok(Expression::GroupConcat(Box::new(
23032 crate::expressions::GroupConcatFunc {
23033 this: x,
23034 separator: Some(sep),
23035 order_by: Some(order_by),
23036 distinct,
23037 filter: None,
23038 limit: None,
23039 inferred_type: None,
23040 },
23041 )))
23042 }
23043 DialectType::SQLite => {
23044 // GROUP_CONCAT(x, sep) - no ORDER BY support
23045 Ok(Expression::GroupConcat(Box::new(
23046 crate::expressions::GroupConcatFunc {
23047 this: x,
23048 separator: Some(sep),
23049 order_by: None,
23050 distinct,
23051 filter: None,
23052 limit: None,
23053 inferred_type: None,
23054 },
23055 )))
23056 }
23057 DialectType::PostgreSQL | DialectType::Redshift => {
23058 // STRING_AGG(x, sep ORDER BY z)
23059 Ok(Expression::StringAgg(Box::new(
23060 crate::expressions::StringAggFunc {
23061 this: x,
23062 separator: Some(sep),
23063 order_by: Some(order_by),
23064 distinct,
23065 filter: None,
23066 limit: None,
23067 inferred_type: None,
23068 },
23069 )))
23070 }
23071 _ => {
23072 // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
23073 Ok(Expression::StringAgg(Box::new(
23074 crate::expressions::StringAggFunc {
23075 this: x,
23076 separator: Some(sep),
23077 order_by: Some(order_by),
23078 distinct,
23079 filter: None,
23080 limit: None,
23081 inferred_type: None,
23082 },
23083 )))
23084 }
23085 }
23086 } else {
23087 Ok(Expression::WithinGroup(wg))
23088 }
23089 }
23090 Expression::StringAgg(sa) => {
23091 match target {
23092 DialectType::MySQL
23093 | DialectType::SingleStore
23094 | DialectType::Doris
23095 | DialectType::StarRocks => {
23096 // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
23097 Ok(Expression::GroupConcat(Box::new(
23098 crate::expressions::GroupConcatFunc {
23099 this: sa.this,
23100 separator: sa.separator,
23101 order_by: sa.order_by,
23102 distinct: sa.distinct,
23103 filter: sa.filter,
23104 limit: None,
23105 inferred_type: None,
23106 },
23107 )))
23108 }
23109 DialectType::SQLite => {
23110 // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
23111 Ok(Expression::GroupConcat(Box::new(
23112 crate::expressions::GroupConcatFunc {
23113 this: sa.this,
23114 separator: sa.separator,
23115 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
23116 distinct: sa.distinct,
23117 filter: sa.filter,
23118 limit: None,
23119 inferred_type: None,
23120 },
23121 )))
23122 }
23123 DialectType::Spark | DialectType::Databricks => {
23124 // STRING_AGG(x, sep) -> LISTAGG(x, sep)
23125 Ok(Expression::ListAgg(Box::new(
23126 crate::expressions::ListAggFunc {
23127 this: sa.this,
23128 separator: sa.separator,
23129 on_overflow: None,
23130 order_by: sa.order_by,
23131 distinct: sa.distinct,
23132 filter: None,
23133 inferred_type: None,
23134 },
23135 )))
23136 }
23137 _ => Ok(Expression::StringAgg(sa)),
23138 }
23139 }
23140 _ => Ok(e),
23141 }
23142 }
23143 Action::GroupConcatConvert => {
23144 // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
23145 // or CONCAT(a, b, c) -> a + b + c (for TSQL)
23146 fn expand_concat_to_dpipe(expr: Expression) -> Expression {
23147 if let Expression::Function(ref f) = expr {
23148 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
23149 let mut result = f.args[0].clone();
23150 for arg in &f.args[1..] {
23151 result = Expression::Concat(Box::new(BinaryOp {
23152 left: result,
23153 right: arg.clone(),
23154 left_comments: vec![],
23155 operator_comments: vec![],
23156 trailing_comments: vec![],
23157 inferred_type: None,
23158 }));
23159 }
23160 return result;
23161 }
23162 }
23163 expr
23164 }
23165 fn expand_concat_to_plus(expr: Expression) -> Expression {
23166 if let Expression::Function(ref f) = expr {
23167 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
23168 let mut result = f.args[0].clone();
23169 for arg in &f.args[1..] {
23170 result = Expression::Add(Box::new(BinaryOp {
23171 left: result,
23172 right: arg.clone(),
23173 left_comments: vec![],
23174 operator_comments: vec![],
23175 trailing_comments: vec![],
23176 inferred_type: None,
23177 }));
23178 }
23179 return result;
23180 }
23181 }
23182 expr
23183 }
23184 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
23185 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
23186 if let Expression::Function(ref f) = expr {
23187 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
23188 let new_args: Vec<Expression> = f
23189 .args
23190 .iter()
23191 .map(|arg| {
23192 Expression::Cast(Box::new(crate::expressions::Cast {
23193 this: arg.clone(),
23194 to: crate::expressions::DataType::VarChar {
23195 length: None,
23196 parenthesized_length: false,
23197 },
23198 trailing_comments: Vec::new(),
23199 double_colon_syntax: false,
23200 format: None,
23201 default: None,
23202 inferred_type: None,
23203 }))
23204 })
23205 .collect();
23206 return Expression::Function(Box::new(
23207 crate::expressions::Function::new(
23208 "CONCAT".to_string(),
23209 new_args,
23210 ),
23211 ));
23212 }
23213 }
23214 expr
23215 }
23216 if let Expression::GroupConcat(gc) = e {
23217 match target {
23218 DialectType::Presto => {
23219 // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
23220 let sep = gc.separator.unwrap_or(Expression::string(","));
23221 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
23222 let this = wrap_concat_args_in_varchar_cast(gc.this);
23223 let array_agg =
23224 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
23225 this,
23226 distinct: gc.distinct,
23227 filter: gc.filter,
23228 order_by: gc.order_by.unwrap_or_default(),
23229 name: None,
23230 ignore_nulls: None,
23231 having_max: None,
23232 limit: None,
23233 inferred_type: None,
23234 }));
23235 Ok(Expression::ArrayJoin(Box::new(
23236 crate::expressions::ArrayJoinFunc {
23237 this: array_agg,
23238 separator: sep,
23239 null_replacement: None,
23240 },
23241 )))
23242 }
23243 DialectType::Trino => {
23244 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
23245 let sep = gc.separator.unwrap_or(Expression::string(","));
23246 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
23247 let this = wrap_concat_args_in_varchar_cast(gc.this);
23248 Ok(Expression::ListAgg(Box::new(
23249 crate::expressions::ListAggFunc {
23250 this,
23251 separator: Some(sep),
23252 on_overflow: None,
23253 order_by: gc.order_by,
23254 distinct: gc.distinct,
23255 filter: gc.filter,
23256 inferred_type: None,
23257 },
23258 )))
23259 }
23260 DialectType::PostgreSQL
23261 | DialectType::Redshift
23262 | DialectType::Snowflake
23263 | DialectType::DuckDB
23264 | DialectType::Hive
23265 | DialectType::ClickHouse => {
23266 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
23267 let sep = gc.separator.unwrap_or(Expression::string(","));
23268 // Expand CONCAT(a,b,c) -> a || b || c for || dialects
23269 let this = expand_concat_to_dpipe(gc.this);
23270 // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
23271 let order_by = if target == DialectType::PostgreSQL {
23272 gc.order_by.map(|ords| {
23273 ords.into_iter()
23274 .map(|mut o| {
23275 if o.nulls_first.is_none() {
23276 if o.desc {
23277 o.nulls_first = Some(false);
23278 // NULLS LAST
23279 } else {
23280 o.nulls_first = Some(true);
23281 // NULLS FIRST
23282 }
23283 }
23284 o
23285 })
23286 .collect()
23287 })
23288 } else {
23289 gc.order_by
23290 };
23291 Ok(Expression::StringAgg(Box::new(
23292 crate::expressions::StringAggFunc {
23293 this,
23294 separator: Some(sep),
23295 order_by,
23296 distinct: gc.distinct,
23297 filter: gc.filter,
23298 limit: None,
23299 inferred_type: None,
23300 },
23301 )))
23302 }
23303 DialectType::TSQL => {
23304 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
23305 // TSQL doesn't support DISTINCT in STRING_AGG
23306 let sep = gc.separator.unwrap_or(Expression::string(","));
23307 // Expand CONCAT(a,b,c) -> a + b + c for TSQL
23308 let this = expand_concat_to_plus(gc.this);
23309 Ok(Expression::StringAgg(Box::new(
23310 crate::expressions::StringAggFunc {
23311 this,
23312 separator: Some(sep),
23313 order_by: gc.order_by,
23314 distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
23315 filter: gc.filter,
23316 limit: None,
23317 inferred_type: None,
23318 },
23319 )))
23320 }
23321 DialectType::SQLite => {
23322 // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
23323 // SQLite GROUP_CONCAT doesn't support ORDER BY
23324 // Expand CONCAT(a,b,c) -> a || b || c
23325 let this = expand_concat_to_dpipe(gc.this);
23326 Ok(Expression::GroupConcat(Box::new(
23327 crate::expressions::GroupConcatFunc {
23328 this,
23329 separator: gc.separator,
23330 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
23331 distinct: gc.distinct,
23332 filter: gc.filter,
23333 limit: None,
23334 inferred_type: None,
23335 },
23336 )))
23337 }
23338 DialectType::Spark | DialectType::Databricks => {
23339 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
23340 let sep = gc.separator.unwrap_or(Expression::string(","));
23341 Ok(Expression::ListAgg(Box::new(
23342 crate::expressions::ListAggFunc {
23343 this: gc.this,
23344 separator: Some(sep),
23345 on_overflow: None,
23346 order_by: gc.order_by,
23347 distinct: gc.distinct,
23348 filter: None,
23349 inferred_type: None,
23350 },
23351 )))
23352 }
23353 DialectType::MySQL
23354 | DialectType::SingleStore
23355 | DialectType::StarRocks => {
23356 // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
23357 if gc.separator.is_none() {
23358 let mut gc = gc;
23359 gc.separator = Some(Expression::string(","));
23360 Ok(Expression::GroupConcat(gc))
23361 } else {
23362 Ok(Expression::GroupConcat(gc))
23363 }
23364 }
23365 _ => Ok(Expression::GroupConcat(gc)),
23366 }
23367 } else {
23368 Ok(e)
23369 }
23370 }
23371 Action::TempTableHash => {
23372 match e {
23373 Expression::CreateTable(mut ct) => {
23374 // TSQL #table -> TEMPORARY TABLE with # stripped from name
23375 let name = &ct.name.name.name;
23376 if name.starts_with('#') {
23377 ct.name.name.name = name.trim_start_matches('#').to_string();
23378 }
23379 // Set temporary flag
23380 ct.temporary = true;
23381 Ok(Expression::CreateTable(ct))
23382 }
23383 Expression::Table(mut tr) => {
23384 // Strip # from table references
23385 let name = &tr.name.name;
23386 if name.starts_with('#') {
23387 tr.name.name = name.trim_start_matches('#').to_string();
23388 }
23389 Ok(Expression::Table(tr))
23390 }
23391 Expression::DropTable(mut dt) => {
23392 // Strip # from DROP TABLE names
23393 for table_ref in &mut dt.names {
23394 if table_ref.name.name.starts_with('#') {
23395 table_ref.name.name =
23396 table_ref.name.name.trim_start_matches('#').to_string();
23397 }
23398 }
23399 Ok(Expression::DropTable(dt))
23400 }
23401 _ => Ok(e),
23402 }
23403 }
23404 Action::NvlClearOriginal => {
23405 if let Expression::Nvl(mut f) = e {
23406 f.original_name = None;
23407 Ok(Expression::Nvl(f))
23408 } else {
23409 Ok(e)
23410 }
23411 }
23412 Action::HiveCastToTryCast => {
23413 // Convert Hive/Spark CAST to TRY_CAST for targets that support it
23414 if let Expression::Cast(mut c) = e {
23415 // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
23416 // (Spark's TIMESTAMP is always timezone-aware)
23417 if matches!(target, DialectType::DuckDB)
23418 && matches!(source, DialectType::Spark | DialectType::Databricks)
23419 && matches!(
23420 c.to,
23421 DataType::Timestamp {
23422 timezone: false,
23423 ..
23424 }
23425 )
23426 {
23427 c.to = DataType::Custom {
23428 name: "TIMESTAMPTZ".to_string(),
23429 };
23430 }
23431 // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
23432 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
23433 if matches!(target, DialectType::Databricks | DialectType::Spark)
23434 && matches!(
23435 source,
23436 DialectType::Spark | DialectType::Databricks | DialectType::Hive
23437 )
23438 && Self::has_varchar_char_type(&c.to)
23439 {
23440 c.to = Self::normalize_varchar_to_string(c.to);
23441 }
23442 Ok(Expression::TryCast(c))
23443 } else {
23444 Ok(e)
23445 }
23446 }
23447 Action::XorExpand => {
23448 // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
23449 // Snowflake: use BOOLXOR(a, b) instead
23450 if let Expression::Xor(xor) = e {
23451 // Collect all XOR operands
23452 let mut operands = Vec::new();
23453 if let Some(this) = xor.this {
23454 operands.push(*this);
23455 }
23456 if let Some(expr) = xor.expression {
23457 operands.push(*expr);
23458 }
23459 operands.extend(xor.expressions);
23460
23461 // Snowflake: use BOOLXOR(a, b)
23462 if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
23463 let a = operands.remove(0);
23464 let b = operands.remove(0);
23465 return Ok(Expression::Function(Box::new(Function::new(
23466 "BOOLXOR".to_string(),
23467 vec![a, b],
23468 ))));
23469 }
23470
23471 // Helper to build (a AND NOT b) OR (NOT a AND b)
23472 let make_xor = |a: Expression, b: Expression| -> Expression {
23473 let not_b = Expression::Not(Box::new(
23474 crate::expressions::UnaryOp::new(b.clone()),
23475 ));
23476 let not_a = Expression::Not(Box::new(
23477 crate::expressions::UnaryOp::new(a.clone()),
23478 ));
23479 let left_and = Expression::And(Box::new(BinaryOp {
23480 left: a,
23481 right: Expression::Paren(Box::new(Paren {
23482 this: not_b,
23483 trailing_comments: Vec::new(),
23484 })),
23485 left_comments: Vec::new(),
23486 operator_comments: Vec::new(),
23487 trailing_comments: Vec::new(),
23488 inferred_type: None,
23489 }));
23490 let right_and = Expression::And(Box::new(BinaryOp {
23491 left: Expression::Paren(Box::new(Paren {
23492 this: not_a,
23493 trailing_comments: Vec::new(),
23494 })),
23495 right: b,
23496 left_comments: Vec::new(),
23497 operator_comments: Vec::new(),
23498 trailing_comments: Vec::new(),
23499 inferred_type: None,
23500 }));
23501 Expression::Or(Box::new(BinaryOp {
23502 left: Expression::Paren(Box::new(Paren {
23503 this: left_and,
23504 trailing_comments: Vec::new(),
23505 })),
23506 right: Expression::Paren(Box::new(Paren {
23507 this: right_and,
23508 trailing_comments: Vec::new(),
23509 })),
23510 left_comments: Vec::new(),
23511 operator_comments: Vec::new(),
23512 trailing_comments: Vec::new(),
23513 inferred_type: None,
23514 }))
23515 };
23516
23517 if operands.len() >= 2 {
23518 let mut result = make_xor(operands.remove(0), operands.remove(0));
23519 for operand in operands {
23520 result = make_xor(result, operand);
23521 }
23522 Ok(result)
23523 } else if operands.len() == 1 {
23524 Ok(operands.remove(0))
23525 } else {
23526 // No operands - return FALSE (shouldn't happen)
23527 Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
23528 value: false,
23529 }))
23530 }
23531 } else {
23532 Ok(e)
23533 }
23534 }
23535 Action::DatePartUnquote => {
23536 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
23537 // Convert the quoted string first arg to a bare Column/Identifier
23538 if let Expression::Function(mut f) = e {
23539 if let Some(Expression::Literal(lit)) = f.args.first() {
23540 if let crate::expressions::Literal::String(s) = lit.as_ref() {
23541 let bare_name = s.to_ascii_lowercase();
23542 f.args[0] =
23543 Expression::Column(Box::new(crate::expressions::Column {
23544 name: Identifier::new(bare_name),
23545 table: None,
23546 join_mark: false,
23547 trailing_comments: Vec::new(),
23548 span: None,
23549 inferred_type: None,
23550 }));
23551 }
23552 }
23553 Ok(Expression::Function(f))
23554 } else {
23555 Ok(e)
23556 }
23557 }
23558 Action::ArrayLengthConvert => {
23559 // Extract the argument from the expression
23560 let arg = match e {
23561 Expression::Cardinality(ref f) => f.this.clone(),
23562 Expression::ArrayLength(ref f) => f.this.clone(),
23563 Expression::ArraySize(ref f) => f.this.clone(),
23564 _ => return Ok(e),
23565 };
23566 match target {
23567 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
23568 Ok(Expression::Function(Box::new(Function::new(
23569 "SIZE".to_string(),
23570 vec![arg],
23571 ))))
23572 }
23573 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23574 Ok(Expression::Cardinality(Box::new(
23575 crate::expressions::UnaryFunc::new(arg),
23576 )))
23577 }
23578 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
23579 crate::expressions::UnaryFunc::new(arg),
23580 ))),
23581 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
23582 crate::expressions::UnaryFunc::new(arg),
23583 ))),
23584 DialectType::PostgreSQL | DialectType::Redshift => {
23585 // PostgreSQL ARRAY_LENGTH requires dimension arg
23586 Ok(Expression::Function(Box::new(Function::new(
23587 "ARRAY_LENGTH".to_string(),
23588 vec![arg, Expression::number(1)],
23589 ))))
23590 }
23591 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
23592 crate::expressions::UnaryFunc::new(arg),
23593 ))),
23594 _ => Ok(e), // Keep original
23595 }
23596 }
23597
23598 Action::JsonExtractToArrow => {
23599 // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
23600 if let Expression::JsonExtract(mut f) = e {
23601 f.arrow_syntax = true;
23602 // Transform path: convert bracket notation to dot notation
23603 // SQLite strips wildcards, DuckDB preserves them
23604 if let Expression::Literal(ref lit) = f.path {
23605 if let Literal::String(ref s) = lit.as_ref() {
23606 let mut transformed = s.clone();
23607 if matches!(target, DialectType::SQLite) {
23608 transformed = Self::strip_json_wildcards(&transformed);
23609 }
23610 transformed = Self::bracket_to_dot_notation(&transformed);
23611 if transformed != *s {
23612 f.path = Expression::string(&transformed);
23613 }
23614 }
23615 }
23616 Ok(Expression::JsonExtract(f))
23617 } else {
23618 Ok(e)
23619 }
23620 }
23621
23622 Action::JsonExtractToGetJsonObject => {
23623 if let Expression::JsonExtract(f) = e {
23624 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
23625 // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
23626 // Use proper decomposition that handles brackets
23627 let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path {
23628 if let Literal::String(ref s) = lit.as_ref() {
23629 let parts = Self::decompose_json_path(s);
23630 parts.into_iter().map(|k| Expression::string(&k)).collect()
23631 } else {
23632 vec![]
23633 }
23634 } else {
23635 vec![f.path]
23636 };
23637 let func_name = if matches!(target, DialectType::Redshift) {
23638 "JSON_EXTRACT_PATH_TEXT"
23639 } else {
23640 "JSON_EXTRACT_PATH"
23641 };
23642 let mut args = vec![f.this];
23643 args.extend(keys);
23644 Ok(Expression::Function(Box::new(Function::new(
23645 func_name.to_string(),
23646 args,
23647 ))))
23648 } else {
23649 // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
23650 // Convert bracket double quotes to single quotes
23651 let path = if let Expression::Literal(ref lit) = f.path {
23652 if let Literal::String(ref s) = lit.as_ref() {
23653 let normalized = Self::bracket_to_single_quotes(s);
23654 if normalized != *s {
23655 Expression::string(&normalized)
23656 } else {
23657 f.path.clone()
23658 }
23659 } else {
23660 f.path.clone()
23661 }
23662 } else {
23663 f.path.clone()
23664 };
23665 Ok(Expression::Function(Box::new(Function::new(
23666 "GET_JSON_OBJECT".to_string(),
23667 vec![f.this, path],
23668 ))))
23669 }
23670 } else {
23671 Ok(e)
23672 }
23673 }
23674
23675 Action::JsonExtractScalarToGetJsonObject => {
23676 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
23677 if let Expression::JsonExtractScalar(f) = e {
23678 Ok(Expression::Function(Box::new(Function::new(
23679 "GET_JSON_OBJECT".to_string(),
23680 vec![f.this, f.path],
23681 ))))
23682 } else {
23683 Ok(e)
23684 }
23685 }
23686
23687 Action::JsonExtractToTsql => {
23688 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY(x, path), JSON_VALUE(x, path)) for TSQL
23689 let (this, path) = match e {
23690 Expression::JsonExtract(f) => (f.this, f.path),
23691 Expression::JsonExtractScalar(f) => (f.this, f.path),
23692 _ => return Ok(e),
23693 };
23694 // Transform path: strip wildcards, convert bracket notation to dot notation
23695 let transformed_path = if let Expression::Literal(ref lit) = path {
23696 if let Literal::String(ref s) = lit.as_ref() {
23697 let stripped = Self::strip_json_wildcards(s);
23698 let dotted = Self::bracket_to_dot_notation(&stripped);
23699 Expression::string(&dotted)
23700 } else {
23701 path.clone()
23702 }
23703 } else {
23704 path
23705 };
23706 let json_query = Expression::Function(Box::new(Function::new(
23707 "JSON_QUERY".to_string(),
23708 vec![this.clone(), transformed_path.clone()],
23709 )));
23710 let json_value = Expression::Function(Box::new(Function::new(
23711 "JSON_VALUE".to_string(),
23712 vec![this, transformed_path],
23713 )));
23714 Ok(Expression::Function(Box::new(Function::new(
23715 "ISNULL".to_string(),
23716 vec![json_query, json_value],
23717 ))))
23718 }
23719
23720 Action::JsonExtractToClickHouse => {
23721 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString(x, 'key1', idx, 'key2') for ClickHouse
23722 let (this, path) = match e {
23723 Expression::JsonExtract(f) => (f.this, f.path),
23724 Expression::JsonExtractScalar(f) => (f.this, f.path),
23725 _ => return Ok(e),
23726 };
23727 let args: Vec<Expression> = if let Expression::Literal(lit) = path {
23728 if let Literal::String(ref s) = lit.as_ref() {
23729 let parts = Self::decompose_json_path(s);
23730 let mut result = vec![this];
23731 for part in parts {
23732 // ClickHouse uses 1-based integer indices for array access
23733 if let Ok(idx) = part.parse::<i64>() {
23734 result.push(Expression::number(idx + 1));
23735 } else {
23736 result.push(Expression::string(&part));
23737 }
23738 }
23739 result
23740 } else {
23741 vec![]
23742 }
23743 } else {
23744 vec![this, path]
23745 };
23746 Ok(Expression::Function(Box::new(Function::new(
23747 "JSONExtractString".to_string(),
23748 args,
23749 ))))
23750 }
23751
23752 Action::JsonExtractScalarConvert => {
23753 // JSON_EXTRACT_SCALAR -> target-specific
23754 if let Expression::JsonExtractScalar(f) = e {
23755 match target {
23756 DialectType::PostgreSQL | DialectType::Redshift => {
23757 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'key1', 'key2')
23758 let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path
23759 {
23760 if let Literal::String(ref s) = lit.as_ref() {
23761 let parts = Self::decompose_json_path(s);
23762 parts.into_iter().map(|k| Expression::string(&k)).collect()
23763 } else {
23764 vec![]
23765 }
23766 } else {
23767 vec![f.path]
23768 };
23769 let mut args = vec![f.this];
23770 args.extend(keys);
23771 Ok(Expression::Function(Box::new(Function::new(
23772 "JSON_EXTRACT_PATH_TEXT".to_string(),
23773 args,
23774 ))))
23775 }
23776 DialectType::Snowflake => {
23777 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'stripped_path')
23778 let stripped_path = if let Expression::Literal(ref lit) = f.path {
23779 if let Literal::String(ref s) = lit.as_ref() {
23780 let stripped = Self::strip_json_dollar_prefix(s);
23781 Expression::string(&stripped)
23782 } else {
23783 f.path.clone()
23784 }
23785 } else {
23786 f.path
23787 };
23788 Ok(Expression::Function(Box::new(Function::new(
23789 "JSON_EXTRACT_PATH_TEXT".to_string(),
23790 vec![f.this, stripped_path],
23791 ))))
23792 }
23793 DialectType::SQLite | DialectType::DuckDB => {
23794 // JSON_EXTRACT_SCALAR(x, '$.path') -> x ->> '$.path'
23795 Ok(Expression::JsonExtractScalar(Box::new(
23796 crate::expressions::JsonExtractFunc {
23797 this: f.this,
23798 path: f.path,
23799 returning: f.returning,
23800 arrow_syntax: true,
23801 hash_arrow_syntax: false,
23802 wrapper_option: None,
23803 quotes_option: None,
23804 on_scalar_string: false,
23805 on_error: None,
23806 },
23807 )))
23808 }
23809 _ => Ok(Expression::JsonExtractScalar(f)),
23810 }
23811 } else {
23812 Ok(e)
23813 }
23814 }
23815
23816 Action::JsonPathNormalize => {
23817 // Normalize JSON path format for BigQuery, MySQL, etc.
23818 if let Expression::JsonExtract(mut f) = e {
23819 if let Expression::Literal(ref lit) = f.path {
23820 if let Literal::String(ref s) = lit.as_ref() {
23821 let mut normalized = s.clone();
23822 // Convert bracket notation and handle wildcards per dialect
23823 match target {
23824 DialectType::BigQuery => {
23825 // BigQuery strips wildcards and uses single quotes in brackets
23826 normalized = Self::strip_json_wildcards(&normalized);
23827 normalized = Self::bracket_to_single_quotes(&normalized);
23828 }
23829 DialectType::MySQL => {
23830 // MySQL preserves wildcards, converts brackets to dot notation
23831 normalized = Self::bracket_to_dot_notation(&normalized);
23832 }
23833 _ => {}
23834 }
23835 if normalized != *s {
23836 f.path = Expression::string(&normalized);
23837 }
23838 }
23839 }
23840 Ok(Expression::JsonExtract(f))
23841 } else {
23842 Ok(e)
23843 }
23844 }
23845
Action::JsonQueryValueConvert => {
    // Converts a JsonQuery or JsonValue node into the target dialect's form.
    // `is_query` records which of the two was matched; it is only consulted
    // by the default arm, every other arm treats both nodes the same way.
    let (f, is_query) = match e {
        Expression::JsonQuery(f) => (f, true),
        Expression::JsonValue(f) => (f, false),
        _ => return Ok(e),
    };
    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // ISNULL(JSON_QUERY(...), JSON_VALUE(...))
            // Both calls receive the same document and the same, unmodified path.
            let json_query = Expression::Function(Box::new(Function::new(
                "JSON_QUERY".to_string(),
                vec![f.this.clone(), f.path.clone()],
            )));
            let json_value = Expression::Function(Box::new(Function::new(
                "JSON_VALUE".to_string(),
                vec![f.this, f.path],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ISNULL".to_string(),
                vec![json_query, json_value],
            ))))
        }
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            // GET_JSON_OBJECT(json, path) with the path passed through as-is.
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                vec![f.this, f.path],
            ))))
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            // JSON_EXTRACT_PATH_TEXT(json, path) with the path passed through as-is.
            // NOTE(review): other PostgreSQL/Redshift JSON arms in this match split
            // the path into individual keys via decompose_json_path; this one does
            // not. Confirm that is intended.
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXTRACT_PATH_TEXT".to_string(),
                vec![f.this, f.path],
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // json -> path arrow syntax
            // The node is rebuilt as a JsonExtract; all options are carried over.
            Ok(Expression::JsonExtract(Box::new(
                crate::expressions::JsonExtractFunc {
                    this: f.this,
                    path: f.path,
                    returning: f.returning,
                    arrow_syntax: true,
                    hash_arrow_syntax: false,
                    wrapper_option: f.wrapper_option,
                    quotes_option: f.quotes_option,
                    on_scalar_string: f.on_scalar_string,
                    on_error: f.on_error,
                },
            )))
        }
        DialectType::Snowflake => {
            // GET_PATH(PARSE_JSON(json), 'path')
            // Strip $. prefix from path
            // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
            let json_expr = match &f.this {
                Expression::Function(ref inner_f)
                    if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                {
                    f.this
                }
                Expression::ParseJson(_) => {
                    // Already a ParseJson expression, which generates as PARSE_JSON(...)
                    f.this
                }
                _ => Expression::Function(Box::new(Function::new(
                    "PARSE_JSON".to_string(),
                    vec![f.this],
                ))),
            };
            // Only a string-literal path is rewritten; a leading "$." is removed
            // when present. Every other path expression is cloned unchanged.
            let path_str = match &f.path {
                Expression::Literal(lit)
                    if matches!(lit.as_ref(), Literal::String(_)) =>
                {
                    // The guard above already established that this is a string literal.
                    let Literal::String(s) = lit.as_ref() else {
                        unreachable!()
                    };
                    let stripped = s.strip_prefix("$.").unwrap_or(s);
                    Expression::Literal(Box::new(Literal::String(
                        stripped.to_string(),
                    )))
                }
                other => other.clone(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "GET_PATH".to_string(),
                vec![json_expr, path_str],
            ))))
        }
        _ => {
            // Default: keep as JSON_QUERY/JSON_VALUE function
            // The result is a plain function call with only the document and
            // the path as arguments.
            let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![f.this, f.path],
            ))))
        }
    }
}
23945
23946 Action::JsonLiteralToJsonParse => {
23947 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
23948 // Also DuckDB CAST(x AS JSON) -> JSON_PARSE(x) for Trino/Presto/Athena
23949 if let Expression::Cast(c) = e {
23950 let func_name = if matches!(target, DialectType::Snowflake) {
23951 "PARSE_JSON"
23952 } else {
23953 "JSON_PARSE"
23954 };
23955 Ok(Expression::Function(Box::new(Function::new(
23956 func_name.to_string(),
23957 vec![c.this],
23958 ))))
23959 } else {
23960 Ok(e)
23961 }
23962 }
23963
23964 Action::DuckDBCastJsonToVariant => {
23965 if let Expression::Cast(c) = e {
23966 Ok(Expression::Cast(Box::new(Cast {
23967 this: c.this,
23968 to: DataType::Custom {
23969 name: "VARIANT".to_string(),
23970 },
23971 trailing_comments: c.trailing_comments,
23972 double_colon_syntax: false,
23973 format: None,
23974 default: None,
23975 inferred_type: None,
23976 })))
23977 } else {
23978 Ok(e)
23979 }
23980 }
23981
23982 Action::DuckDBTryCastJsonToTryJsonParse => {
23983 // DuckDB TRY_CAST(x AS JSON) -> TRY(JSON_PARSE(x)) for Trino/Presto/Athena
23984 if let Expression::TryCast(c) = e {
23985 let json_parse = Expression::Function(Box::new(Function::new(
23986 "JSON_PARSE".to_string(),
23987 vec![c.this],
23988 )));
23989 Ok(Expression::Function(Box::new(Function::new(
23990 "TRY".to_string(),
23991 vec![json_parse],
23992 ))))
23993 } else {
23994 Ok(e)
23995 }
23996 }
23997
23998 Action::DuckDBJsonFuncToJsonParse => {
23999 // DuckDB json(x) -> JSON_PARSE(x) for Trino/Presto/Athena
24000 if let Expression::Function(f) = e {
24001 let args = f.args;
24002 Ok(Expression::Function(Box::new(Function::new(
24003 "JSON_PARSE".to_string(),
24004 args,
24005 ))))
24006 } else {
24007 Ok(e)
24008 }
24009 }
24010
24011 Action::DuckDBJsonValidToIsJson => {
24012 // DuckDB json_valid(x) -> x IS JSON (SQL:2016 predicate) for Trino/Presto/Athena
24013 if let Expression::Function(mut f) = e {
24014 let arg = f.args.remove(0);
24015 Ok(Expression::IsJson(Box::new(crate::expressions::IsJson {
24016 this: arg,
24017 json_type: None,
24018 unique_keys: None,
24019 negated: false,
24020 })))
24021 } else {
24022 Ok(e)
24023 }
24024 }
24025
24026 Action::AtTimeZoneConvert => {
24027 // AT TIME ZONE -> target-specific conversion
24028 if let Expression::AtTimeZone(atz) = e {
24029 match target {
24030 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24031 Ok(Expression::Function(Box::new(Function::new(
24032 "AT_TIMEZONE".to_string(),
24033 vec![atz.this, atz.zone],
24034 ))))
24035 }
24036 DialectType::Spark | DialectType::Databricks => {
24037 Ok(Expression::Function(Box::new(Function::new(
24038 "FROM_UTC_TIMESTAMP".to_string(),
24039 vec![atz.this, atz.zone],
24040 ))))
24041 }
24042 DialectType::Snowflake => {
24043 // CONVERT_TIMEZONE('zone', expr)
24044 Ok(Expression::Function(Box::new(Function::new(
24045 "CONVERT_TIMEZONE".to_string(),
24046 vec![atz.zone, atz.this],
24047 ))))
24048 }
24049 DialectType::BigQuery => {
24050 // TIMESTAMP(DATETIME(expr, 'zone'))
24051 let datetime_call = Expression::Function(Box::new(Function::new(
24052 "DATETIME".to_string(),
24053 vec![atz.this, atz.zone],
24054 )));
24055 Ok(Expression::Function(Box::new(Function::new(
24056 "TIMESTAMP".to_string(),
24057 vec![datetime_call],
24058 ))))
24059 }
24060 _ => Ok(Expression::Function(Box::new(Function::new(
24061 "AT_TIMEZONE".to_string(),
24062 vec![atz.this, atz.zone],
24063 )))),
24064 }
24065 } else {
24066 Ok(e)
24067 }
24068 }
24069
24070 Action::DayOfWeekConvert => {
24071 // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
24072 if let Expression::DayOfWeek(f) = e {
24073 match target {
24074 DialectType::DuckDB => Ok(Expression::Function(Box::new(
24075 Function::new("ISODOW".to_string(), vec![f.this]),
24076 ))),
24077 DialectType::Spark | DialectType::Databricks => {
24078 // ((DAYOFWEEK(x) % 7) + 1)
24079 let dayofweek = Expression::Function(Box::new(Function::new(
24080 "DAYOFWEEK".to_string(),
24081 vec![f.this],
24082 )));
24083 let modulo = Expression::Mod(Box::new(BinaryOp {
24084 left: dayofweek,
24085 right: Expression::number(7),
24086 left_comments: Vec::new(),
24087 operator_comments: Vec::new(),
24088 trailing_comments: Vec::new(),
24089 inferred_type: None,
24090 }));
24091 let paren_mod = Expression::Paren(Box::new(Paren {
24092 this: modulo,
24093 trailing_comments: Vec::new(),
24094 }));
24095 let add_one = Expression::Add(Box::new(BinaryOp {
24096 left: paren_mod,
24097 right: Expression::number(1),
24098 left_comments: Vec::new(),
24099 operator_comments: Vec::new(),
24100 trailing_comments: Vec::new(),
24101 inferred_type: None,
24102 }));
24103 Ok(Expression::Paren(Box::new(Paren {
24104 this: add_one,
24105 trailing_comments: Vec::new(),
24106 })))
24107 }
24108 _ => Ok(Expression::DayOfWeek(f)),
24109 }
24110 } else {
24111 Ok(e)
24112 }
24113 }
24114
24115 Action::MaxByMinByConvert => {
24116 // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
24117 // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
24118 // Handle both Expression::Function and Expression::AggregateFunction
24119 let (is_max, args) = match &e {
24120 Expression::Function(f) => {
24121 (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
24122 }
24123 Expression::AggregateFunction(af) => {
24124 (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
24125 }
24126 _ => return Ok(e),
24127 };
24128 match target {
24129 DialectType::ClickHouse => {
24130 let name = if is_max { "argMax" } else { "argMin" };
24131 let mut args = args;
24132 args.truncate(2);
24133 Ok(Expression::Function(Box::new(Function::new(
24134 name.to_string(),
24135 args,
24136 ))))
24137 }
24138 DialectType::DuckDB => {
24139 let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
24140 Ok(Expression::Function(Box::new(Function::new(
24141 name.to_string(),
24142 args,
24143 ))))
24144 }
24145 DialectType::Spark | DialectType::Databricks => {
24146 let mut args = args;
24147 args.truncate(2);
24148 let name = if is_max { "MAX_BY" } else { "MIN_BY" };
24149 Ok(Expression::Function(Box::new(Function::new(
24150 name.to_string(),
24151 args,
24152 ))))
24153 }
24154 _ => Ok(e),
24155 }
24156 }
24157
24158 Action::ElementAtConvert => {
24159 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
24160 let (arr, idx) = if let Expression::ElementAt(bf) = e {
24161 (bf.this, bf.expression)
24162 } else if let Expression::Function(ref f) = e {
24163 if f.args.len() >= 2 {
24164 if let Expression::Function(f) = e {
24165 let mut args = f.args;
24166 let arr = args.remove(0);
24167 let idx = args.remove(0);
24168 (arr, idx)
24169 } else {
24170 unreachable!("outer condition already matched Expression::Function")
24171 }
24172 } else {
24173 return Ok(e);
24174 }
24175 } else {
24176 return Ok(e);
24177 };
24178 match target {
24179 DialectType::PostgreSQL => {
24180 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
24181 let arr_expr = Expression::Paren(Box::new(Paren {
24182 this: arr,
24183 trailing_comments: vec![],
24184 }));
24185 Ok(Expression::Subscript(Box::new(
24186 crate::expressions::Subscript {
24187 this: arr_expr,
24188 index: idx,
24189 },
24190 )))
24191 }
24192 DialectType::BigQuery => {
24193 // BigQuery: convert ARRAY[...] to bare [...] for subscript
24194 let arr_expr = match arr {
24195 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
24196 crate::expressions::ArrayConstructor {
24197 expressions: af.expressions,
24198 bracket_notation: true,
24199 use_list_keyword: false,
24200 },
24201 )),
24202 other => other,
24203 };
24204 let safe_ordinal = Expression::Function(Box::new(Function::new(
24205 "SAFE_ORDINAL".to_string(),
24206 vec![idx],
24207 )));
24208 Ok(Expression::Subscript(Box::new(
24209 crate::expressions::Subscript {
24210 this: arr_expr,
24211 index: safe_ordinal,
24212 },
24213 )))
24214 }
24215 _ => Ok(Expression::Function(Box::new(Function::new(
24216 "ELEMENT_AT".to_string(),
24217 vec![arr, idx],
24218 )))),
24219 }
24220 }
24221
24222 Action::CurrentUserParens => {
24223 // CURRENT_USER -> CURRENT_USER() for Snowflake
24224 Ok(Expression::Function(Box::new(Function::new(
24225 "CURRENT_USER".to_string(),
24226 vec![],
24227 ))))
24228 }
24229
24230 Action::ArrayAggToCollectList => {
24231 // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
24232 // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
24233 // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
24234 match e {
24235 Expression::AggregateFunction(mut af) => {
24236 let is_simple =
24237 !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
24238 let args = if af.args.is_empty() {
24239 vec![]
24240 } else {
24241 vec![af.args[0].clone()]
24242 };
24243 af.name = "COLLECT_LIST".to_string();
24244 af.args = args;
24245 if is_simple {
24246 af.order_by = Vec::new();
24247 }
24248 Ok(Expression::AggregateFunction(af))
24249 }
24250 Expression::ArrayAgg(agg) => {
24251 let is_simple =
24252 !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
24253 Ok(Expression::AggregateFunction(Box::new(
24254 crate::expressions::AggregateFunction {
24255 name: "COLLECT_LIST".to_string(),
24256 args: vec![agg.this.clone()],
24257 distinct: agg.distinct,
24258 filter: agg.filter.clone(),
24259 order_by: if is_simple {
24260 Vec::new()
24261 } else {
24262 agg.order_by.clone()
24263 },
24264 limit: agg.limit.clone(),
24265 ignore_nulls: agg.ignore_nulls,
24266 inferred_type: None,
24267 },
24268 )))
24269 }
24270 _ => Ok(e),
24271 }
24272 }
24273
24274 Action::ArraySyntaxConvert => {
24275 match e {
24276 // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
24277 // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
24278 Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
24279 Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
24280 expressions: arr.expressions,
24281 bracket_notation: true,
24282 use_list_keyword: false,
24283 })),
24284 ),
24285 // ARRAY(y) function style -> ArrayFunc for target dialect
24286 // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
24287 Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
24288 let bracket = matches!(
24289 target,
24290 DialectType::BigQuery
24291 | DialectType::DuckDB
24292 | DialectType::Snowflake
24293 | DialectType::ClickHouse
24294 | DialectType::StarRocks
24295 );
24296 Ok(Expression::ArrayFunc(Box::new(
24297 crate::expressions::ArrayConstructor {
24298 expressions: f.args,
24299 bracket_notation: bracket,
24300 use_list_keyword: false,
24301 },
24302 )))
24303 }
24304 _ => Ok(e),
24305 }
24306 }
24307
24308 Action::CastToJsonForSpark => {
24309 // CAST(x AS JSON) -> TO_JSON(x) for Spark
24310 if let Expression::Cast(c) = e {
24311 Ok(Expression::Function(Box::new(Function::new(
24312 "TO_JSON".to_string(),
24313 vec![c.this],
24314 ))))
24315 } else {
24316 Ok(e)
24317 }
24318 }
24319
24320 Action::CastJsonToFromJson => {
24321 // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
24322 if let Expression::Cast(c) = e {
24323 // Extract the string literal from ParseJson
24324 let literal_expr = if let Expression::ParseJson(pj) = c.this {
24325 pj.this
24326 } else {
24327 c.this
24328 };
24329 // Convert the target DataType to Spark's type string format
24330 let type_str = Self::data_type_to_spark_string(&c.to);
24331 Ok(Expression::Function(Box::new(Function::new(
24332 "FROM_JSON".to_string(),
24333 vec![
24334 literal_expr,
24335 Expression::Literal(Box::new(Literal::String(type_str))),
24336 ],
24337 ))))
24338 } else {
24339 Ok(e)
24340 }
24341 }
24342
24343 Action::ToJsonConvert => {
24344 // TO_JSON(x) -> target-specific conversion
24345 if let Expression::ToJson(f) = e {
24346 let arg = f.this;
24347 match target {
24348 DialectType::Presto | DialectType::Trino => {
24349 // JSON_FORMAT(CAST(x AS JSON))
24350 let cast_json = Expression::Cast(Box::new(Cast {
24351 this: arg,
24352 to: DataType::Custom {
24353 name: "JSON".to_string(),
24354 },
24355 trailing_comments: vec![],
24356 double_colon_syntax: false,
24357 format: None,
24358 default: None,
24359 inferred_type: None,
24360 }));
24361 Ok(Expression::Function(Box::new(Function::new(
24362 "JSON_FORMAT".to_string(),
24363 vec![cast_json],
24364 ))))
24365 }
24366 DialectType::BigQuery => Ok(Expression::Function(Box::new(
24367 Function::new("TO_JSON_STRING".to_string(), vec![arg]),
24368 ))),
24369 DialectType::DuckDB => {
24370 // CAST(TO_JSON(x) AS TEXT)
24371 let to_json =
24372 Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
24373 this: arg,
24374 original_name: None,
24375 inferred_type: None,
24376 }));
24377 Ok(Expression::Cast(Box::new(Cast {
24378 this: to_json,
24379 to: DataType::Text,
24380 trailing_comments: vec![],
24381 double_colon_syntax: false,
24382 format: None,
24383 default: None,
24384 inferred_type: None,
24385 })))
24386 }
24387 _ => Ok(Expression::ToJson(Box::new(
24388 crate::expressions::UnaryFunc {
24389 this: arg,
24390 original_name: None,
24391 inferred_type: None,
24392 },
24393 ))),
24394 }
24395 } else {
24396 Ok(e)
24397 }
24398 }
24399
24400 Action::VarianceToClickHouse => {
24401 if let Expression::Variance(f) = e {
24402 Ok(Expression::Function(Box::new(Function::new(
24403 "varSamp".to_string(),
24404 vec![f.this],
24405 ))))
24406 } else {
24407 Ok(e)
24408 }
24409 }
24410
24411 Action::StddevToClickHouse => {
24412 if let Expression::Stddev(f) = e {
24413 Ok(Expression::Function(Box::new(Function::new(
24414 "stddevSamp".to_string(),
24415 vec![f.this],
24416 ))))
24417 } else {
24418 Ok(e)
24419 }
24420 }
24421
24422 Action::ApproxQuantileConvert => {
24423 if let Expression::ApproxQuantile(aq) = e {
24424 let mut args = vec![*aq.this];
24425 if let Some(q) = aq.quantile {
24426 args.push(*q);
24427 }
24428 Ok(Expression::Function(Box::new(Function::new(
24429 "APPROX_PERCENTILE".to_string(),
24430 args,
24431 ))))
24432 } else {
24433 Ok(e)
24434 }
24435 }
24436
24437 Action::DollarParamConvert => {
24438 if let Expression::Parameter(p) = e {
24439 Ok(Expression::Parameter(Box::new(
24440 crate::expressions::Parameter {
24441 name: p.name,
24442 index: p.index,
24443 style: crate::expressions::ParameterStyle::At,
24444 quoted: p.quoted,
24445 string_quoted: p.string_quoted,
24446 expression: p.expression,
24447 },
24448 )))
24449 } else {
24450 Ok(e)
24451 }
24452 }
24453
24454 Action::EscapeStringNormalize => {
24455 if let Expression::Literal(ref lit) = e {
24456 if let Literal::EscapeString(s) = lit.as_ref() {
24457 // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
24458 let stripped = if s.starts_with("e:") || s.starts_with("E:") {
24459 s[2..].to_string()
24460 } else {
24461 s.clone()
24462 };
24463 let normalized = stripped
24464 .replace('\n', "\\n")
24465 .replace('\r', "\\r")
24466 .replace('\t', "\\t");
24467 match target {
24468 DialectType::BigQuery => {
24469 // BigQuery: e'...' -> CAST(b'...' AS STRING)
24470 // Use Raw for the b'...' part to avoid double-escaping
24471 let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
24472 Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
24473 }
24474 _ => Ok(Expression::Literal(Box::new(Literal::EscapeString(
24475 normalized,
24476 )))),
24477 }
24478 } else {
24479 Ok(e)
24480 }
24481 } else {
24482 Ok(e)
24483 }
24484 }
24485
24486 Action::StraightJoinCase => {
24487 // straight_join: keep lowercase for DuckDB, quote for MySQL
24488 if let Expression::Column(col) = e {
24489 if col.name.name == "STRAIGHT_JOIN" {
24490 let mut new_col = col;
24491 new_col.name.name = "straight_join".to_string();
24492 if matches!(target, DialectType::MySQL) {
24493 // MySQL: needs quoting since it's a reserved keyword
24494 new_col.name.quoted = true;
24495 }
24496 Ok(Expression::Column(new_col))
24497 } else {
24498 Ok(Expression::Column(col))
24499 }
24500 } else {
24501 Ok(e)
24502 }
24503 }
24504
24505 Action::TablesampleReservoir => {
24506 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
24507 if let Expression::TableSample(mut ts) = e {
24508 if let Some(ref mut sample) = ts.sample {
24509 sample.method = crate::expressions::SampleMethod::Reservoir;
24510 sample.explicit_method = true;
24511 }
24512 Ok(Expression::TableSample(ts))
24513 } else {
24514 Ok(e)
24515 }
24516 }
24517
24518 Action::TablesampleSnowflakeStrip => {
24519 // Strip method and PERCENT for Snowflake target from non-Snowflake source
24520 match e {
24521 Expression::TableSample(mut ts) => {
24522 if let Some(ref mut sample) = ts.sample {
24523 sample.suppress_method_output = true;
24524 sample.unit_after_size = false;
24525 sample.is_percent = false;
24526 }
24527 Ok(Expression::TableSample(ts))
24528 }
24529 Expression::Table(mut t) => {
24530 if let Some(ref mut sample) = t.table_sample {
24531 sample.suppress_method_output = true;
24532 sample.unit_after_size = false;
24533 sample.is_percent = false;
24534 }
24535 Ok(Expression::Table(t))
24536 }
24537 _ => Ok(e),
24538 }
24539 }
24540
24541 Action::FirstToAnyValue => {
24542 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
24543 if let Expression::First(mut agg) = e {
24544 agg.ignore_nulls = None;
24545 agg.name = Some("ANY_VALUE".to_string());
24546 Ok(Expression::AnyValue(agg))
24547 } else {
24548 Ok(e)
24549 }
24550 }
24551
24552 Action::ArrayIndexConvert => {
24553 // Subscript index: 1-based to 0-based for BigQuery
24554 if let Expression::Subscript(mut sub) = e {
24555 if let Expression::Literal(ref lit) = sub.index {
24556 if let Literal::Number(ref n) = lit.as_ref() {
24557 if let Ok(val) = n.parse::<i64>() {
24558 sub.index = Expression::Literal(Box::new(Literal::Number(
24559 (val - 1).to_string(),
24560 )));
24561 }
24562 }
24563 }
24564 Ok(Expression::Subscript(sub))
24565 } else {
24566 Ok(e)
24567 }
24568 }
24569
24570 Action::AnyValueIgnoreNulls => {
24571 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
24572 if let Expression::AnyValue(mut av) = e {
24573 if av.ignore_nulls.is_none() {
24574 av.ignore_nulls = Some(true);
24575 }
24576 Ok(Expression::AnyValue(av))
24577 } else {
24578 Ok(e)
24579 }
24580 }
24581
24582 Action::BigQueryNullsOrdering => {
24583 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
24584 if let Expression::WindowFunction(mut wf) = e {
24585 for o in &mut wf.over.order_by {
24586 o.nulls_first = None;
24587 }
24588 Ok(Expression::WindowFunction(wf))
24589 } else if let Expression::Ordered(mut o) = e {
24590 o.nulls_first = None;
24591 Ok(Expression::Ordered(o))
24592 } else {
24593 Ok(e)
24594 }
24595 }
24596
24597 Action::SnowflakeFloatProtect => {
24598 // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
24599 // Snowflake's target transform from converting it to DOUBLE.
24600 // Non-Snowflake sources should keep their FLOAT spelling.
24601 if let Expression::DataType(DataType::Float { .. }) = e {
24602 Ok(Expression::DataType(DataType::Custom {
24603 name: "FLOAT".to_string(),
24604 }))
24605 } else {
24606 Ok(e)
24607 }
24608 }
24609
24610 Action::MysqlNullsOrdering => {
24611 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
24612 if let Expression::Ordered(mut o) = e {
24613 let nulls_last = o.nulls_first == Some(false);
24614 let desc = o.desc;
24615 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
24616 // If requested ordering matches default, just strip NULLS clause
24617 let matches_default = if desc {
24618 // DESC default is NULLS FIRST, so nulls_first=true matches
24619 o.nulls_first == Some(true)
24620 } else {
24621 // ASC default is NULLS LAST, so nulls_first=false matches
24622 nulls_last
24623 };
24624 if matches_default {
24625 o.nulls_first = None;
24626 Ok(Expression::Ordered(o))
24627 } else {
24628 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
24629 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
24630 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
24631 let null_val = if desc { 1 } else { 0 };
24632 let non_null_val = if desc { 0 } else { 1 };
24633 let _case_expr = Expression::Case(Box::new(Case {
24634 operand: None,
24635 whens: vec![(
24636 Expression::IsNull(Box::new(crate::expressions::IsNull {
24637 this: o.this.clone(),
24638 not: false,
24639 postfix_form: false,
24640 })),
24641 Expression::number(null_val),
24642 )],
24643 else_: Some(Expression::number(non_null_val)),
24644 comments: Vec::new(),
24645 inferred_type: None,
24646 }));
24647 o.nulls_first = None;
24648 // Return a tuple of [case_expr, ordered_expr]
24649 // We need to return both as part of the ORDER BY
24650 // But since transform_recursive processes individual expressions,
24651 // we can't easily add extra ORDER BY items here.
24652 // Instead, strip the nulls_first
24653 o.nulls_first = None;
24654 Ok(Expression::Ordered(o))
24655 }
24656 } else {
24657 Ok(e)
24658 }
24659 }
24660
24661 Action::MysqlNullsLastRewrite => {
24662 // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
24663 // to simulate NULLS LAST for ASC ordering
24664 if let Expression::WindowFunction(mut wf) = e {
24665 let mut new_order_by = Vec::new();
24666 for o in wf.over.order_by {
24667 if !o.desc {
24668 // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
24669 // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
24670 let case_expr = Expression::Case(Box::new(Case {
24671 operand: None,
24672 whens: vec![(
24673 Expression::IsNull(Box::new(crate::expressions::IsNull {
24674 this: o.this.clone(),
24675 not: false,
24676 postfix_form: false,
24677 })),
24678 Expression::Literal(Box::new(Literal::Number(
24679 "1".to_string(),
24680 ))),
24681 )],
24682 else_: Some(Expression::Literal(Box::new(Literal::Number(
24683 "0".to_string(),
24684 )))),
24685 comments: Vec::new(),
24686 inferred_type: None,
24687 }));
24688 new_order_by.push(crate::expressions::Ordered {
24689 this: case_expr,
24690 desc: false,
24691 nulls_first: None,
24692 explicit_asc: false,
24693 with_fill: None,
24694 });
24695 let mut ordered = o;
24696 ordered.nulls_first = None;
24697 new_order_by.push(ordered);
24698 } else {
24699 // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
24700 // No change needed
24701 let mut ordered = o;
24702 ordered.nulls_first = None;
24703 new_order_by.push(ordered);
24704 }
24705 }
24706 wf.over.order_by = new_order_by;
24707 Ok(Expression::WindowFunction(wf))
24708 } else {
24709 Ok(e)
24710 }
24711 }
24712
24713 Action::RespectNullsConvert => {
24714 // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
24715 if let Expression::WindowFunction(mut wf) = e {
24716 match &mut wf.this {
24717 Expression::FirstValue(ref mut vf) => {
24718 if vf.ignore_nulls == Some(false) {
24719 vf.ignore_nulls = None;
24720 // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
24721 // but that's handled by the generator's NULLS ordering
24722 }
24723 }
24724 Expression::LastValue(ref mut vf) => {
24725 if vf.ignore_nulls == Some(false) {
24726 vf.ignore_nulls = None;
24727 }
24728 }
24729 _ => {}
24730 }
24731 Ok(Expression::WindowFunction(wf))
24732 } else {
24733 Ok(e)
24734 }
24735 }
24736
24737 Action::SnowflakeWindowFrameStrip => {
24738 // Strip the default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
24739 // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when targeting Snowflake
24740 if let Expression::WindowFunction(mut wf) = e {
24741 wf.over.frame = None;
24742 Ok(Expression::WindowFunction(wf))
24743 } else {
24744 Ok(e)
24745 }
24746 }
24747
24748 Action::SnowflakeWindowFrameAdd => {
24749 // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
24750 // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when transpiling from Snowflake to non-Snowflake
24751 if let Expression::WindowFunction(mut wf) = e {
24752 wf.over.frame = Some(crate::expressions::WindowFrame {
24753 kind: crate::expressions::WindowFrameKind::Rows,
24754 start: crate::expressions::WindowFrameBound::UnboundedPreceding,
24755 end: Some(crate::expressions::WindowFrameBound::UnboundedFollowing),
24756 exclude: None,
24757 kind_text: None,
24758 start_side_text: None,
24759 end_side_text: None,
24760 });
24761 Ok(Expression::WindowFunction(wf))
24762 } else {
24763 Ok(e)
24764 }
24765 }
24766
24767 Action::CreateTableStripComment => {
24768 // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
24769 if let Expression::CreateTable(mut ct) = e {
24770 for col in &mut ct.columns {
24771 col.comment = None;
24772 col.constraints.retain(|c| {
24773 !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
24774 });
24775 // Also remove Comment from constraint_order
24776 col.constraint_order.retain(|c| {
24777 !matches!(c, crate::expressions::ConstraintType::Comment)
24778 });
24779 }
24780 // Strip properties (USING, PARTITIONED BY, etc.)
24781 ct.properties.clear();
24782 Ok(Expression::CreateTable(ct))
24783 } else {
24784 Ok(e)
24785 }
24786 }
24787
24788 Action::AlterTableToSpRename => {
24789 // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
24790 if let Expression::AlterTable(ref at) = e {
24791 if let Some(crate::expressions::AlterTableAction::RenameTable(
24792 ref new_tbl,
24793 )) = at.actions.first()
24794 {
24795 // Build the old table name using TSQL bracket quoting
24796 let old_name = if let Some(ref schema) = at.name.schema {
24797 if at.name.name.quoted || schema.quoted {
24798 format!("[{}].[{}]", schema.name, at.name.name.name)
24799 } else {
24800 format!("{}.{}", schema.name, at.name.name.name)
24801 }
24802 } else {
24803 if at.name.name.quoted {
24804 format!("[{}]", at.name.name.name)
24805 } else {
24806 at.name.name.name.clone()
24807 }
24808 };
24809 let new_name = new_tbl.name.name.clone();
24810 // EXEC sp_rename 'old_name', 'new_name'
24811 let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
24812 Ok(Expression::Raw(crate::expressions::Raw { sql }))
24813 } else {
24814 Ok(e)
24815 }
24816 } else {
24817 Ok(e)
24818 }
24819 }
24820
24821 Action::SnowflakeIntervalFormat => {
24822 // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
24823 if let Expression::Interval(mut iv) = e {
24824 if let (Some(Expression::Literal(lit)), Some(ref unit_spec)) =
24825 (&iv.this, &iv.unit)
24826 {
24827 if let Literal::String(ref val) = lit.as_ref() {
24828 let unit_str = match unit_spec {
24829 crate::expressions::IntervalUnitSpec::Simple {
24830 unit, ..
24831 } => match unit {
24832 crate::expressions::IntervalUnit::Year => "YEAR",
24833 crate::expressions::IntervalUnit::Quarter => "QUARTER",
24834 crate::expressions::IntervalUnit::Month => "MONTH",
24835 crate::expressions::IntervalUnit::Week => "WEEK",
24836 crate::expressions::IntervalUnit::Day => "DAY",
24837 crate::expressions::IntervalUnit::Hour => "HOUR",
24838 crate::expressions::IntervalUnit::Minute => "MINUTE",
24839 crate::expressions::IntervalUnit::Second => "SECOND",
24840 crate::expressions::IntervalUnit::Millisecond => {
24841 "MILLISECOND"
24842 }
24843 crate::expressions::IntervalUnit::Microsecond => {
24844 "MICROSECOND"
24845 }
24846 crate::expressions::IntervalUnit::Nanosecond => {
24847 "NANOSECOND"
24848 }
24849 },
24850 _ => "",
24851 };
24852 if !unit_str.is_empty() {
24853 let combined = format!("{} {}", val, unit_str);
24854 iv.this = Some(Expression::Literal(Box::new(Literal::String(
24855 combined,
24856 ))));
24857 iv.unit = None;
24858 }
24859 }
24860 }
24861 Ok(Expression::Interval(iv))
24862 } else {
24863 Ok(e)
24864 }
24865 }
24866
24867 Action::ArrayConcatBracketConvert => {
24868 // Expression::Array/ArrayFunc -> target-specific
24869 // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
24870 // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
24871 match e {
24872 Expression::Array(arr) => {
24873 if matches!(target, DialectType::Redshift) {
24874 Ok(Expression::Function(Box::new(Function::new(
24875 "ARRAY".to_string(),
24876 arr.expressions,
24877 ))))
24878 } else {
24879 Ok(Expression::ArrayFunc(Box::new(
24880 crate::expressions::ArrayConstructor {
24881 expressions: arr.expressions,
24882 bracket_notation: false,
24883 use_list_keyword: false,
24884 },
24885 )))
24886 }
24887 }
24888 Expression::ArrayFunc(arr) => {
24889 // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
24890 if matches!(target, DialectType::Redshift) {
24891 Ok(Expression::Function(Box::new(Function::new(
24892 "ARRAY".to_string(),
24893 arr.expressions,
24894 ))))
24895 } else {
24896 Ok(Expression::ArrayFunc(arr))
24897 }
24898 }
24899 _ => Ok(e),
24900 }
24901 }
24902
24903 Action::BitAggFloatCast => {
24904 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
24905 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
24906 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
24907 let int_type = DataType::Int {
24908 length: None,
24909 integer_spelling: false,
24910 };
24911 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
24912 if let Expression::Cast(c) = agg_this {
24913 match &c.to {
24914 DataType::Float { .. }
24915 | DataType::Double { .. }
24916 | DataType::Custom { .. } => {
24917 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
24918 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
24919 let inner_type = match &c.to {
24920 DataType::Float {
24921 precision, scale, ..
24922 } => DataType::Float {
24923 precision: *precision,
24924 scale: *scale,
24925 real_spelling: true,
24926 },
24927 other => other.clone(),
24928 };
24929 let inner_cast =
24930 Expression::Cast(Box::new(crate::expressions::Cast {
24931 this: c.this.clone(),
24932 to: inner_type,
24933 trailing_comments: Vec::new(),
24934 double_colon_syntax: false,
24935 format: None,
24936 default: None,
24937 inferred_type: None,
24938 }));
24939 let rounded = Expression::Function(Box::new(Function::new(
24940 "ROUND".to_string(),
24941 vec![inner_cast],
24942 )));
24943 Expression::Cast(Box::new(crate::expressions::Cast {
24944 this: rounded,
24945 to: int_dt,
24946 trailing_comments: Vec::new(),
24947 double_colon_syntax: false,
24948 format: None,
24949 default: None,
24950 inferred_type: None,
24951 }))
24952 }
24953 DataType::Decimal { .. } => {
24954 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
24955 Expression::Cast(Box::new(crate::expressions::Cast {
24956 this: Expression::Cast(c),
24957 to: int_dt,
24958 trailing_comments: Vec::new(),
24959 double_colon_syntax: false,
24960 format: None,
24961 default: None,
24962 inferred_type: None,
24963 }))
24964 }
24965 _ => Expression::Cast(c),
24966 }
24967 } else {
24968 agg_this
24969 }
24970 };
24971 match e {
24972 Expression::BitwiseOrAgg(mut f) => {
24973 f.this = wrap_agg(f.this, int_type);
24974 Ok(Expression::BitwiseOrAgg(f))
24975 }
24976 Expression::BitwiseAndAgg(mut f) => {
24977 let int_type = DataType::Int {
24978 length: None,
24979 integer_spelling: false,
24980 };
24981 f.this = wrap_agg(f.this, int_type);
24982 Ok(Expression::BitwiseAndAgg(f))
24983 }
24984 Expression::BitwiseXorAgg(mut f) => {
24985 let int_type = DataType::Int {
24986 length: None,
24987 integer_spelling: false,
24988 };
24989 f.this = wrap_agg(f.this, int_type);
24990 Ok(Expression::BitwiseXorAgg(f))
24991 }
24992 _ => Ok(e),
24993 }
24994 }
24995
24996 Action::BitAggSnowflakeRename => {
24997 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
24998 match e {
24999 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
25000 Function::new("BITORAGG".to_string(), vec![f.this]),
25001 ))),
25002 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
25003 Function::new("BITANDAGG".to_string(), vec![f.this]),
25004 ))),
25005 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
25006 Function::new("BITXORAGG".to_string(), vec![f.this]),
25007 ))),
25008 _ => Ok(e),
25009 }
25010 }
25011
25012 Action::StrftimeCastTimestamp => {
25013 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
25014 if let Expression::Cast(mut c) = e {
25015 if matches!(
25016 c.to,
25017 DataType::Timestamp {
25018 timezone: false,
25019 ..
25020 }
25021 ) {
25022 c.to = DataType::Custom {
25023 name: "TIMESTAMP_NTZ".to_string(),
25024 };
25025 }
25026 Ok(Expression::Cast(c))
25027 } else {
25028 Ok(e)
25029 }
25030 }
25031
25032 Action::DecimalDefaultPrecision => {
25033 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
25034 if let Expression::Cast(mut c) = e {
25035 if matches!(
25036 c.to,
25037 DataType::Decimal {
25038 precision: None,
25039 ..
25040 }
25041 ) {
25042 c.to = DataType::Decimal {
25043 precision: Some(18),
25044 scale: Some(3),
25045 };
25046 }
25047 Ok(Expression::Cast(c))
25048 } else {
25049 Ok(e)
25050 }
25051 }
25052
25053 Action::FilterToIff => {
25054 // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL))
25055 if let Expression::Filter(f) = e {
25056 let condition = *f.expression;
25057 let agg = *f.this;
25058 // Strip WHERE from condition
25059 let cond = match condition {
25060 Expression::Where(w) => w.this,
25061 other => other,
25062 };
25063 // Extract the aggregate function and its argument
25064 // We want AVG(IFF(condition, x, NULL))
25065 match agg {
25066 Expression::Function(mut func) => {
25067 if !func.args.is_empty() {
25068 let orig_arg = func.args[0].clone();
25069 let iff_call = Expression::Function(Box::new(Function::new(
25070 "IFF".to_string(),
25071 vec![cond, orig_arg, Expression::Null(Null)],
25072 )));
25073 func.args[0] = iff_call;
25074 Ok(Expression::Function(func))
25075 } else {
25076 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
25077 this: Box::new(Expression::Function(func)),
25078 expression: Box::new(cond),
25079 })))
25080 }
25081 }
25082 Expression::Avg(mut avg) => {
25083 let iff_call = Expression::Function(Box::new(Function::new(
25084 "IFF".to_string(),
25085 vec![cond, avg.this.clone(), Expression::Null(Null)],
25086 )));
25087 avg.this = iff_call;
25088 Ok(Expression::Avg(avg))
25089 }
25090 Expression::Sum(mut s) => {
25091 let iff_call = Expression::Function(Box::new(Function::new(
25092 "IFF".to_string(),
25093 vec![cond, s.this.clone(), Expression::Null(Null)],
25094 )));
25095 s.this = iff_call;
25096 Ok(Expression::Sum(s))
25097 }
25098 Expression::Count(mut c) => {
25099 if let Some(ref this_expr) = c.this {
25100 let iff_call = Expression::Function(Box::new(Function::new(
25101 "IFF".to_string(),
25102 vec![cond, this_expr.clone(), Expression::Null(Null)],
25103 )));
25104 c.this = Some(iff_call);
25105 }
25106 Ok(Expression::Count(c))
25107 }
25108 other => {
25109 // Fallback: keep as Filter
25110 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
25111 this: Box::new(other),
25112 expression: Box::new(cond),
25113 })))
25114 }
25115 }
25116 } else {
25117 Ok(e)
25118 }
25119 }
25120
25121 Action::AggFilterToIff => {
25122 // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
25123 // Helper macro to handle the common AggFunc case
25124 macro_rules! handle_agg_filter_to_iff {
25125 ($variant:ident, $agg:expr) => {{
25126 let mut agg = $agg;
25127 if let Some(filter_cond) = agg.filter.take() {
25128 let iff_call = Expression::Function(Box::new(Function::new(
25129 "IFF".to_string(),
25130 vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
25131 )));
25132 agg.this = iff_call;
25133 }
25134 Ok(Expression::$variant(agg))
25135 }};
25136 }
25137
25138 match e {
25139 Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
25140 Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
25141 Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
25142 Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
25143 Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
25144 Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
25145 Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
25146 Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
25147 Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
25148 Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
25149 Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
25150 Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
25151 Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
25152 Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
25153 Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
25154 Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
25155 Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
25156 Expression::ApproxDistinct(agg) => {
25157 handle_agg_filter_to_iff!(ApproxDistinct, agg)
25158 }
25159 Expression::Count(mut c) => {
25160 if let Some(filter_cond) = c.filter.take() {
25161 if let Some(ref this_expr) = c.this {
25162 let iff_call = Expression::Function(Box::new(Function::new(
25163 "IFF".to_string(),
25164 vec![
25165 filter_cond,
25166 this_expr.clone(),
25167 Expression::Null(Null),
25168 ],
25169 )));
25170 c.this = Some(iff_call);
25171 }
25172 }
25173 Ok(Expression::Count(c))
25174 }
25175 other => Ok(other),
25176 }
25177 }
25178
25179 Action::JsonToGetPath => {
25180 // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key')
25181 if let Expression::JsonExtract(je) = e {
25182 // Convert to PARSE_JSON() wrapper:
25183 // - JSON(x) -> PARSE_JSON(x)
25184 // - PARSE_JSON(x) -> keep as-is
25185 // - anything else -> wrap in PARSE_JSON()
25186 let this = match &je.this {
25187 Expression::Function(f)
25188 if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
25189 {
25190 Expression::Function(Box::new(Function::new(
25191 "PARSE_JSON".to_string(),
25192 f.args.clone(),
25193 )))
25194 }
25195 Expression::Function(f)
25196 if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
25197 {
25198 je.this.clone()
25199 }
25200 // GET_PATH result is already JSON, don't wrap
25201 Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
25202 je.this.clone()
25203 }
25204 other => {
25205 // Wrap non-JSON expressions in PARSE_JSON()
25206 Expression::Function(Box::new(Function::new(
25207 "PARSE_JSON".to_string(),
25208 vec![other.clone()],
25209 )))
25210 }
25211 };
25212 // Convert path: extract key from JSONPath or strip $. prefix from string
25213 let path = match &je.path {
25214 Expression::JSONPath(jp) => {
25215 // Extract the key from JSONPath: $root.key -> 'key'
25216 let mut key_parts = Vec::new();
25217 for expr in &jp.expressions {
25218 match expr {
25219 Expression::JSONPathRoot(_) => {} // skip root
25220 Expression::JSONPathKey(k) => {
25221 if let Expression::Literal(lit) = &*k.this {
25222 if let Literal::String(s) = lit.as_ref() {
25223 key_parts.push(s.clone());
25224 }
25225 }
25226 }
25227 _ => {}
25228 }
25229 }
25230 if !key_parts.is_empty() {
25231 Expression::Literal(Box::new(Literal::String(
25232 key_parts.join("."),
25233 )))
25234 } else {
25235 je.path.clone()
25236 }
25237 }
25238 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with("$.")) =>
25239 {
25240 let Literal::String(s) = lit.as_ref() else {
25241 unreachable!()
25242 };
25243 let stripped = Self::strip_json_wildcards(&s[2..].to_string());
25244 Expression::Literal(Box::new(Literal::String(stripped)))
25245 }
25246 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$')) =>
25247 {
25248 let Literal::String(s) = lit.as_ref() else {
25249 unreachable!()
25250 };
25251 let stripped = Self::strip_json_wildcards(&s[1..].to_string());
25252 Expression::Literal(Box::new(Literal::String(stripped)))
25253 }
25254 _ => je.path.clone(),
25255 };
25256 Ok(Expression::Function(Box::new(Function::new(
25257 "GET_PATH".to_string(),
25258 vec![this, path],
25259 ))))
25260 } else {
25261 Ok(e)
25262 }
25263 }
25264
25265 Action::StructToRow => {
25266 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
25267 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
25268
25269 // Extract key-value pairs from either Struct or MapFunc
25270 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
25271 Expression::Struct(s) => Some(
25272 s.fields
25273 .iter()
25274 .map(|(opt_name, field_expr)| {
25275 if let Some(name) = opt_name {
25276 (name.clone(), field_expr.clone())
25277 } else if let Expression::NamedArgument(na) = field_expr {
25278 (na.name.name.clone(), na.value.clone())
25279 } else {
25280 (String::new(), field_expr.clone())
25281 }
25282 })
25283 .collect(),
25284 ),
25285 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
25286 m.keys
25287 .iter()
25288 .zip(m.values.iter())
25289 .map(|(key, value)| {
25290 let key_name = match key {
25291 Expression::Literal(lit)
25292 if matches!(lit.as_ref(), Literal::String(_)) =>
25293 {
25294 let Literal::String(s) = lit.as_ref() else {
25295 unreachable!()
25296 };
25297 s.clone()
25298 }
25299 Expression::Identifier(id) => id.name.clone(),
25300 _ => String::new(),
25301 };
25302 (key_name, value.clone())
25303 })
25304 .collect(),
25305 ),
25306 _ => None,
25307 };
25308
25309 if let Some(pairs) = kv_pairs {
25310 let mut named_args = Vec::new();
25311 for (key_name, value) in pairs {
25312 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
25313 named_args.push(Expression::Alias(Box::new(
25314 crate::expressions::Alias::new(
25315 value,
25316 Identifier::new(key_name),
25317 ),
25318 )));
25319 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
25320 named_args.push(value);
25321 } else {
25322 named_args.push(value);
25323 }
25324 }
25325
25326 if matches!(target, DialectType::BigQuery) {
25327 Ok(Expression::Function(Box::new(Function::new(
25328 "STRUCT".to_string(),
25329 named_args,
25330 ))))
25331 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
25332 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
25333 let row_func = Expression::Function(Box::new(Function::new(
25334 "ROW".to_string(),
25335 named_args,
25336 )));
25337
25338 // Try to infer types for each pair
25339 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
25340 Expression::Struct(s) => Some(
25341 s.fields
25342 .iter()
25343 .map(|(opt_name, field_expr)| {
25344 if let Some(name) = opt_name {
25345 (name.clone(), field_expr.clone())
25346 } else if let Expression::NamedArgument(na) = field_expr
25347 {
25348 (na.name.name.clone(), na.value.clone())
25349 } else {
25350 (String::new(), field_expr.clone())
25351 }
25352 })
25353 .collect(),
25354 ),
25355 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
25356 m.keys
25357 .iter()
25358 .zip(m.values.iter())
25359 .map(|(key, value)| {
25360 let key_name = match key {
25361 Expression::Literal(lit)
25362 if matches!(
25363 lit.as_ref(),
25364 Literal::String(_)
25365 ) =>
25366 {
25367 let Literal::String(s) = lit.as_ref() else {
25368 unreachable!()
25369 };
25370 s.clone()
25371 }
25372 Expression::Identifier(id) => id.name.clone(),
25373 _ => String::new(),
25374 };
25375 (key_name, value.clone())
25376 })
25377 .collect(),
25378 ),
25379 _ => None,
25380 };
25381
25382 if let Some(pairs) = kv_pairs_again {
25383 // Infer types for all values
25384 let mut all_inferred = true;
25385 let mut fields = Vec::new();
25386 for (name, value) in &pairs {
25387 let inferred_type = match value {
25388 Expression::Literal(lit)
25389 if matches!(lit.as_ref(), Literal::Number(_)) =>
25390 {
25391 let Literal::Number(n) = lit.as_ref() else {
25392 unreachable!()
25393 };
25394 if n.contains('.') {
25395 Some(DataType::Double {
25396 precision: None,
25397 scale: None,
25398 })
25399 } else {
25400 Some(DataType::Int {
25401 length: None,
25402 integer_spelling: true,
25403 })
25404 }
25405 }
25406 Expression::Literal(lit)
25407 if matches!(lit.as_ref(), Literal::String(_)) =>
25408 {
25409 Some(DataType::VarChar {
25410 length: None,
25411 parenthesized_length: false,
25412 })
25413 }
25414 Expression::Boolean(_) => Some(DataType::Boolean),
25415 _ => None,
25416 };
25417 if let Some(dt) = inferred_type {
25418 fields.push(crate::expressions::StructField::new(
25419 name.clone(),
25420 dt,
25421 ));
25422 } else {
25423 all_inferred = false;
25424 break;
25425 }
25426 }
25427
25428 if all_inferred && !fields.is_empty() {
25429 let row_type = DataType::Struct {
25430 fields,
25431 nested: true,
25432 };
25433 Ok(Expression::Cast(Box::new(Cast {
25434 this: row_func,
25435 to: row_type,
25436 trailing_comments: Vec::new(),
25437 double_colon_syntax: false,
25438 format: None,
25439 default: None,
25440 inferred_type: None,
25441 })))
25442 } else {
25443 Ok(row_func)
25444 }
25445 } else {
25446 Ok(row_func)
25447 }
25448 } else {
25449 Ok(Expression::Function(Box::new(Function::new(
25450 "ROW".to_string(),
25451 named_args,
25452 ))))
25453 }
25454 } else {
25455 Ok(e)
25456 }
25457 }
25458
25459 Action::SparkStructConvert => {
25460 // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
25461 // or DuckDB {'name': val, ...}
25462 if let Expression::Function(f) = e {
25463 // Extract name-value pairs from aliased args
25464 let mut pairs: Vec<(String, Expression)> = Vec::new();
25465 for arg in &f.args {
25466 match arg {
25467 Expression::Alias(a) => {
25468 pairs.push((a.alias.name.clone(), a.this.clone()));
25469 }
25470 _ => {
25471 pairs.push((String::new(), arg.clone()));
25472 }
25473 }
25474 }
25475
25476 match target {
25477 DialectType::DuckDB => {
25478 // Convert to DuckDB struct literal {'name': value, ...}
25479 let mut keys = Vec::new();
25480 let mut values = Vec::new();
25481 for (name, value) in &pairs {
25482 keys.push(Expression::Literal(Box::new(Literal::String(
25483 name.clone(),
25484 ))));
25485 values.push(value.clone());
25486 }
25487 Ok(Expression::MapFunc(Box::new(
25488 crate::expressions::MapConstructor {
25489 keys,
25490 values,
25491 curly_brace_syntax: true,
25492 with_map_keyword: false,
25493 },
25494 )))
25495 }
25496 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25497 // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
25498 let row_args: Vec<Expression> =
25499 pairs.iter().map(|(_, v)| v.clone()).collect();
25500 let row_func = Expression::Function(Box::new(Function::new(
25501 "ROW".to_string(),
25502 row_args,
25503 )));
25504
25505 // Infer types
25506 let mut all_inferred = true;
25507 let mut fields = Vec::new();
25508 for (name, value) in &pairs {
25509 let inferred_type = match value {
25510 Expression::Literal(lit)
25511 if matches!(lit.as_ref(), Literal::Number(_)) =>
25512 {
25513 let Literal::Number(n) = lit.as_ref() else {
25514 unreachable!()
25515 };
25516 if n.contains('.') {
25517 Some(DataType::Double {
25518 precision: None,
25519 scale: None,
25520 })
25521 } else {
25522 Some(DataType::Int {
25523 length: None,
25524 integer_spelling: true,
25525 })
25526 }
25527 }
25528 Expression::Literal(lit)
25529 if matches!(lit.as_ref(), Literal::String(_)) =>
25530 {
25531 Some(DataType::VarChar {
25532 length: None,
25533 parenthesized_length: false,
25534 })
25535 }
25536 Expression::Boolean(_) => Some(DataType::Boolean),
25537 _ => None,
25538 };
25539 if let Some(dt) = inferred_type {
25540 fields.push(crate::expressions::StructField::new(
25541 name.clone(),
25542 dt,
25543 ));
25544 } else {
25545 all_inferred = false;
25546 break;
25547 }
25548 }
25549
25550 if all_inferred && !fields.is_empty() {
25551 let row_type = DataType::Struct {
25552 fields,
25553 nested: true,
25554 };
25555 Ok(Expression::Cast(Box::new(Cast {
25556 this: row_func,
25557 to: row_type,
25558 trailing_comments: Vec::new(),
25559 double_colon_syntax: false,
25560 format: None,
25561 default: None,
25562 inferred_type: None,
25563 })))
25564 } else {
25565 Ok(row_func)
25566 }
25567 }
25568 _ => Ok(Expression::Function(f)),
25569 }
25570 } else {
25571 Ok(e)
25572 }
25573 }
25574
25575 Action::ApproxCountDistinctToApproxDistinct => {
25576 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
25577 if let Expression::ApproxCountDistinct(f) = e {
25578 Ok(Expression::ApproxDistinct(f))
25579 } else {
25580 Ok(e)
25581 }
25582 }
25583
25584 Action::CollectListToArrayAgg => {
25585 // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
25586 if let Expression::AggregateFunction(f) = e {
25587 let filter_expr = if !f.args.is_empty() {
25588 let arg = f.args[0].clone();
25589 Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
25590 this: arg,
25591 not: true,
25592 postfix_form: false,
25593 })))
25594 } else {
25595 None
25596 };
25597 let agg = crate::expressions::AggFunc {
25598 this: if f.args.is_empty() {
25599 Expression::Null(crate::expressions::Null)
25600 } else {
25601 f.args[0].clone()
25602 },
25603 distinct: f.distinct,
25604 order_by: f.order_by.clone(),
25605 filter: filter_expr,
25606 ignore_nulls: None,
25607 name: None,
25608 having_max: None,
25609 limit: None,
25610 inferred_type: None,
25611 };
25612 Ok(Expression::ArrayAgg(Box::new(agg)))
25613 } else {
25614 Ok(e)
25615 }
25616 }
25617
25618 Action::CollectSetConvert => {
25619 // COLLECT_SET(x) -> target-specific
25620 if let Expression::AggregateFunction(f) = e {
25621 match target {
25622 DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
25623 crate::expressions::AggregateFunction {
25624 name: "SET_AGG".to_string(),
25625 args: f.args,
25626 distinct: false,
25627 order_by: f.order_by,
25628 filter: f.filter,
25629 limit: f.limit,
25630 ignore_nulls: f.ignore_nulls,
25631 inferred_type: None,
25632 },
25633 ))),
25634 DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
25635 crate::expressions::AggregateFunction {
25636 name: "ARRAY_UNIQUE_AGG".to_string(),
25637 args: f.args,
25638 distinct: false,
25639 order_by: f.order_by,
25640 filter: f.filter,
25641 limit: f.limit,
25642 ignore_nulls: f.ignore_nulls,
25643 inferred_type: None,
25644 },
25645 ))),
25646 DialectType::Trino | DialectType::DuckDB => {
25647 let agg = crate::expressions::AggFunc {
25648 this: if f.args.is_empty() {
25649 Expression::Null(crate::expressions::Null)
25650 } else {
25651 f.args[0].clone()
25652 },
25653 distinct: true,
25654 order_by: Vec::new(),
25655 filter: None,
25656 ignore_nulls: None,
25657 name: None,
25658 having_max: None,
25659 limit: None,
25660 inferred_type: None,
25661 };
25662 Ok(Expression::ArrayAgg(Box::new(agg)))
25663 }
25664 _ => Ok(Expression::AggregateFunction(f)),
25665 }
25666 } else {
25667 Ok(e)
25668 }
25669 }
25670
25671 Action::PercentileConvert => {
25672 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
25673 if let Expression::AggregateFunction(f) = e {
25674 let name = match target {
25675 DialectType::DuckDB => "QUANTILE",
25676 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
25677 _ => "PERCENTILE",
25678 };
25679 Ok(Expression::AggregateFunction(Box::new(
25680 crate::expressions::AggregateFunction {
25681 name: name.to_string(),
25682 args: f.args,
25683 distinct: f.distinct,
25684 order_by: f.order_by,
25685 filter: f.filter,
25686 limit: f.limit,
25687 ignore_nulls: f.ignore_nulls,
25688 inferred_type: None,
25689 },
25690 )))
25691 } else {
25692 Ok(e)
25693 }
25694 }
25695
25696 Action::CorrIsnanWrap => {
25697 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
25698 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
25699 let corr_clone = e.clone();
25700 let isnan = Expression::Function(Box::new(Function::new(
25701 "ISNAN".to_string(),
25702 vec![corr_clone.clone()],
25703 )));
25704 let case_expr = Expression::Case(Box::new(Case {
25705 operand: None,
25706 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
25707 else_: Some(corr_clone),
25708 comments: Vec::new(),
25709 inferred_type: None,
25710 }));
25711 Ok(case_expr)
25712 }
25713
25714 Action::TruncToDateTrunc => {
25715 // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
25716 if let Expression::Function(f) = e {
25717 if f.args.len() == 2 {
25718 let timestamp = f.args[0].clone();
25719 let unit_expr = f.args[1].clone();
25720
25721 if matches!(target, DialectType::ClickHouse) {
25722 // For ClickHouse, produce Expression::DateTrunc which the generator
25723 // outputs as DATE_TRUNC(...) without going through the ClickHouse
25724 // target transform that would convert it to dateTrunc
25725 let unit_str = Self::get_unit_str_static(&unit_expr);
25726 let dt_field = match unit_str.as_str() {
25727 "YEAR" => DateTimeField::Year,
25728 "MONTH" => DateTimeField::Month,
25729 "DAY" => DateTimeField::Day,
25730 "HOUR" => DateTimeField::Hour,
25731 "MINUTE" => DateTimeField::Minute,
25732 "SECOND" => DateTimeField::Second,
25733 "WEEK" => DateTimeField::Week,
25734 "QUARTER" => DateTimeField::Quarter,
25735 _ => DateTimeField::Custom(unit_str),
25736 };
25737 Ok(Expression::DateTrunc(Box::new(
25738 crate::expressions::DateTruncFunc {
25739 this: timestamp,
25740 unit: dt_field,
25741 },
25742 )))
25743 } else {
25744 let new_args = vec![unit_expr, timestamp];
25745 Ok(Expression::Function(Box::new(Function::new(
25746 "DATE_TRUNC".to_string(),
25747 new_args,
25748 ))))
25749 }
25750 } else {
25751 Ok(Expression::Function(f))
25752 }
25753 } else {
25754 Ok(e)
25755 }
25756 }
25757
25758 Action::ArrayContainsConvert => {
25759 if let Expression::ArrayContains(f) = e {
25760 match target {
25761 DialectType::Presto | DialectType::Trino => {
25762 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
25763 Ok(Expression::Function(Box::new(Function::new(
25764 "CONTAINS".to_string(),
25765 vec![f.this, f.expression],
25766 ))))
25767 }
25768 DialectType::Snowflake => {
25769 // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
25770 let cast_val =
25771 Expression::Cast(Box::new(crate::expressions::Cast {
25772 this: f.expression,
25773 to: crate::expressions::DataType::Custom {
25774 name: "VARIANT".to_string(),
25775 },
25776 trailing_comments: Vec::new(),
25777 double_colon_syntax: false,
25778 format: None,
25779 default: None,
25780 inferred_type: None,
25781 }));
25782 Ok(Expression::Function(Box::new(Function::new(
25783 "ARRAY_CONTAINS".to_string(),
25784 vec![cast_val, f.this],
25785 ))))
25786 }
25787 _ => Ok(Expression::ArrayContains(f)),
25788 }
25789 } else {
25790 Ok(e)
25791 }
25792 }
25793
25794 Action::ArrayExceptConvert => {
25795 if let Expression::ArrayExcept(f) = e {
25796 let source_arr = f.this;
25797 let exclude_arr = f.expression;
25798 match target {
25799 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
25800 // Snowflake ARRAY_EXCEPT -> DuckDB bag semantics:
25801 // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
25802 // ELSE LIST_TRANSFORM(LIST_FILTER(
25803 // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source))),
25804 // pair -> (LENGTH(LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1]))
25805 // > LENGTH(LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])))),
25806 // pair -> pair[1])
25807 // END
25808
25809 // Build null check
25810 let source_is_null =
25811 Expression::IsNull(Box::new(crate::expressions::IsNull {
25812 this: source_arr.clone(),
25813 not: false,
25814 postfix_form: false,
25815 }));
25816 let exclude_is_null =
25817 Expression::IsNull(Box::new(crate::expressions::IsNull {
25818 this: exclude_arr.clone(),
25819 not: false,
25820 postfix_form: false,
25821 }));
25822 let null_check =
25823 Expression::Or(Box::new(crate::expressions::BinaryOp {
25824 left: source_is_null,
25825 right: exclude_is_null,
25826 left_comments: vec![],
25827 operator_comments: vec![],
25828 trailing_comments: vec![],
25829 inferred_type: None,
25830 }));
25831
25832 // GENERATE_SERIES(1, LENGTH(source))
25833 let gen_series = Expression::Function(Box::new(Function::new(
25834 "GENERATE_SERIES".to_string(),
25835 vec![
25836 Expression::number(1),
25837 Expression::Function(Box::new(Function::new(
25838 "LENGTH".to_string(),
25839 vec![source_arr.clone()],
25840 ))),
25841 ],
25842 )));
25843
25844 // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source)))
25845 let list_zip = Expression::Function(Box::new(Function::new(
25846 "LIST_ZIP".to_string(),
25847 vec![source_arr.clone(), gen_series],
25848 )));
25849
25850 // pair[1] and pair[2]
25851 let pair_col = Expression::column("pair");
25852 let pair_1 = Expression::Subscript(Box::new(
25853 crate::expressions::Subscript {
25854 this: pair_col.clone(),
25855 index: Expression::number(1),
25856 },
25857 ));
25858 let pair_2 = Expression::Subscript(Box::new(
25859 crate::expressions::Subscript {
25860 this: pair_col.clone(),
25861 index: Expression::number(2),
25862 },
25863 ));
25864
25865 // source[1:pair[2]]
25866 let source_slice = Expression::ArraySlice(Box::new(
25867 crate::expressions::ArraySlice {
25868 this: source_arr.clone(),
25869 start: Some(Expression::number(1)),
25870 end: Some(pair_2),
25871 },
25872 ));
25873
25874 let e_col = Expression::column("e");
25875
25876 // e -> e IS NOT DISTINCT FROM pair[1]
25877 let inner_lambda1 =
25878 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25879 parameters: vec![crate::expressions::Identifier::new("e")],
25880 body: Expression::NullSafeEq(Box::new(
25881 crate::expressions::BinaryOp {
25882 left: e_col.clone(),
25883 right: pair_1.clone(),
25884 left_comments: vec![],
25885 operator_comments: vec![],
25886 trailing_comments: vec![],
25887 inferred_type: None,
25888 },
25889 )),
25890 colon: false,
25891 parameter_types: vec![],
25892 }));
25893
25894 // LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
25895 let inner_filter1 = Expression::Function(Box::new(Function::new(
25896 "LIST_FILTER".to_string(),
25897 vec![source_slice, inner_lambda1],
25898 )));
25899
25900 // LENGTH(LIST_FILTER(source[1:pair[2]], ...))
25901 let len1 = Expression::Function(Box::new(Function::new(
25902 "LENGTH".to_string(),
25903 vec![inner_filter1],
25904 )));
25905
25906 // e -> e IS NOT DISTINCT FROM pair[1]
25907 let inner_lambda2 =
25908 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25909 parameters: vec![crate::expressions::Identifier::new("e")],
25910 body: Expression::NullSafeEq(Box::new(
25911 crate::expressions::BinaryOp {
25912 left: e_col,
25913 right: pair_1.clone(),
25914 left_comments: vec![],
25915 operator_comments: vec![],
25916 trailing_comments: vec![],
25917 inferred_type: None,
25918 },
25919 )),
25920 colon: false,
25921 parameter_types: vec![],
25922 }));
25923
25924 // LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])
25925 let inner_filter2 = Expression::Function(Box::new(Function::new(
25926 "LIST_FILTER".to_string(),
25927 vec![exclude_arr.clone(), inner_lambda2],
25928 )));
25929
25930 // LENGTH(LIST_FILTER(exclude, ...))
25931 let len2 = Expression::Function(Box::new(Function::new(
25932 "LENGTH".to_string(),
25933 vec![inner_filter2],
25934 )));
25935
25936 // (LENGTH(...) > LENGTH(...))
25937 let cond = Expression::Paren(Box::new(Paren {
25938 this: Expression::Gt(Box::new(crate::expressions::BinaryOp {
25939 left: len1,
25940 right: len2,
25941 left_comments: vec![],
25942 operator_comments: vec![],
25943 trailing_comments: vec![],
25944 inferred_type: None,
25945 })),
25946 trailing_comments: vec![],
25947 }));
25948
25949 // pair -> (condition)
25950 let filter_lambda =
25951 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25952 parameters: vec![crate::expressions::Identifier::new(
25953 "pair",
25954 )],
25955 body: cond,
25956 colon: false,
25957 parameter_types: vec![],
25958 }));
25959
25960 // LIST_FILTER(LIST_ZIP(...), pair -> ...)
25961 let outer_filter = Expression::Function(Box::new(Function::new(
25962 "LIST_FILTER".to_string(),
25963 vec![list_zip, filter_lambda],
25964 )));
25965
25966 // pair -> pair[1]
25967 let transform_lambda =
25968 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
25969 parameters: vec![crate::expressions::Identifier::new(
25970 "pair",
25971 )],
25972 body: pair_1,
25973 colon: false,
25974 parameter_types: vec![],
25975 }));
25976
25977 // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
25978 let list_transform = Expression::Function(Box::new(Function::new(
25979 "LIST_TRANSFORM".to_string(),
25980 vec![outer_filter, transform_lambda],
25981 )));
25982
25983 Ok(Expression::Case(Box::new(Case {
25984 operand: None,
25985 whens: vec![(null_check, Expression::Null(Null))],
25986 else_: Some(list_transform),
25987 comments: Vec::new(),
25988 inferred_type: None,
25989 })))
25990 }
25991 DialectType::DuckDB => {
25992 // ARRAY_EXCEPT(source, exclude) -> set semantics for DuckDB:
25993 // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
25994 // ELSE LIST_FILTER(LIST_DISTINCT(source),
25995 // e -> LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)) = 0)
25996 // END
25997
25998 // Build: source IS NULL
25999 let source_is_null =
26000 Expression::IsNull(Box::new(crate::expressions::IsNull {
26001 this: source_arr.clone(),
26002 not: false,
26003 postfix_form: false,
26004 }));
26005 // Build: exclude IS NULL
26006 let exclude_is_null =
26007 Expression::IsNull(Box::new(crate::expressions::IsNull {
26008 this: exclude_arr.clone(),
26009 not: false,
26010 postfix_form: false,
26011 }));
26012 // source IS NULL OR exclude IS NULL
26013 let null_check =
26014 Expression::Or(Box::new(crate::expressions::BinaryOp {
26015 left: source_is_null,
26016 right: exclude_is_null,
26017 left_comments: vec![],
26018 operator_comments: vec![],
26019 trailing_comments: vec![],
26020 inferred_type: None,
26021 }));
26022
26023 // LIST_DISTINCT(source)
26024 let list_distinct = Expression::Function(Box::new(Function::new(
26025 "LIST_DISTINCT".to_string(),
26026 vec![source_arr.clone()],
26027 )));
26028
26029 // x IS NOT DISTINCT FROM e
26030 let x_col = Expression::column("x");
26031 let e_col = Expression::column("e");
26032 let is_not_distinct = Expression::NullSafeEq(Box::new(
26033 crate::expressions::BinaryOp {
26034 left: x_col,
26035 right: e_col.clone(),
26036 left_comments: vec![],
26037 operator_comments: vec![],
26038 trailing_comments: vec![],
26039 inferred_type: None,
26040 },
26041 ));
26042
26043 // x -> x IS NOT DISTINCT FROM e
26044 let inner_lambda =
26045 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26046 parameters: vec![crate::expressions::Identifier::new("x")],
26047 body: is_not_distinct,
26048 colon: false,
26049 parameter_types: vec![],
26050 }));
26051
26052 // LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)
26053 let inner_list_filter =
26054 Expression::Function(Box::new(Function::new(
26055 "LIST_FILTER".to_string(),
26056 vec![exclude_arr.clone(), inner_lambda],
26057 )));
26058
26059 // LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e))
26060 let len_inner = Expression::Function(Box::new(Function::new(
26061 "LENGTH".to_string(),
26062 vec![inner_list_filter],
26063 )));
26064
26065 // LENGTH(...) = 0
26066 let eq_zero =
26067 Expression::Eq(Box::new(crate::expressions::BinaryOp {
26068 left: len_inner,
26069 right: Expression::number(0),
26070 left_comments: vec![],
26071 operator_comments: vec![],
26072 trailing_comments: vec![],
26073 inferred_type: None,
26074 }));
26075
26076 // e -> LENGTH(LIST_FILTER(...)) = 0
26077 let outer_lambda =
26078 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26079 parameters: vec![crate::expressions::Identifier::new("e")],
26080 body: eq_zero,
26081 colon: false,
26082 parameter_types: vec![],
26083 }));
26084
26085 // LIST_FILTER(LIST_DISTINCT(source), e -> ...)
26086 let outer_list_filter =
26087 Expression::Function(Box::new(Function::new(
26088 "LIST_FILTER".to_string(),
26089 vec![list_distinct, outer_lambda],
26090 )));
26091
26092 // CASE WHEN ... IS NULL ... THEN NULL ELSE LIST_FILTER(...) END
26093 Ok(Expression::Case(Box::new(Case {
26094 operand: None,
26095 whens: vec![(null_check, Expression::Null(Null))],
26096 else_: Some(outer_list_filter),
26097 comments: Vec::new(),
26098 inferred_type: None,
26099 })))
26100 }
26101 DialectType::Snowflake => {
26102 // Snowflake: ARRAY_EXCEPT(source, exclude) - keep as-is
26103 Ok(Expression::ArrayExcept(Box::new(
26104 crate::expressions::BinaryFunc {
26105 this: source_arr,
26106 expression: exclude_arr,
26107 original_name: None,
26108 inferred_type: None,
26109 },
26110 )))
26111 }
26112 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26113 // Presto/Trino: ARRAY_EXCEPT(source, exclude) - keep function name, array syntax already converted
26114 Ok(Expression::Function(Box::new(Function::new(
26115 "ARRAY_EXCEPT".to_string(),
26116 vec![source_arr, exclude_arr],
26117 ))))
26118 }
26119 _ => Ok(Expression::ArrayExcept(Box::new(
26120 crate::expressions::BinaryFunc {
26121 this: source_arr,
26122 expression: exclude_arr,
26123 original_name: None,
26124 inferred_type: None,
26125 },
26126 ))),
26127 }
26128 } else {
26129 Ok(e)
26130 }
26131 }
26132
26133 Action::RegexpLikeExasolAnchor => {
26134 // RegexpLike -> Exasol: wrap pattern with .*...*
26135 // Exasol REGEXP_LIKE does full-string match, but RLIKE/REGEXP from other
26136 // dialects does partial match, so we need to anchor with .* on both sides
26137 if let Expression::RegexpLike(mut f) = e {
26138 match &f.pattern {
26139 Expression::Literal(lit)
26140 if matches!(lit.as_ref(), Literal::String(_)) =>
26141 {
26142 let Literal::String(s) = lit.as_ref() else {
26143 unreachable!()
26144 };
26145 // String literal: wrap with .*...*
26146 f.pattern = Expression::Literal(Box::new(Literal::String(
26147 format!(".*{}.*", s),
26148 )));
26149 }
26150 _ => {
26151 // Non-literal: wrap with CONCAT('.*', pattern, '.*')
26152 f.pattern =
26153 Expression::Paren(Box::new(crate::expressions::Paren {
26154 this: Expression::Concat(Box::new(
26155 crate::expressions::BinaryOp {
26156 left: Expression::Concat(Box::new(
26157 crate::expressions::BinaryOp {
26158 left: Expression::Literal(Box::new(
26159 Literal::String(".*".to_string()),
26160 )),
26161 right: f.pattern,
26162 left_comments: vec![],
26163 operator_comments: vec![],
26164 trailing_comments: vec![],
26165 inferred_type: None,
26166 },
26167 )),
26168 right: Expression::Literal(Box::new(
26169 Literal::String(".*".to_string()),
26170 )),
26171 left_comments: vec![],
26172 operator_comments: vec![],
26173 trailing_comments: vec![],
26174 inferred_type: None,
26175 },
26176 )),
26177 trailing_comments: vec![],
26178 }));
26179 }
26180 }
26181 Ok(Expression::RegexpLike(f))
26182 } else {
26183 Ok(e)
26184 }
26185 }
26186
26187 Action::ArrayPositionSnowflakeSwap => {
26188 // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
26189 if let Expression::ArrayPosition(f) = e {
26190 Ok(Expression::ArrayPosition(Box::new(
26191 crate::expressions::BinaryFunc {
26192 this: f.expression,
26193 expression: f.this,
26194 original_name: f.original_name,
26195 inferred_type: f.inferred_type,
26196 },
26197 )))
26198 } else {
26199 Ok(e)
26200 }
26201 }
26202
26203 Action::SnowflakeArrayPositionToDuckDB => {
26204 // Snowflake ARRAY_POSITION(value, array) -> DuckDB ARRAY_POSITION(array, value) - 1
26205 // Snowflake uses 0-based indexing, DuckDB uses 1-based
26206 // The parser has this=value, expression=array (Snowflake order)
26207 if let Expression::ArrayPosition(f) = e {
26208 // Create ARRAY_POSITION(array, value) in standard order
26209 let standard_pos =
26210 Expression::ArrayPosition(Box::new(crate::expressions::BinaryFunc {
26211 this: f.expression, // array
26212 expression: f.this, // value
26213 original_name: f.original_name,
26214 inferred_type: f.inferred_type,
26215 }));
26216 // Subtract 1 for zero-based indexing
26217 Ok(Expression::Sub(Box::new(BinaryOp {
26218 left: standard_pos,
26219 right: Expression::number(1),
26220 left_comments: vec![],
26221 operator_comments: vec![],
26222 trailing_comments: vec![],
26223 inferred_type: None,
26224 })))
26225 } else {
26226 Ok(e)
26227 }
26228 }
26229
26230 Action::ArrayDistinctConvert => {
26231 // ARRAY_DISTINCT(arr) -> DuckDB NULL-aware CASE:
26232 // CASE WHEN ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
26233 // THEN LIST_APPEND(LIST_DISTINCT(LIST_FILTER(arr, _u -> NOT _u IS NULL)), NULL)
26234 // ELSE LIST_DISTINCT(arr)
26235 // END
26236 if let Expression::ArrayDistinct(f) = e {
26237 let arr = f.this;
26238
26239 // ARRAY_LENGTH(arr)
26240 let array_length = Expression::Function(Box::new(Function::new(
26241 "ARRAY_LENGTH".to_string(),
26242 vec![arr.clone()],
26243 )));
26244 // LIST_COUNT(arr)
26245 let list_count = Expression::Function(Box::new(Function::new(
26246 "LIST_COUNT".to_string(),
26247 vec![arr.clone()],
26248 )));
26249 // ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
26250 let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
26251 left: array_length,
26252 right: list_count,
26253 left_comments: vec![],
26254 operator_comments: vec![],
26255 trailing_comments: vec![],
26256 inferred_type: None,
26257 }));
26258
26259 // _u column
26260 let u_col = Expression::column("_u");
26261 // NOT _u IS NULL
26262 let u_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
26263 this: u_col.clone(),
26264 not: false,
26265 postfix_form: false,
26266 }));
26267 let not_u_is_null =
26268 Expression::Not(Box::new(crate::expressions::UnaryOp {
26269 this: u_is_null,
26270 inferred_type: None,
26271 }));
26272 // _u -> NOT _u IS NULL
26273 let filter_lambda =
26274 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26275 parameters: vec![crate::expressions::Identifier::new("_u")],
26276 body: not_u_is_null,
26277 colon: false,
26278 parameter_types: vec![],
26279 }));
26280 // LIST_FILTER(arr, _u -> NOT _u IS NULL)
26281 let list_filter = Expression::Function(Box::new(Function::new(
26282 "LIST_FILTER".to_string(),
26283 vec![arr.clone(), filter_lambda],
26284 )));
26285 // LIST_DISTINCT(LIST_FILTER(arr, ...))
26286 let list_distinct_filtered = Expression::Function(Box::new(Function::new(
26287 "LIST_DISTINCT".to_string(),
26288 vec![list_filter],
26289 )));
26290 // LIST_APPEND(LIST_DISTINCT(LIST_FILTER(...)), NULL)
26291 let list_append = Expression::Function(Box::new(Function::new(
26292 "LIST_APPEND".to_string(),
26293 vec![list_distinct_filtered, Expression::Null(Null)],
26294 )));
26295
26296 // LIST_DISTINCT(arr)
26297 let list_distinct = Expression::Function(Box::new(Function::new(
26298 "LIST_DISTINCT".to_string(),
26299 vec![arr],
26300 )));
26301
26302 // CASE WHEN neq THEN list_append ELSE list_distinct END
26303 Ok(Expression::Case(Box::new(Case {
26304 operand: None,
26305 whens: vec![(neq, list_append)],
26306 else_: Some(list_distinct),
26307 comments: Vec::new(),
26308 inferred_type: None,
26309 })))
26310 } else {
26311 Ok(e)
26312 }
26313 }
26314
26315 Action::ArrayDistinctClickHouse => {
26316 // ARRAY_DISTINCT(arr) -> arrayDistinct(arr) for ClickHouse
26317 if let Expression::ArrayDistinct(f) = e {
26318 Ok(Expression::Function(Box::new(Function::new(
26319 "arrayDistinct".to_string(),
26320 vec![f.this],
26321 ))))
26322 } else {
26323 Ok(e)
26324 }
26325 }
26326
26327 Action::ArrayContainsDuckDBConvert => {
26328 // Snowflake ARRAY_CONTAINS(value, array) -> DuckDB NULL-aware:
26329 // CASE WHEN value IS NULL
26330 // THEN NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
26331 // ELSE ARRAY_CONTAINS(array, value)
26332 // END
26333 // Note: In Rust AST from Snowflake parse, this=value (first arg), expression=array (second arg)
26334 if let Expression::ArrayContains(f) = e {
26335 let value = f.this;
26336 let array = f.expression;
26337
26338 // value IS NULL
26339 let value_is_null =
26340 Expression::IsNull(Box::new(crate::expressions::IsNull {
26341 this: value.clone(),
26342 not: false,
26343 postfix_form: false,
26344 }));
26345
26346 // ARRAY_LENGTH(array)
26347 let array_length = Expression::Function(Box::new(Function::new(
26348 "ARRAY_LENGTH".to_string(),
26349 vec![array.clone()],
26350 )));
26351 // LIST_COUNT(array)
26352 let list_count = Expression::Function(Box::new(Function::new(
26353 "LIST_COUNT".to_string(),
26354 vec![array.clone()],
26355 )));
26356 // ARRAY_LENGTH(array) <> LIST_COUNT(array)
26357 let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
26358 left: array_length,
26359 right: list_count,
26360 left_comments: vec![],
26361 operator_comments: vec![],
26362 trailing_comments: vec![],
26363 inferred_type: None,
26364 }));
26365 // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
26366 let nullif = Expression::Nullif(Box::new(crate::expressions::Nullif {
26367 this: Box::new(neq),
26368 expression: Box::new(Expression::Boolean(
26369 crate::expressions::BooleanLiteral { value: false },
26370 )),
26371 }));
26372
26373 // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
26374 let array_contains = Expression::Function(Box::new(Function::new(
26375 "ARRAY_CONTAINS".to_string(),
26376 vec![array, value],
26377 )));
26378
26379 // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
26380 Ok(Expression::Case(Box::new(Case {
26381 operand: None,
26382 whens: vec![(value_is_null, nullif)],
26383 else_: Some(array_contains),
26384 comments: Vec::new(),
26385 inferred_type: None,
26386 })))
26387 } else {
26388 Ok(e)
26389 }
26390 }
26391
26392 Action::StrPositionExpand => {
26393 // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
26394 // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
26395 // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
26396 if let Expression::StrPosition(sp) = e {
26397 let crate::expressions::StrPosition {
26398 this,
26399 substr,
26400 position,
26401 occurrence,
26402 } = *sp;
26403 let string = *this;
26404 let substr_expr = match substr {
26405 Some(s) => *s,
26406 None => Expression::Null(Null),
26407 };
26408 let pos = match position {
26409 Some(p) => *p,
26410 None => Expression::number(1),
26411 };
26412
26413 // SUBSTRING(string, pos)
26414 let substring_call = Expression::Function(Box::new(Function::new(
26415 "SUBSTRING".to_string(),
26416 vec![string.clone(), pos.clone()],
26417 )));
26418 // STRPOS(SUBSTRING(string, pos), substr)
26419 let strpos_call = Expression::Function(Box::new(Function::new(
26420 "STRPOS".to_string(),
26421 vec![substring_call, substr_expr.clone()],
26422 )));
26423 // STRPOS(...) + pos - 1
26424 let pos_adjusted =
26425 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
26426 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
26427 strpos_call.clone(),
26428 pos.clone(),
26429 ))),
26430 Expression::number(1),
26431 )));
26432 // STRPOS(...) = 0
26433 let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
26434 strpos_call.clone(),
26435 Expression::number(0),
26436 )));
26437
26438 match target {
26439 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26440 // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
26441 Ok(Expression::Function(Box::new(Function::new(
26442 "IF".to_string(),
26443 vec![is_zero, Expression::number(0), pos_adjusted],
26444 ))))
26445 }
26446 DialectType::DuckDB => {
26447 // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
26448 Ok(Expression::Case(Box::new(Case {
26449 operand: None,
26450 whens: vec![(is_zero, Expression::number(0))],
26451 else_: Some(pos_adjusted),
26452 comments: Vec::new(),
26453 inferred_type: None,
26454 })))
26455 }
26456 _ => {
26457 // Reconstruct StrPosition
26458 Ok(Expression::StrPosition(Box::new(
26459 crate::expressions::StrPosition {
26460 this: Box::new(string),
26461 substr: Some(Box::new(substr_expr)),
26462 position: Some(Box::new(pos)),
26463 occurrence,
26464 },
26465 )))
26466 }
26467 }
26468 } else {
26469 Ok(e)
26470 }
26471 }
26472
26473 Action::MonthsBetweenConvert => {
26474 if let Expression::MonthsBetween(mb) = e {
26475 let crate::expressions::BinaryFunc {
26476 this: end_date,
26477 expression: start_date,
26478 ..
26479 } = *mb;
26480 match target {
26481 DialectType::DuckDB => {
26482 let cast_end = Self::ensure_cast_date(end_date);
26483 let cast_start = Self::ensure_cast_date(start_date);
26484 let dd = Expression::Function(Box::new(Function::new(
26485 "DATE_DIFF".to_string(),
26486 vec![
26487 Expression::string("MONTH"),
26488 cast_start.clone(),
26489 cast_end.clone(),
26490 ],
26491 )));
26492 let day_end = Expression::Function(Box::new(Function::new(
26493 "DAY".to_string(),
26494 vec![cast_end.clone()],
26495 )));
26496 let day_start = Expression::Function(Box::new(Function::new(
26497 "DAY".to_string(),
26498 vec![cast_start.clone()],
26499 )));
26500 let last_day_end = Expression::Function(Box::new(Function::new(
26501 "LAST_DAY".to_string(),
26502 vec![cast_end.clone()],
26503 )));
26504 let last_day_start = Expression::Function(Box::new(Function::new(
26505 "LAST_DAY".to_string(),
26506 vec![cast_start.clone()],
26507 )));
26508 let day_last_end = Expression::Function(Box::new(Function::new(
26509 "DAY".to_string(),
26510 vec![last_day_end],
26511 )));
26512 let day_last_start = Expression::Function(Box::new(Function::new(
26513 "DAY".to_string(),
26514 vec![last_day_start],
26515 )));
26516 let cond1 = Expression::Eq(Box::new(BinaryOp::new(
26517 day_end.clone(),
26518 day_last_end,
26519 )));
26520 let cond2 = Expression::Eq(Box::new(BinaryOp::new(
26521 day_start.clone(),
26522 day_last_start,
26523 )));
26524 let both_cond =
26525 Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
26526 let day_diff =
26527 Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
26528 let day_diff_paren =
26529 Expression::Paren(Box::new(crate::expressions::Paren {
26530 this: day_diff,
26531 trailing_comments: Vec::new(),
26532 }));
26533 let frac = Expression::Div(Box::new(BinaryOp::new(
26534 day_diff_paren,
26535 Expression::Literal(Box::new(Literal::Number(
26536 "31.0".to_string(),
26537 ))),
26538 )));
26539 let case_expr = Expression::Case(Box::new(Case {
26540 operand: None,
26541 whens: vec![(both_cond, Expression::number(0))],
26542 else_: Some(frac),
26543 comments: Vec::new(),
26544 inferred_type: None,
26545 }));
26546 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
26547 }
26548 DialectType::Snowflake | DialectType::Redshift => {
26549 let unit = Expression::Identifier(Identifier::new("MONTH"));
26550 Ok(Expression::Function(Box::new(Function::new(
26551 "DATEDIFF".to_string(),
26552 vec![unit, start_date, end_date],
26553 ))))
26554 }
26555 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26556 Ok(Expression::Function(Box::new(Function::new(
26557 "DATE_DIFF".to_string(),
26558 vec![Expression::string("MONTH"), start_date, end_date],
26559 ))))
26560 }
26561 _ => Ok(Expression::MonthsBetween(Box::new(
26562 crate::expressions::BinaryFunc {
26563 this: end_date,
26564 expression: start_date,
26565 original_name: None,
26566 inferred_type: None,
26567 },
26568 ))),
26569 }
26570 } else {
26571 Ok(e)
26572 }
26573 }
26574
26575 Action::AddMonthsConvert => {
26576 if let Expression::AddMonths(am) = e {
26577 let date = am.this;
26578 let val = am.expression;
26579 match target {
26580 DialectType::TSQL | DialectType::Fabric => {
26581 let cast_date = Self::ensure_cast_datetime2(date);
26582 Ok(Expression::Function(Box::new(Function::new(
26583 "DATEADD".to_string(),
26584 vec![
26585 Expression::Identifier(Identifier::new("MONTH")),
26586 val,
26587 cast_date,
26588 ],
26589 ))))
26590 }
26591 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
26592 // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
26593 // Optionally wrapped in CAST(... AS type) if the input had a specific type
26594
26595 // Determine the cast type from the date expression
26596 let (cast_date, return_type) = match &date {
26597 Expression::Literal(lit)
26598 if matches!(lit.as_ref(), Literal::String(_)) =>
26599 {
26600 // String literal: CAST(str AS TIMESTAMP), no outer CAST
26601 (
26602 Expression::Cast(Box::new(Cast {
26603 this: date.clone(),
26604 to: DataType::Timestamp {
26605 precision: None,
26606 timezone: false,
26607 },
26608 trailing_comments: Vec::new(),
26609 double_colon_syntax: false,
26610 format: None,
26611 default: None,
26612 inferred_type: None,
26613 })),
26614 None,
26615 )
26616 }
26617 Expression::Cast(c) => {
26618 // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
26619 (date.clone(), Some(c.to.clone()))
26620 }
26621 _ => {
26622 // Expression or NULL::TYPE - keep as-is, check for cast type
26623 if let Expression::Cast(c) = &date {
26624 (date.clone(), Some(c.to.clone()))
26625 } else {
26626 (date.clone(), None)
26627 }
26628 }
26629 };
26630
26631 // Build the interval expression
26632 // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
26633 // For integer values, use INTERVAL val MONTH
26634 let is_non_integer_val = match &val {
26635 Expression::Literal(lit)
26636 if matches!(lit.as_ref(), Literal::Number(_)) =>
26637 {
26638 let Literal::Number(n) = lit.as_ref() else {
26639 unreachable!()
26640 };
26641 n.contains('.')
26642 }
26643 Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
26644 Expression::Neg(n) => {
26645 if let Expression::Literal(lit) = &n.this {
26646 if let Literal::Number(s) = lit.as_ref() {
26647 s.contains('.')
26648 } else {
26649 false
26650 }
26651 } else {
26652 false
26653 }
26654 }
26655 _ => false,
26656 };
26657
26658 let add_interval = if is_non_integer_val {
26659 // TO_MONTHS(CAST(ROUND(val) AS INT))
26660 let round_val = Expression::Function(Box::new(Function::new(
26661 "ROUND".to_string(),
26662 vec![val.clone()],
26663 )));
26664 let cast_int = Expression::Cast(Box::new(Cast {
26665 this: round_val,
26666 to: DataType::Int {
26667 length: None,
26668 integer_spelling: false,
26669 },
26670 trailing_comments: Vec::new(),
26671 double_colon_syntax: false,
26672 format: None,
26673 default: None,
26674 inferred_type: None,
26675 }));
26676 Expression::Function(Box::new(Function::new(
26677 "TO_MONTHS".to_string(),
26678 vec![cast_int],
26679 )))
26680 } else {
26681 // INTERVAL val MONTH
26682 // For negative numbers, wrap in parens
26683 let interval_val = match &val {
26684 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n.starts_with('-')) =>
26685 {
26686 let Literal::Number(_) = lit.as_ref() else {
26687 unreachable!()
26688 };
26689 Expression::Paren(Box::new(Paren {
26690 this: val.clone(),
26691 trailing_comments: Vec::new(),
26692 }))
26693 }
26694 Expression::Neg(_) => Expression::Paren(Box::new(Paren {
26695 this: val.clone(),
26696 trailing_comments: Vec::new(),
26697 })),
26698 Expression::Null(_) => Expression::Paren(Box::new(Paren {
26699 this: val.clone(),
26700 trailing_comments: Vec::new(),
26701 })),
26702 _ => val.clone(),
26703 };
26704 Expression::Interval(Box::new(crate::expressions::Interval {
26705 this: Some(interval_val),
26706 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26707 unit: crate::expressions::IntervalUnit::Month,
26708 use_plural: false,
26709 }),
26710 }))
26711 };
26712
26713 // Build: date + interval
26714 let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
26715 cast_date.clone(),
26716 add_interval.clone(),
26717 )));
26718
26719 // Build LAST_DAY(date)
26720 let last_day_date = Expression::Function(Box::new(Function::new(
26721 "LAST_DAY".to_string(),
26722 vec![cast_date.clone()],
26723 )));
26724
26725 // Build LAST_DAY(date + interval)
26726 let last_day_date_plus =
26727 Expression::Function(Box::new(Function::new(
26728 "LAST_DAY".to_string(),
26729 vec![date_plus_interval.clone()],
26730 )));
26731
26732 // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
26733 let case_expr = Expression::Case(Box::new(Case {
26734 operand: None,
26735 whens: vec![(
26736 Expression::Eq(Box::new(BinaryOp::new(
26737 last_day_date,
26738 cast_date.clone(),
26739 ))),
26740 last_day_date_plus,
26741 )],
26742 else_: Some(date_plus_interval),
26743 comments: Vec::new(),
26744 inferred_type: None,
26745 }));
26746
26747 // Wrap in CAST(... AS type) if needed
26748 if let Some(dt) = return_type {
26749 Ok(Expression::Cast(Box::new(Cast {
26750 this: case_expr,
26751 to: dt,
26752 trailing_comments: Vec::new(),
26753 double_colon_syntax: false,
26754 format: None,
26755 default: None,
26756 inferred_type: None,
26757 })))
26758 } else {
26759 Ok(case_expr)
26760 }
26761 }
26762 DialectType::DuckDB => {
26763 // Non-Snowflake source: simple date + INTERVAL
26764 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
26765 {
26766 Expression::Cast(Box::new(Cast {
26767 this: date,
26768 to: DataType::Timestamp {
26769 precision: None,
26770 timezone: false,
26771 },
26772 trailing_comments: Vec::new(),
26773 double_colon_syntax: false,
26774 format: None,
26775 default: None,
26776 inferred_type: None,
26777 }))
26778 } else {
26779 date
26780 };
26781 let interval =
26782 Expression::Interval(Box::new(crate::expressions::Interval {
26783 this: Some(val),
26784 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26785 unit: crate::expressions::IntervalUnit::Month,
26786 use_plural: false,
26787 }),
26788 }));
26789 Ok(Expression::Add(Box::new(BinaryOp::new(
26790 cast_date, interval,
26791 ))))
26792 }
26793 DialectType::Snowflake => {
26794 // Keep ADD_MONTHS when source is also Snowflake
26795 if matches!(source, DialectType::Snowflake) {
26796 Ok(Expression::Function(Box::new(Function::new(
26797 "ADD_MONTHS".to_string(),
26798 vec![date, val],
26799 ))))
26800 } else {
26801 Ok(Expression::Function(Box::new(Function::new(
26802 "DATEADD".to_string(),
26803 vec![
26804 Expression::Identifier(Identifier::new("MONTH")),
26805 val,
26806 date,
26807 ],
26808 ))))
26809 }
26810 }
26811 DialectType::Redshift => {
26812 Ok(Expression::Function(Box::new(Function::new(
26813 "DATEADD".to_string(),
26814 vec![
26815 Expression::Identifier(Identifier::new("MONTH")),
26816 val,
26817 date,
26818 ],
26819 ))))
26820 }
26821 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26822 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
26823 {
26824 Expression::Cast(Box::new(Cast {
26825 this: date,
26826 to: DataType::Timestamp {
26827 precision: None,
26828 timezone: false,
26829 },
26830 trailing_comments: Vec::new(),
26831 double_colon_syntax: false,
26832 format: None,
26833 default: None,
26834 inferred_type: None,
26835 }))
26836 } else {
26837 date
26838 };
26839 Ok(Expression::Function(Box::new(Function::new(
26840 "DATE_ADD".to_string(),
26841 vec![Expression::string("MONTH"), val, cast_date],
26842 ))))
26843 }
26844 DialectType::BigQuery => {
26845 let interval =
26846 Expression::Interval(Box::new(crate::expressions::Interval {
26847 this: Some(val),
26848 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
26849 unit: crate::expressions::IntervalUnit::Month,
26850 use_plural: false,
26851 }),
26852 }));
26853 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
26854 {
26855 Expression::Cast(Box::new(Cast {
26856 this: date,
26857 to: DataType::Custom {
26858 name: "DATETIME".to_string(),
26859 },
26860 trailing_comments: Vec::new(),
26861 double_colon_syntax: false,
26862 format: None,
26863 default: None,
26864 inferred_type: None,
26865 }))
26866 } else {
26867 date
26868 };
26869 Ok(Expression::Function(Box::new(Function::new(
26870 "DATE_ADD".to_string(),
26871 vec![cast_date, interval],
26872 ))))
26873 }
26874 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
26875 Ok(Expression::Function(Box::new(Function::new(
26876 "ADD_MONTHS".to_string(),
26877 vec![date, val],
26878 ))))
26879 }
26880 _ => {
26881 // Default: keep as AddMonths expression
26882 Ok(Expression::AddMonths(Box::new(
26883 crate::expressions::BinaryFunc {
26884 this: date,
26885 expression: val,
26886 original_name: None,
26887 inferred_type: None,
26888 },
26889 )))
26890 }
26891 }
26892 } else {
26893 Ok(e)
26894 }
26895 }
26896
26897 Action::PercentileContConvert => {
26898 // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
26899 // Presto/Trino: APPROX_PERCENTILE(col, p)
26900 // Spark/Databricks: PERCENTILE_APPROX(col, p)
26901 if let Expression::WithinGroup(wg) = e {
26902 // Extract percentile value and order by column
26903 let (percentile, _is_disc) = match &wg.this {
26904 Expression::Function(f) => {
26905 let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
26906 let pct = f.args.first().cloned().unwrap_or(Expression::Literal(
26907 Box::new(Literal::Number("0.5".to_string())),
26908 ));
26909 (pct, is_disc)
26910 }
26911 Expression::AggregateFunction(af) => {
26912 let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
26913 let pct = af.args.first().cloned().unwrap_or(Expression::Literal(
26914 Box::new(Literal::Number("0.5".to_string())),
26915 ));
26916 (pct, is_disc)
26917 }
26918 Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
26919 _ => return Ok(Expression::WithinGroup(wg)),
26920 };
26921 let col = wg.order_by.first().map(|o| o.this.clone()).unwrap_or(
26922 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
26923 );
26924
26925 let func_name = match target {
26926 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26927 "APPROX_PERCENTILE"
26928 }
26929 _ => "PERCENTILE_APPROX", // Spark, Databricks
26930 };
26931 Ok(Expression::Function(Box::new(Function::new(
26932 func_name.to_string(),
26933 vec![col, percentile],
26934 ))))
26935 } else {
26936 Ok(e)
26937 }
26938 }
26939
26940 Action::CurrentUserSparkParens => {
26941 // CURRENT_USER -> CURRENT_USER() for Spark
26942 if let Expression::CurrentUser(_) = e {
26943 Ok(Expression::Function(Box::new(Function::new(
26944 "CURRENT_USER".to_string(),
26945 vec![],
26946 ))))
26947 } else {
26948 Ok(e)
26949 }
26950 }
26951
26952 Action::SparkDateFuncCast => {
26953 // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
26954 let cast_arg = |arg: Expression| -> Expression {
26955 match target {
26956 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26957 Self::double_cast_timestamp_date(arg)
26958 }
26959 _ => {
26960 // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
26961 Self::ensure_cast_date(arg)
26962 }
26963 }
26964 };
26965 match e {
26966 Expression::Month(f) => Ok(Expression::Month(Box::new(
26967 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
26968 ))),
26969 Expression::Year(f) => Ok(Expression::Year(Box::new(
26970 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
26971 ))),
26972 Expression::Day(f) => Ok(Expression::Day(Box::new(
26973 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
26974 ))),
26975 other => Ok(other),
26976 }
26977 }
26978
26979 Action::MapFromArraysConvert => {
26980 // Expression::MapFromArrays -> target-specific
26981 if let Expression::MapFromArrays(mfa) = e {
26982 let keys = mfa.this;
26983 let values = mfa.expression;
26984 match target {
26985 DialectType::Snowflake => Ok(Expression::Function(Box::new(
26986 Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
26987 ))),
26988 _ => {
26989 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
26990 Ok(Expression::Function(Box::new(Function::new(
26991 "MAP".to_string(),
26992 vec![keys, values],
26993 ))))
26994 }
26995 }
26996 } else {
26997 Ok(e)
26998 }
26999 }
27000
27001 Action::AnyToExists => {
27002 if let Expression::Any(q) = e {
27003 if let Some(op) = q.op.clone() {
27004 let lambda_param = crate::expressions::Identifier::new("x");
27005 let rhs = Expression::Identifier(lambda_param.clone());
27006 let body = match op {
27007 crate::expressions::QuantifiedOp::Eq => {
27008 Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
27009 }
27010 crate::expressions::QuantifiedOp::Neq => {
27011 Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
27012 }
27013 crate::expressions::QuantifiedOp::Lt => {
27014 Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
27015 }
27016 crate::expressions::QuantifiedOp::Lte => {
27017 Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
27018 }
27019 crate::expressions::QuantifiedOp::Gt => {
27020 Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
27021 }
27022 crate::expressions::QuantifiedOp::Gte => {
27023 Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
27024 }
27025 };
27026 let lambda =
27027 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27028 parameters: vec![lambda_param],
27029 body,
27030 colon: false,
27031 parameter_types: Vec::new(),
27032 }));
27033 Ok(Expression::Function(Box::new(Function::new(
27034 "EXISTS".to_string(),
27035 vec![q.subquery, lambda],
27036 ))))
27037 } else {
27038 Ok(Expression::Any(q))
27039 }
27040 } else {
27041 Ok(e)
27042 }
27043 }
27044
27045 Action::GenerateSeriesConvert => {
27046 // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
27047 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
27048 // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
27049 if let Expression::Function(f) = e {
27050 if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
27051 let start = f.args[0].clone();
27052 let end = f.args[1].clone();
27053 let step = f.args.get(2).cloned();
27054
27055 // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
27056 let step = step.map(|s| Self::normalize_interval_string(s, target));
27057
27058 // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
27059 let maybe_cast_timestamp = |arg: Expression| -> Expression {
27060 if matches!(
27061 target,
27062 DialectType::Presto
27063 | DialectType::Trino
27064 | DialectType::Athena
27065 | DialectType::Spark
27066 | DialectType::Databricks
27067 | DialectType::Hive
27068 ) {
27069 match &arg {
27070 Expression::CurrentTimestamp(_) => {
27071 Expression::Cast(Box::new(Cast {
27072 this: arg,
27073 to: DataType::Timestamp {
27074 precision: None,
27075 timezone: false,
27076 },
27077 trailing_comments: Vec::new(),
27078 double_colon_syntax: false,
27079 format: None,
27080 default: None,
27081 inferred_type: None,
27082 }))
27083 }
27084 _ => arg,
27085 }
27086 } else {
27087 arg
27088 }
27089 };
27090
27091 let start = maybe_cast_timestamp(start);
27092 let end = maybe_cast_timestamp(end);
27093
27094 // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
27095 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
27096 let mut gs_args = vec![start, end];
27097 if let Some(step) = step {
27098 gs_args.push(step);
27099 }
27100 return Ok(Expression::Function(Box::new(Function::new(
27101 "GENERATE_SERIES".to_string(),
27102 gs_args,
27103 ))));
27104 }
27105
27106 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
27107 if matches!(target, DialectType::DuckDB) {
27108 let mut gs_args = vec![start, end];
27109 if let Some(step) = step {
27110 gs_args.push(step);
27111 }
27112 let gs = Expression::Function(Box::new(Function::new(
27113 "GENERATE_SERIES".to_string(),
27114 gs_args,
27115 )));
27116 return Ok(Expression::Function(Box::new(Function::new(
27117 "UNNEST".to_string(),
27118 vec![gs],
27119 ))));
27120 }
27121
27122 let mut seq_args = vec![start, end];
27123 if let Some(step) = step {
27124 seq_args.push(step);
27125 }
27126
27127 let seq = Expression::Function(Box::new(Function::new(
27128 "SEQUENCE".to_string(),
27129 seq_args,
27130 )));
27131
27132 match target {
27133 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
27134 // Wrap in UNNEST
27135 Ok(Expression::Function(Box::new(Function::new(
27136 "UNNEST".to_string(),
27137 vec![seq],
27138 ))))
27139 }
27140 DialectType::Spark
27141 | DialectType::Databricks
27142 | DialectType::Hive => {
27143 // Wrap in EXPLODE
27144 Ok(Expression::Function(Box::new(Function::new(
27145 "EXPLODE".to_string(),
27146 vec![seq],
27147 ))))
27148 }
27149 _ => {
27150 // Just SEQUENCE for others
27151 Ok(seq)
27152 }
27153 }
27154 } else {
27155 Ok(Expression::Function(f))
27156 }
27157 } else {
27158 Ok(e)
27159 }
27160 }
27161
27162 Action::ConcatCoalesceWrap => {
27163 // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
27164 // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
27165 if let Expression::Function(f) = e {
27166 if f.name.eq_ignore_ascii_case("CONCAT") {
27167 let new_args: Vec<Expression> = f
27168 .args
27169 .into_iter()
27170 .map(|arg| {
27171 let cast_arg = if matches!(
27172 target,
27173 DialectType::Presto
27174 | DialectType::Trino
27175 | DialectType::Athena
27176 ) {
27177 Expression::Cast(Box::new(Cast {
27178 this: arg,
27179 to: DataType::VarChar {
27180 length: None,
27181 parenthesized_length: false,
27182 },
27183 trailing_comments: Vec::new(),
27184 double_colon_syntax: false,
27185 format: None,
27186 default: None,
27187 inferred_type: None,
27188 }))
27189 } else {
27190 arg
27191 };
27192 Expression::Function(Box::new(Function::new(
27193 "COALESCE".to_string(),
27194 vec![cast_arg, Expression::string("")],
27195 )))
27196 })
27197 .collect();
27198 Ok(Expression::Function(Box::new(Function::new(
27199 "CONCAT".to_string(),
27200 new_args,
27201 ))))
27202 } else {
27203 Ok(Expression::Function(f))
27204 }
27205 } else {
27206 Ok(e)
27207 }
27208 }
27209
27210 Action::PipeConcatToConcat => {
27211 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
27212 if let Expression::Concat(op) = e {
27213 let cast_left = Expression::Cast(Box::new(Cast {
27214 this: op.left,
27215 to: DataType::VarChar {
27216 length: None,
27217 parenthesized_length: false,
27218 },
27219 trailing_comments: Vec::new(),
27220 double_colon_syntax: false,
27221 format: None,
27222 default: None,
27223 inferred_type: None,
27224 }));
27225 let cast_right = Expression::Cast(Box::new(Cast {
27226 this: op.right,
27227 to: DataType::VarChar {
27228 length: None,
27229 parenthesized_length: false,
27230 },
27231 trailing_comments: Vec::new(),
27232 double_colon_syntax: false,
27233 format: None,
27234 default: None,
27235 inferred_type: None,
27236 }));
27237 Ok(Expression::Function(Box::new(Function::new(
27238 "CONCAT".to_string(),
27239 vec![cast_left, cast_right],
27240 ))))
27241 } else {
27242 Ok(e)
27243 }
27244 }
27245
27246 Action::DivFuncConvert => {
27247 // DIV(a, b) -> target-specific integer division
27248 if let Expression::Function(f) = e {
27249 if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
27250 let a = f.args[0].clone();
27251 let b = f.args[1].clone();
27252 match target {
27253 DialectType::DuckDB => {
27254 // DIV(a, b) -> CAST(a // b AS DECIMAL)
27255 let int_div = Expression::IntDiv(Box::new(
27256 crate::expressions::BinaryFunc {
27257 this: a,
27258 expression: b,
27259 original_name: None,
27260 inferred_type: None,
27261 },
27262 ));
27263 Ok(Expression::Cast(Box::new(Cast {
27264 this: int_div,
27265 to: DataType::Decimal {
27266 precision: None,
27267 scale: None,
27268 },
27269 trailing_comments: Vec::new(),
27270 double_colon_syntax: false,
27271 format: None,
27272 default: None,
27273 inferred_type: None,
27274 })))
27275 }
27276 DialectType::BigQuery => {
27277 // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
27278 let div_func = Expression::Function(Box::new(Function::new(
27279 "DIV".to_string(),
27280 vec![a, b],
27281 )));
27282 Ok(Expression::Cast(Box::new(Cast {
27283 this: div_func,
27284 to: DataType::Custom {
27285 name: "NUMERIC".to_string(),
27286 },
27287 trailing_comments: Vec::new(),
27288 double_colon_syntax: false,
27289 format: None,
27290 default: None,
27291 inferred_type: None,
27292 })))
27293 }
27294 DialectType::SQLite => {
27295 // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
27296 let cast_a = Expression::Cast(Box::new(Cast {
27297 this: a,
27298 to: DataType::Custom {
27299 name: "REAL".to_string(),
27300 },
27301 trailing_comments: Vec::new(),
27302 double_colon_syntax: false,
27303 format: None,
27304 default: None,
27305 inferred_type: None,
27306 }));
27307 let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
27308 let cast_int = Expression::Cast(Box::new(Cast {
27309 this: div,
27310 to: DataType::Int {
27311 length: None,
27312 integer_spelling: true,
27313 },
27314 trailing_comments: Vec::new(),
27315 double_colon_syntax: false,
27316 format: None,
27317 default: None,
27318 inferred_type: None,
27319 }));
27320 Ok(Expression::Cast(Box::new(Cast {
27321 this: cast_int,
27322 to: DataType::Custom {
27323 name: "REAL".to_string(),
27324 },
27325 trailing_comments: Vec::new(),
27326 double_colon_syntax: false,
27327 format: None,
27328 default: None,
27329 inferred_type: None,
27330 })))
27331 }
27332 _ => Ok(Expression::Function(f)),
27333 }
27334 } else {
27335 Ok(Expression::Function(f))
27336 }
27337 } else {
27338 Ok(e)
27339 }
27340 }
27341
27342 Action::JsonObjectAggConvert => {
27343 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
27344 match e {
27345 Expression::Function(f) => Ok(Expression::Function(Box::new(
27346 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
27347 ))),
27348 Expression::AggregateFunction(af) => {
27349 // AggregateFunction stores all args in the `args` vec
27350 Ok(Expression::Function(Box::new(Function::new(
27351 "JSON_GROUP_OBJECT".to_string(),
27352 af.args,
27353 ))))
27354 }
27355 other => Ok(other),
27356 }
27357 }
27358
27359 Action::JsonbExistsConvert => {
27360 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
27361 if let Expression::Function(f) = e {
27362 if f.args.len() == 2 {
27363 let json_expr = f.args[0].clone();
27364 let key = match &f.args[1] {
27365 Expression::Literal(lit)
27366 if matches!(
27367 lit.as_ref(),
27368 crate::expressions::Literal::String(_)
27369 ) =>
27370 {
27371 let crate::expressions::Literal::String(s) = lit.as_ref()
27372 else {
27373 unreachable!()
27374 };
27375 format!("$.{}", s)
27376 }
27377 _ => return Ok(Expression::Function(f)),
27378 };
27379 Ok(Expression::Function(Box::new(Function::new(
27380 "JSON_EXISTS".to_string(),
27381 vec![json_expr, Expression::string(&key)],
27382 ))))
27383 } else {
27384 Ok(Expression::Function(f))
27385 }
27386 } else {
27387 Ok(e)
27388 }
27389 }
27390
27391 Action::DateBinConvert => {
27392 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
27393 if let Expression::Function(f) = e {
27394 Ok(Expression::Function(Box::new(Function::new(
27395 "TIME_BUCKET".to_string(),
27396 f.args,
27397 ))))
27398 } else {
27399 Ok(e)
27400 }
27401 }
27402
27403 Action::MysqlCastCharToText => {
27404 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
27405 if let Expression::Cast(mut c) = e {
27406 c.to = DataType::Text;
27407 Ok(Expression::Cast(c))
27408 } else {
27409 Ok(e)
27410 }
27411 }
27412
27413 Action::SparkCastVarcharToString => {
27414 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
27415 match e {
27416 Expression::Cast(mut c) => {
27417 c.to = Self::normalize_varchar_to_string(c.to);
27418 Ok(Expression::Cast(c))
27419 }
27420 Expression::TryCast(mut c) => {
27421 c.to = Self::normalize_varchar_to_string(c.to);
27422 Ok(Expression::TryCast(c))
27423 }
27424 _ => Ok(e),
27425 }
27426 }
27427
27428 Action::MinMaxToLeastGreatest => {
27429 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
27430 if let Expression::Function(f) = e {
27431 let new_name = if f.name.eq_ignore_ascii_case("MIN") {
27432 "LEAST"
27433 } else if f.name.eq_ignore_ascii_case("MAX") {
27434 "GREATEST"
27435 } else {
27436 return Ok(Expression::Function(f));
27437 };
27438 Ok(Expression::Function(Box::new(Function::new(
27439 new_name.to_string(),
27440 f.args,
27441 ))))
27442 } else {
27443 Ok(e)
27444 }
27445 }
27446
27447 Action::ClickHouseUniqToApproxCountDistinct => {
27448 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
27449 if let Expression::Function(f) = e {
27450 Ok(Expression::Function(Box::new(Function::new(
27451 "APPROX_COUNT_DISTINCT".to_string(),
27452 f.args,
27453 ))))
27454 } else {
27455 Ok(e)
27456 }
27457 }
27458
27459 Action::ClickHouseAnyToAnyValue => {
27460 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
27461 if let Expression::Function(f) = e {
27462 Ok(Expression::Function(Box::new(Function::new(
27463 "ANY_VALUE".to_string(),
27464 f.args,
27465 ))))
27466 } else {
27467 Ok(e)
27468 }
27469 }
27470
27471 Action::OracleVarchar2ToVarchar => {
27472 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
27473 if let Expression::DataType(DataType::Custom { ref name }) = e {
27474 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
27475 let starts_varchar2 =
27476 name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2(");
27477 let starts_nvarchar2 =
27478 name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2(");
27479 let inner = if starts_varchar2 || starts_nvarchar2 {
27480 let start = if starts_nvarchar2 { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
27481 let end = name.len() - 1; // skip trailing ")"
27482 Some(&name[start..end])
27483 } else {
27484 Option::None
27485 };
27486 if let Some(inner_str) = inner {
27487 // Parse the number part, ignoring BYTE/CHAR qualifier
27488 let num_str = inner_str.split_whitespace().next().unwrap_or("");
27489 if let Ok(n) = num_str.parse::<u32>() {
27490 Ok(Expression::DataType(DataType::VarChar {
27491 length: Some(n),
27492 parenthesized_length: false,
27493 }))
27494 } else {
27495 Ok(e)
27496 }
27497 } else {
27498 // Plain VARCHAR2 / NVARCHAR2 without parens
27499 Ok(Expression::DataType(DataType::VarChar {
27500 length: Option::None,
27501 parenthesized_length: false,
27502 }))
27503 }
27504 } else {
27505 Ok(e)
27506 }
27507 }
27508
27509 Action::Nvl2Expand => {
27510 // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
27511 // But keep as NVL2 for dialects that support it natively
27512 let nvl2_native = matches!(
27513 target,
27514 DialectType::Oracle
27515 | DialectType::Snowflake
27516 | DialectType::Redshift
27517 | DialectType::Teradata
27518 | DialectType::Spark
27519 | DialectType::Databricks
27520 );
27521 let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
27522 if nvl2_native {
27523 return Ok(Expression::Nvl2(nvl2));
27524 }
27525 (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
27526 } else if let Expression::Function(f) = e {
27527 if nvl2_native {
27528 return Ok(Expression::Function(Box::new(Function::new(
27529 "NVL2".to_string(),
27530 f.args,
27531 ))));
27532 }
27533 if f.args.len() < 2 {
27534 return Ok(Expression::Function(f));
27535 }
27536 let mut args = f.args;
27537 let a = args.remove(0);
27538 let b = args.remove(0);
27539 let c = if !args.is_empty() {
27540 Some(args.remove(0))
27541 } else {
27542 Option::None
27543 };
27544 (a, b, c)
27545 } else {
27546 return Ok(e);
27547 };
27548 // Build: NOT (a IS NULL)
27549 let is_null = Expression::IsNull(Box::new(IsNull {
27550 this: a,
27551 not: false,
27552 postfix_form: false,
27553 }));
27554 let not_null = Expression::Not(Box::new(crate::expressions::UnaryOp {
27555 this: is_null,
27556 inferred_type: None,
27557 }));
27558 Ok(Expression::Case(Box::new(Case {
27559 operand: Option::None,
27560 whens: vec![(not_null, b)],
27561 else_: c,
27562 comments: Vec::new(),
27563 inferred_type: None,
27564 })))
27565 }
27566
27567 Action::IfnullToCoalesce => {
27568 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
27569 if let Expression::Coalesce(mut cf) = e {
27570 cf.original_name = Option::None;
27571 Ok(Expression::Coalesce(cf))
27572 } else if let Expression::Function(f) = e {
27573 Ok(Expression::Function(Box::new(Function::new(
27574 "COALESCE".to_string(),
27575 f.args,
27576 ))))
27577 } else {
27578 Ok(e)
27579 }
27580 }
27581
27582 Action::IsAsciiConvert => {
27583 // IS_ASCII(x) -> dialect-specific ASCII check
27584 if let Expression::Function(f) = e {
27585 let arg = f.args.into_iter().next().unwrap();
27586 match target {
27587 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
27588 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
27589 Ok(Expression::Function(Box::new(Function::new(
27590 "REGEXP_LIKE".to_string(),
27591 vec![
27592 arg,
27593 Expression::Literal(Box::new(Literal::String(
27594 "^[[:ascii:]]*$".to_string(),
27595 ))),
27596 ],
27597 ))))
27598 }
27599 DialectType::PostgreSQL
27600 | DialectType::Redshift
27601 | DialectType::Materialize
27602 | DialectType::RisingWave => {
27603 // (x ~ '^[[:ascii:]]*$')
27604 Ok(Expression::Paren(Box::new(Paren {
27605 this: Expression::RegexpLike(Box::new(
27606 crate::expressions::RegexpFunc {
27607 this: arg,
27608 pattern: Expression::Literal(Box::new(
27609 Literal::String("^[[:ascii:]]*$".to_string()),
27610 )),
27611 flags: Option::None,
27612 },
27613 )),
27614 trailing_comments: Vec::new(),
27615 })))
27616 }
27617 DialectType::SQLite => {
27618 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
27619 let hex_lit = Expression::Literal(Box::new(Literal::HexString(
27620 "2a5b5e012d7f5d2a".to_string(),
27621 )));
27622 let cast_expr = Expression::Cast(Box::new(Cast {
27623 this: hex_lit,
27624 to: DataType::Text,
27625 trailing_comments: Vec::new(),
27626 double_colon_syntax: false,
27627 format: Option::None,
27628 default: Option::None,
27629 inferred_type: None,
27630 }));
27631 let glob = Expression::Glob(Box::new(BinaryOp {
27632 left: arg,
27633 right: cast_expr,
27634 left_comments: Vec::new(),
27635 operator_comments: Vec::new(),
27636 trailing_comments: Vec::new(),
27637 inferred_type: None,
27638 }));
27639 Ok(Expression::Paren(Box::new(Paren {
27640 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
27641 this: glob,
27642 inferred_type: None,
27643 })),
27644 trailing_comments: Vec::new(),
27645 })))
27646 }
27647 DialectType::TSQL | DialectType::Fabric => {
27648 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
27649 let hex_lit = Expression::Literal(Box::new(Literal::HexNumber(
27650 "255b5e002d7f5d25".to_string(),
27651 )));
27652 let convert_expr = Expression::Convert(Box::new(
27653 crate::expressions::ConvertFunc {
27654 this: hex_lit,
27655 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
27656 style: None,
27657 },
27658 ));
27659 let collated = Expression::Collation(Box::new(
27660 crate::expressions::CollationExpr {
27661 this: convert_expr,
27662 collation: "Latin1_General_BIN".to_string(),
27663 quoted: false,
27664 double_quoted: false,
27665 },
27666 ));
27667 let patindex = Expression::Function(Box::new(Function::new(
27668 "PATINDEX".to_string(),
27669 vec![collated, arg],
27670 )));
27671 let zero =
27672 Expression::Literal(Box::new(Literal::Number("0".to_string())));
27673 let eq_zero = Expression::Eq(Box::new(BinaryOp {
27674 left: patindex,
27675 right: zero,
27676 left_comments: Vec::new(),
27677 operator_comments: Vec::new(),
27678 trailing_comments: Vec::new(),
27679 inferred_type: None,
27680 }));
27681 Ok(Expression::Paren(Box::new(Paren {
27682 this: eq_zero,
27683 trailing_comments: Vec::new(),
27684 })))
27685 }
27686 DialectType::Oracle => {
27687 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
27688 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
27689 let s1 = Expression::Literal(Box::new(Literal::String(
27690 "^[".to_string(),
27691 )));
27692 let chr1 = Expression::Function(Box::new(Function::new(
27693 "CHR".to_string(),
27694 vec![Expression::Literal(Box::new(Literal::Number(
27695 "1".to_string(),
27696 )))],
27697 )));
27698 let dash =
27699 Expression::Literal(Box::new(Literal::String("-".to_string())));
27700 let chr127 = Expression::Function(Box::new(Function::new(
27701 "CHR".to_string(),
27702 vec![Expression::Literal(Box::new(Literal::Number(
27703 "127".to_string(),
27704 )))],
27705 )));
27706 let s2 = Expression::Literal(Box::new(Literal::String(
27707 "]*$".to_string(),
27708 )));
27709 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
27710 let concat1 =
27711 Expression::DPipe(Box::new(crate::expressions::DPipe {
27712 this: Box::new(s1),
27713 expression: Box::new(chr1),
27714 safe: None,
27715 }));
27716 let concat2 =
27717 Expression::DPipe(Box::new(crate::expressions::DPipe {
27718 this: Box::new(concat1),
27719 expression: Box::new(dash),
27720 safe: None,
27721 }));
27722 let concat3 =
27723 Expression::DPipe(Box::new(crate::expressions::DPipe {
27724 this: Box::new(concat2),
27725 expression: Box::new(chr127),
27726 safe: None,
27727 }));
27728 let concat4 =
27729 Expression::DPipe(Box::new(crate::expressions::DPipe {
27730 this: Box::new(concat3),
27731 expression: Box::new(s2),
27732 safe: None,
27733 }));
27734 let regexp_like = Expression::Function(Box::new(Function::new(
27735 "REGEXP_LIKE".to_string(),
27736 vec![arg, concat4],
27737 )));
27738 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
27739 let true_expr =
27740 Expression::Column(Box::new(crate::expressions::Column {
27741 name: Identifier {
27742 name: "TRUE".to_string(),
27743 quoted: false,
27744 trailing_comments: Vec::new(),
27745 span: None,
27746 },
27747 table: None,
27748 join_mark: false,
27749 trailing_comments: Vec::new(),
27750 span: None,
27751 inferred_type: None,
27752 }));
27753 let nvl = Expression::Function(Box::new(Function::new(
27754 "NVL".to_string(),
27755 vec![regexp_like, true_expr],
27756 )));
27757 Ok(nvl)
27758 }
27759 _ => Ok(Expression::Function(Box::new(Function::new(
27760 "IS_ASCII".to_string(),
27761 vec![arg],
27762 )))),
27763 }
27764 } else {
27765 Ok(e)
27766 }
27767 }
27768
27769 Action::StrPositionConvert => {
27770 // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
27771 if let Expression::Function(f) = e {
27772 if f.args.len() < 2 {
27773 return Ok(Expression::Function(f));
27774 }
27775 let mut args = f.args;
27776
27777 let haystack = args.remove(0);
27778 let needle = args.remove(0);
27779 let position = if !args.is_empty() {
27780 Some(args.remove(0))
27781 } else {
27782 Option::None
27783 };
27784 let occurrence = if !args.is_empty() {
27785 Some(args.remove(0))
27786 } else {
27787 Option::None
27788 };
27789
27790 // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
27791 // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
27792 fn build_position_expansion(
27793 haystack: Expression,
27794 needle: Expression,
27795 pos: Expression,
27796 occurrence: Option<Expression>,
27797 inner_func: &str,
27798 wrapper: &str, // "CASE", "IF", "IIF"
27799 ) -> Expression {
27800 let substr = Expression::Function(Box::new(Function::new(
27801 "SUBSTRING".to_string(),
27802 vec![haystack, pos.clone()],
27803 )));
27804 let mut inner_args = vec![substr, needle];
27805 if let Some(occ) = occurrence {
27806 inner_args.push(occ);
27807 }
27808 let inner_call = Expression::Function(Box::new(Function::new(
27809 inner_func.to_string(),
27810 inner_args,
27811 )));
27812 let zero =
27813 Expression::Literal(Box::new(Literal::Number("0".to_string())));
27814 let one =
27815 Expression::Literal(Box::new(Literal::Number("1".to_string())));
27816 let eq_zero = Expression::Eq(Box::new(BinaryOp {
27817 left: inner_call.clone(),
27818 right: zero.clone(),
27819 left_comments: Vec::new(),
27820 operator_comments: Vec::new(),
27821 trailing_comments: Vec::new(),
27822 inferred_type: None,
27823 }));
27824 let add_pos = Expression::Add(Box::new(BinaryOp {
27825 left: inner_call,
27826 right: pos,
27827 left_comments: Vec::new(),
27828 operator_comments: Vec::new(),
27829 trailing_comments: Vec::new(),
27830 inferred_type: None,
27831 }));
27832 let sub_one = Expression::Sub(Box::new(BinaryOp {
27833 left: add_pos,
27834 right: one,
27835 left_comments: Vec::new(),
27836 operator_comments: Vec::new(),
27837 trailing_comments: Vec::new(),
27838 inferred_type: None,
27839 }));
27840
27841 match wrapper {
27842 "CASE" => Expression::Case(Box::new(Case {
27843 operand: Option::None,
27844 whens: vec![(eq_zero, zero)],
27845 else_: Some(sub_one),
27846 comments: Vec::new(),
27847 inferred_type: None,
27848 })),
27849 "IIF" => Expression::Function(Box::new(Function::new(
27850 "IIF".to_string(),
27851 vec![eq_zero, zero, sub_one],
27852 ))),
27853 _ => Expression::Function(Box::new(Function::new(
27854 "IF".to_string(),
27855 vec![eq_zero, zero, sub_one],
27856 ))),
27857 }
27858 }
27859
27860 match target {
27861 // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
27862 DialectType::Athena
27863 | DialectType::DuckDB
27864 | DialectType::Presto
27865 | DialectType::Trino
27866 | DialectType::Drill => {
27867 if let Some(pos) = position {
27868 let wrapper = if matches!(target, DialectType::DuckDB) {
27869 "CASE"
27870 } else {
27871 "IF"
27872 };
27873 let result = build_position_expansion(
27874 haystack, needle, pos, occurrence, "STRPOS", wrapper,
27875 );
27876 if matches!(target, DialectType::Drill) {
27877 // Drill uses backtick-quoted `IF`
27878 if let Expression::Function(mut f) = result {
27879 f.name = "`IF`".to_string();
27880 Ok(Expression::Function(f))
27881 } else {
27882 Ok(result)
27883 }
27884 } else {
27885 Ok(result)
27886 }
27887 } else {
27888 Ok(Expression::Function(Box::new(Function::new(
27889 "STRPOS".to_string(),
27890 vec![haystack, needle],
27891 ))))
27892 }
27893 }
27894 // SQLite: IIF wrapper
27895 DialectType::SQLite => {
27896 if let Some(pos) = position {
27897 Ok(build_position_expansion(
27898 haystack, needle, pos, occurrence, "INSTR", "IIF",
27899 ))
27900 } else {
27901 Ok(Expression::Function(Box::new(Function::new(
27902 "INSTR".to_string(),
27903 vec![haystack, needle],
27904 ))))
27905 }
27906 }
27907 // INSTR group: Teradata, BigQuery, Oracle
27908 DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
27909 let mut a = vec![haystack, needle];
27910 if let Some(pos) = position {
27911 a.push(pos);
27912 }
27913 if let Some(occ) = occurrence {
27914 a.push(occ);
27915 }
27916 Ok(Expression::Function(Box::new(Function::new(
27917 "INSTR".to_string(),
27918 a,
27919 ))))
27920 }
27921 // CHARINDEX group: Snowflake, TSQL
27922 DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
27923 let mut a = vec![needle, haystack];
27924 if let Some(pos) = position {
27925 a.push(pos);
27926 }
27927 Ok(Expression::Function(Box::new(Function::new(
27928 "CHARINDEX".to_string(),
27929 a,
27930 ))))
27931 }
27932 // POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
27933 DialectType::PostgreSQL
27934 | DialectType::Materialize
27935 | DialectType::RisingWave
27936 | DialectType::Redshift => {
27937 if let Some(pos) = position {
27938 // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
27939 // ELSE POSITION(...) + pos - 1 END
27940 let substr = Expression::Substring(Box::new(
27941 crate::expressions::SubstringFunc {
27942 this: haystack,
27943 start: pos.clone(),
27944 length: Option::None,
27945 from_for_syntax: true,
27946 },
27947 ));
27948 let pos_in = Expression::StrPosition(Box::new(
27949 crate::expressions::StrPosition {
27950 this: Box::new(substr),
27951 substr: Some(Box::new(needle)),
27952 position: Option::None,
27953 occurrence: Option::None,
27954 },
27955 ));
27956 let zero = Expression::Literal(Box::new(Literal::Number(
27957 "0".to_string(),
27958 )));
27959 let one = Expression::Literal(Box::new(Literal::Number(
27960 "1".to_string(),
27961 )));
27962 let eq_zero = Expression::Eq(Box::new(BinaryOp {
27963 left: pos_in.clone(),
27964 right: zero.clone(),
27965 left_comments: Vec::new(),
27966 operator_comments: Vec::new(),
27967 trailing_comments: Vec::new(),
27968 inferred_type: None,
27969 }));
27970 let add_pos = Expression::Add(Box::new(BinaryOp {
27971 left: pos_in,
27972 right: pos,
27973 left_comments: Vec::new(),
27974 operator_comments: Vec::new(),
27975 trailing_comments: Vec::new(),
27976 inferred_type: None,
27977 }));
27978 let sub_one = Expression::Sub(Box::new(BinaryOp {
27979 left: add_pos,
27980 right: one,
27981 left_comments: Vec::new(),
27982 operator_comments: Vec::new(),
27983 trailing_comments: Vec::new(),
27984 inferred_type: None,
27985 }));
27986 Ok(Expression::Case(Box::new(Case {
27987 operand: Option::None,
27988 whens: vec![(eq_zero, zero)],
27989 else_: Some(sub_one),
27990 comments: Vec::new(),
27991 inferred_type: None,
27992 })))
27993 } else {
27994 Ok(Expression::StrPosition(Box::new(
27995 crate::expressions::StrPosition {
27996 this: Box::new(haystack),
27997 substr: Some(Box::new(needle)),
27998 position: Option::None,
27999 occurrence: Option::None,
28000 },
28001 )))
28002 }
28003 }
28004 // LOCATE group: MySQL, Hive, Spark, Databricks, Doris
28005 DialectType::MySQL
28006 | DialectType::SingleStore
28007 | DialectType::TiDB
28008 | DialectType::Hive
28009 | DialectType::Spark
28010 | DialectType::Databricks
28011 | DialectType::Doris
28012 | DialectType::StarRocks => {
28013 let mut a = vec![needle, haystack];
28014 if let Some(pos) = position {
28015 a.push(pos);
28016 }
28017 Ok(Expression::Function(Box::new(Function::new(
28018 "LOCATE".to_string(),
28019 a,
28020 ))))
28021 }
28022 // ClickHouse: POSITION(haystack, needle[, position])
28023 DialectType::ClickHouse => {
28024 let mut a = vec![haystack, needle];
28025 if let Some(pos) = position {
28026 a.push(pos);
28027 }
28028 Ok(Expression::Function(Box::new(Function::new(
28029 "POSITION".to_string(),
28030 a,
28031 ))))
28032 }
28033 _ => {
28034 let mut a = vec![haystack, needle];
28035 if let Some(pos) = position {
28036 a.push(pos);
28037 }
28038 if let Some(occ) = occurrence {
28039 a.push(occ);
28040 }
28041 Ok(Expression::Function(Box::new(Function::new(
28042 "STR_POSITION".to_string(),
28043 a,
28044 ))))
28045 }
28046 }
28047 } else {
28048 Ok(e)
28049 }
28050 }
28051
28052 Action::ArraySumConvert => {
28053 // ARRAY_SUM(arr) -> dialect-specific
28054 if let Expression::Function(f) = e {
28055 let args = f.args;
28056 match target {
28057 DialectType::DuckDB => Ok(Expression::Function(Box::new(
28058 Function::new("LIST_SUM".to_string(), args),
28059 ))),
28060 DialectType::Spark | DialectType::Databricks => {
28061 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
28062 let arr = args.into_iter().next().unwrap();
28063 let zero =
28064 Expression::Literal(Box::new(Literal::Number("0".to_string())));
28065 let acc_id = Identifier::new("acc");
28066 let x_id = Identifier::new("x");
28067 let acc = Expression::Identifier(acc_id.clone());
28068 let x = Expression::Identifier(x_id.clone());
28069 let add = Expression::Add(Box::new(BinaryOp {
28070 left: acc.clone(),
28071 right: x,
28072 left_comments: Vec::new(),
28073 operator_comments: Vec::new(),
28074 trailing_comments: Vec::new(),
28075 inferred_type: None,
28076 }));
28077 let lambda1 =
28078 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
28079 parameters: vec![acc_id.clone(), x_id],
28080 body: add,
28081 colon: false,
28082 parameter_types: Vec::new(),
28083 }));
28084 let lambda2 =
28085 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
28086 parameters: vec![acc_id],
28087 body: acc,
28088 colon: false,
28089 parameter_types: Vec::new(),
28090 }));
28091 Ok(Expression::Function(Box::new(Function::new(
28092 "AGGREGATE".to_string(),
28093 vec![arr, zero, lambda1, lambda2],
28094 ))))
28095 }
28096 DialectType::Presto | DialectType::Athena => {
28097 // Presto/Athena keep ARRAY_SUM natively
28098 Ok(Expression::Function(Box::new(Function::new(
28099 "ARRAY_SUM".to_string(),
28100 args,
28101 ))))
28102 }
28103 DialectType::Trino => {
28104 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
28105 if args.len() == 1 {
28106 let arr = args.into_iter().next().unwrap();
28107 let zero = Expression::Literal(Box::new(Literal::Number(
28108 "0".to_string(),
28109 )));
28110 let acc_id = Identifier::new("acc");
28111 let x_id = Identifier::new("x");
28112 let acc = Expression::Identifier(acc_id.clone());
28113 let x = Expression::Identifier(x_id.clone());
28114 let add = Expression::Add(Box::new(BinaryOp {
28115 left: acc.clone(),
28116 right: x,
28117 left_comments: Vec::new(),
28118 operator_comments: Vec::new(),
28119 trailing_comments: Vec::new(),
28120 inferred_type: None,
28121 }));
28122 let lambda1 = Expression::Lambda(Box::new(
28123 crate::expressions::LambdaExpr {
28124 parameters: vec![acc_id.clone(), x_id],
28125 body: add,
28126 colon: false,
28127 parameter_types: Vec::new(),
28128 },
28129 ));
28130 let lambda2 = Expression::Lambda(Box::new(
28131 crate::expressions::LambdaExpr {
28132 parameters: vec![acc_id],
28133 body: acc,
28134 colon: false,
28135 parameter_types: Vec::new(),
28136 },
28137 ));
28138 Ok(Expression::Function(Box::new(Function::new(
28139 "REDUCE".to_string(),
28140 vec![arr, zero, lambda1, lambda2],
28141 ))))
28142 } else {
28143 Ok(Expression::Function(Box::new(Function::new(
28144 "ARRAY_SUM".to_string(),
28145 args,
28146 ))))
28147 }
28148 }
28149 DialectType::ClickHouse => {
28150 // arraySum(lambda, arr) or arraySum(arr)
28151 Ok(Expression::Function(Box::new(Function::new(
28152 "arraySum".to_string(),
28153 args,
28154 ))))
28155 }
28156 _ => Ok(Expression::Function(Box::new(Function::new(
28157 "ARRAY_SUM".to_string(),
28158 args,
28159 )))),
28160 }
28161 } else {
28162 Ok(e)
28163 }
28164 }
28165
28166 Action::ArraySizeConvert => {
28167 if let Expression::Function(f) = e {
28168 Ok(Expression::Function(Box::new(Function::new(
28169 "REPEATED_COUNT".to_string(),
28170 f.args,
28171 ))))
28172 } else {
28173 Ok(e)
28174 }
28175 }
28176
28177 Action::ArrayAnyConvert => {
28178 if let Expression::Function(f) = e {
28179 let mut args = f.args;
28180 if args.len() == 2 {
28181 let arr = args.remove(0);
28182 let lambda = args.remove(0);
28183
28184 // Extract lambda parameter name and body
28185 let (param_name, pred_body) =
28186 if let Expression::Lambda(ref lam) = lambda {
28187 let name = if let Some(p) = lam.parameters.first() {
28188 p.name.clone()
28189 } else {
28190 "x".to_string()
28191 };
28192 (name, lam.body.clone())
28193 } else {
28194 ("x".to_string(), lambda.clone())
28195 };
28196
28197 // Helper: build a function call Expression
28198 let make_func = |name: &str, args: Vec<Expression>| -> Expression {
28199 Expression::Function(Box::new(Function::new(
28200 name.to_string(),
28201 args,
28202 )))
28203 };
28204
28205 // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren
28206 let build_filter_pattern = |len_func: &str,
28207 len_args_extra: Vec<Expression>,
28208 filter_expr: Expression|
28209 -> Expression {
28210 // len_func(arr, ...extra) = 0
28211 let mut len_arr_args = vec![arr.clone()];
28212 len_arr_args.extend(len_args_extra.clone());
28213 let len_arr = make_func(len_func, len_arr_args);
28214 let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
28215 len_arr,
28216 Expression::number(0),
28217 )));
28218
28219 // len_func(filter_expr, ...extra) <> 0
28220 let mut len_filter_args = vec![filter_expr];
28221 len_filter_args.extend(len_args_extra);
28222 let len_filter = make_func(len_func, len_filter_args);
28223 let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
28224 len_filter,
28225 Expression::number(0),
28226 )));
28227
28228 // (eq_zero OR neq_zero)
28229 let or_expr =
28230 Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
28231 Expression::Paren(Box::new(Paren {
28232 this: or_expr,
28233 trailing_comments: Vec::new(),
28234 }))
28235 };
28236
28237 match target {
28238 DialectType::Trino | DialectType::Presto | DialectType::Athena => {
28239 Ok(make_func("ANY_MATCH", vec![arr, lambda]))
28240 }
28241 DialectType::ClickHouse => {
28242 // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
28243 // ClickHouse arrayFilter takes lambda first, then array
28244 let filter_expr =
28245 make_func("arrayFilter", vec![lambda, arr.clone()]);
28246 Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
28247 }
28248 DialectType::Databricks | DialectType::Spark => {
28249 // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
28250 let filter_expr =
28251 make_func("FILTER", vec![arr.clone(), lambda]);
28252 Ok(build_filter_pattern("SIZE", vec![], filter_expr))
28253 }
28254 DialectType::DuckDB => {
28255 // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
28256 let filter_expr =
28257 make_func("LIST_FILTER", vec![arr.clone(), lambda]);
28258 Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
28259 }
28260 DialectType::Teradata => {
28261 // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
28262 let filter_expr =
28263 make_func("FILTER", vec![arr.clone(), lambda]);
28264 Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
28265 }
28266 DialectType::BigQuery => {
28267 // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
28268 // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
28269 let param_col = Expression::column(¶m_name);
28270 let unnest_expr = Expression::Unnest(Box::new(
28271 crate::expressions::UnnestFunc {
28272 this: arr.clone(),
28273 expressions: vec![],
28274 with_ordinality: false,
28275 alias: Some(Identifier::new(¶m_name)),
28276 offset_alias: None,
28277 },
28278 ));
28279 let mut sel = crate::expressions::Select::default();
28280 sel.expressions = vec![param_col];
28281 sel.from = Some(crate::expressions::From {
28282 expressions: vec![unnest_expr],
28283 });
28284 sel.where_clause =
28285 Some(crate::expressions::Where { this: pred_body });
28286 let array_subquery =
28287 make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
28288 Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
28289 }
28290 DialectType::PostgreSQL => {
28291 // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
28292 // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
28293 let param_col = Expression::column(¶m_name);
28294 // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
28295 let unnest_with_alias =
28296 Expression::Alias(Box::new(crate::expressions::Alias {
28297 this: Expression::Unnest(Box::new(
28298 crate::expressions::UnnestFunc {
28299 this: arr.clone(),
28300 expressions: vec![],
28301 with_ordinality: false,
28302 alias: None,
28303 offset_alias: None,
28304 },
28305 )),
28306 alias: Identifier::new("_t0"),
28307 column_aliases: vec![Identifier::new(¶m_name)],
28308 alias_explicit_as: false,
28309 alias_keyword: None,
28310 pre_alias_comments: Vec::new(),
28311 trailing_comments: Vec::new(),
28312 inferred_type: None,
28313 }));
28314 let mut sel = crate::expressions::Select::default();
28315 sel.expressions = vec![param_col];
28316 sel.from = Some(crate::expressions::From {
28317 expressions: vec![unnest_with_alias],
28318 });
28319 sel.where_clause =
28320 Some(crate::expressions::Where { this: pred_body });
28321 let array_subquery =
28322 make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
28323 Ok(build_filter_pattern(
28324 "ARRAY_LENGTH",
28325 vec![Expression::number(1)],
28326 array_subquery,
28327 ))
28328 }
28329 _ => Ok(Expression::Function(Box::new(Function::new(
28330 "ARRAY_ANY".to_string(),
28331 vec![arr, lambda],
28332 )))),
28333 }
28334 } else {
28335 Ok(Expression::Function(Box::new(Function::new(
28336 "ARRAY_ANY".to_string(),
28337 args,
28338 ))))
28339 }
28340 } else {
28341 Ok(e)
28342 }
28343 }
28344
28345 Action::DecodeSimplify => {
28346 // DECODE(x, search1, result1, ..., default) -> CASE WHEN ... THEN result1 ... [ELSE default] END
28347 // For literal search values: CASE WHEN x = search THEN result
28348 // For NULL search: CASE WHEN x IS NULL THEN result
28349 // For non-literal (column, expr): CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
28350 fn is_decode_literal(e: &Expression) -> bool {
28351 matches!(
28352 e,
28353 Expression::Literal(_) | Expression::Boolean(_) | Expression::Neg(_)
28354 )
28355 }
28356
28357 let build_decode_case =
28358 |this_expr: Expression,
28359 pairs: Vec<(Expression, Expression)>,
28360 default: Option<Expression>| {
28361 let whens: Vec<(Expression, Expression)> = pairs
28362 .into_iter()
28363 .map(|(search, result)| {
28364 if matches!(&search, Expression::Null(_)) {
28365 // NULL search -> IS NULL
28366 let condition = Expression::Is(Box::new(BinaryOp {
28367 left: this_expr.clone(),
28368 right: Expression::Null(crate::expressions::Null),
28369 left_comments: Vec::new(),
28370 operator_comments: Vec::new(),
28371 trailing_comments: Vec::new(),
28372 inferred_type: None,
28373 }));
28374 (condition, result)
28375 } else if is_decode_literal(&search)
28376 || is_decode_literal(&this_expr)
28377 {
28378 // At least one side is a literal -> simple equality (no NULL check needed)
28379 let eq = Expression::Eq(Box::new(BinaryOp {
28380 left: this_expr.clone(),
28381 right: search,
28382 left_comments: Vec::new(),
28383 operator_comments: Vec::new(),
28384 trailing_comments: Vec::new(),
28385 inferred_type: None,
28386 }));
28387 (eq, result)
28388 } else {
28389 // Non-literal -> null-safe comparison
28390 let needs_paren = matches!(
28391 &search,
28392 Expression::Eq(_)
28393 | Expression::Neq(_)
28394 | Expression::Gt(_)
28395 | Expression::Gte(_)
28396 | Expression::Lt(_)
28397 | Expression::Lte(_)
28398 );
28399 let search_ref = if needs_paren {
28400 Expression::Paren(Box::new(crate::expressions::Paren {
28401 this: search.clone(),
28402 trailing_comments: Vec::new(),
28403 }))
28404 } else {
28405 search.clone()
28406 };
28407 // Build: x = search OR (x IS NULL AND search IS NULL)
28408 let eq = Expression::Eq(Box::new(BinaryOp {
28409 left: this_expr.clone(),
28410 right: search_ref,
28411 left_comments: Vec::new(),
28412 operator_comments: Vec::new(),
28413 trailing_comments: Vec::new(),
28414 inferred_type: None,
28415 }));
28416 let search_in_null = if needs_paren {
28417 Expression::Paren(Box::new(crate::expressions::Paren {
28418 this: search.clone(),
28419 trailing_comments: Vec::new(),
28420 }))
28421 } else {
28422 search.clone()
28423 };
28424 let x_is_null = Expression::Is(Box::new(BinaryOp {
28425 left: this_expr.clone(),
28426 right: Expression::Null(crate::expressions::Null),
28427 left_comments: Vec::new(),
28428 operator_comments: Vec::new(),
28429 trailing_comments: Vec::new(),
28430 inferred_type: None,
28431 }));
28432 let search_is_null = Expression::Is(Box::new(BinaryOp {
28433 left: search_in_null,
28434 right: Expression::Null(crate::expressions::Null),
28435 left_comments: Vec::new(),
28436 operator_comments: Vec::new(),
28437 trailing_comments: Vec::new(),
28438 inferred_type: None,
28439 }));
28440 let both_null = Expression::And(Box::new(BinaryOp {
28441 left: x_is_null,
28442 right: search_is_null,
28443 left_comments: Vec::new(),
28444 operator_comments: Vec::new(),
28445 trailing_comments: Vec::new(),
28446 inferred_type: None,
28447 }));
28448 let condition = Expression::Or(Box::new(BinaryOp {
28449 left: eq,
28450 right: Expression::Paren(Box::new(
28451 crate::expressions::Paren {
28452 this: both_null,
28453 trailing_comments: Vec::new(),
28454 },
28455 )),
28456 left_comments: Vec::new(),
28457 operator_comments: Vec::new(),
28458 trailing_comments: Vec::new(),
28459 inferred_type: None,
28460 }));
28461 (condition, result)
28462 }
28463 })
28464 .collect();
28465 Expression::Case(Box::new(Case {
28466 operand: None,
28467 whens,
28468 else_: default,
28469 comments: Vec::new(),
28470 inferred_type: None,
28471 }))
28472 };
28473
28474 if let Expression::Decode(decode) = e {
28475 Ok(build_decode_case(
28476 decode.this,
28477 decode.search_results,
28478 decode.default,
28479 ))
28480 } else if let Expression::DecodeCase(dc) = e {
28481 // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
28482 let mut exprs = dc.expressions;
28483 if exprs.len() < 3 {
28484 return Ok(Expression::DecodeCase(Box::new(
28485 crate::expressions::DecodeCase { expressions: exprs },
28486 )));
28487 }
28488 let this_expr = exprs.remove(0);
28489 let mut pairs = Vec::new();
28490 let mut default = None;
28491 let mut i = 0;
28492 while i + 1 < exprs.len() {
28493 pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
28494 i += 2;
28495 }
28496 if i < exprs.len() {
28497 // Odd remaining element is the default
28498 default = Some(exprs[i].clone());
28499 }
28500 Ok(build_decode_case(this_expr, pairs, default))
28501 } else {
28502 Ok(e)
28503 }
28504 }
28505
28506 Action::CreateTableLikeToCtas => {
28507 // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
28508 if let Expression::CreateTable(ct) = e {
28509 let like_source = ct.constraints.iter().find_map(|c| {
28510 if let crate::expressions::TableConstraint::Like { source, .. } = c {
28511 Some(source.clone())
28512 } else {
28513 None
28514 }
28515 });
28516 if let Some(source_table) = like_source {
28517 let mut new_ct = *ct;
28518 new_ct.constraints.clear();
28519 // Build: SELECT * FROM b LIMIT 0
28520 let select = Expression::Select(Box::new(crate::expressions::Select {
28521 expressions: vec![Expression::Star(crate::expressions::Star {
28522 table: None,
28523 except: None,
28524 replace: None,
28525 rename: None,
28526 trailing_comments: Vec::new(),
28527 span: None,
28528 })],
28529 from: Some(crate::expressions::From {
28530 expressions: vec![Expression::Table(Box::new(source_table))],
28531 }),
28532 limit: Some(crate::expressions::Limit {
28533 this: Expression::Literal(Box::new(Literal::Number(
28534 "0".to_string(),
28535 ))),
28536 percent: false,
28537 comments: Vec::new(),
28538 }),
28539 ..Default::default()
28540 }));
28541 new_ct.as_select = Some(select);
28542 Ok(Expression::CreateTable(Box::new(new_ct)))
28543 } else {
28544 Ok(Expression::CreateTable(ct))
28545 }
28546 } else {
28547 Ok(e)
28548 }
28549 }
28550
28551 Action::CreateTableLikeToSelectInto => {
28552 // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
28553 if let Expression::CreateTable(ct) = e {
28554 let like_source = ct.constraints.iter().find_map(|c| {
28555 if let crate::expressions::TableConstraint::Like { source, .. } = c {
28556 Some(source.clone())
28557 } else {
28558 None
28559 }
28560 });
28561 if let Some(source_table) = like_source {
28562 let mut aliased_source = source_table;
28563 aliased_source.alias = Some(Identifier::new("temp"));
28564 // Build: SELECT TOP 0 * INTO a FROM b AS temp
28565 let select = Expression::Select(Box::new(crate::expressions::Select {
28566 expressions: vec![Expression::Star(crate::expressions::Star {
28567 table: None,
28568 except: None,
28569 replace: None,
28570 rename: None,
28571 trailing_comments: Vec::new(),
28572 span: None,
28573 })],
28574 from: Some(crate::expressions::From {
28575 expressions: vec![Expression::Table(Box::new(aliased_source))],
28576 }),
28577 into: Some(crate::expressions::SelectInto {
28578 this: Expression::Table(Box::new(ct.name.clone())),
28579 temporary: false,
28580 unlogged: false,
28581 bulk_collect: false,
28582 expressions: Vec::new(),
28583 }),
28584 top: Some(crate::expressions::Top {
28585 this: Expression::Literal(Box::new(Literal::Number(
28586 "0".to_string(),
28587 ))),
28588 percent: false,
28589 with_ties: false,
28590 parenthesized: false,
28591 }),
28592 ..Default::default()
28593 }));
28594 Ok(select)
28595 } else {
28596 Ok(Expression::CreateTable(ct))
28597 }
28598 } else {
28599 Ok(e)
28600 }
28601 }
28602
28603 Action::CreateTableLikeToAs => {
28604 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
28605 if let Expression::CreateTable(ct) = e {
28606 let like_source = ct.constraints.iter().find_map(|c| {
28607 if let crate::expressions::TableConstraint::Like { source, .. } = c {
28608 Some(source.clone())
28609 } else {
28610 None
28611 }
28612 });
28613 if let Some(source_table) = like_source {
28614 let mut new_ct = *ct;
28615 new_ct.constraints.clear();
28616 // AS b (just a table reference, not a SELECT)
28617 new_ct.as_select = Some(Expression::Table(Box::new(source_table)));
28618 Ok(Expression::CreateTable(Box::new(new_ct)))
28619 } else {
28620 Ok(Expression::CreateTable(ct))
28621 }
28622 } else {
28623 Ok(e)
28624 }
28625 }
28626
28627 Action::TsOrDsToDateConvert => {
28628 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
28629 if let Expression::Function(f) = e {
28630 let mut args = f.args;
28631 let this = args.remove(0);
28632 let fmt = if !args.is_empty() {
28633 match &args[0] {
28634 Expression::Literal(lit)
28635 if matches!(lit.as_ref(), Literal::String(_)) =>
28636 {
28637 let Literal::String(s) = lit.as_ref() else {
28638 unreachable!()
28639 };
28640 Some(s.clone())
28641 }
28642 _ => None,
28643 }
28644 } else {
28645 None
28646 };
28647 Ok(Expression::TsOrDsToDate(Box::new(
28648 crate::expressions::TsOrDsToDate {
28649 this: Box::new(this),
28650 format: fmt,
28651 safe: None,
28652 },
28653 )))
28654 } else {
28655 Ok(e)
28656 }
28657 }
28658
28659 Action::TsOrDsToDateStrConvert => {
28660 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
28661 if let Expression::Function(f) = e {
28662 let arg = f.args.into_iter().next().unwrap();
28663 let str_type = match target {
28664 DialectType::DuckDB
28665 | DialectType::PostgreSQL
28666 | DialectType::Materialize => DataType::Text,
28667 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
28668 DataType::Custom {
28669 name: "STRING".to_string(),
28670 }
28671 }
28672 DialectType::Presto
28673 | DialectType::Trino
28674 | DialectType::Athena
28675 | DialectType::Drill => DataType::VarChar {
28676 length: None,
28677 parenthesized_length: false,
28678 },
28679 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
28680 DataType::Custom {
28681 name: "STRING".to_string(),
28682 }
28683 }
28684 _ => DataType::VarChar {
28685 length: None,
28686 parenthesized_length: false,
28687 },
28688 };
28689 let cast_expr = Expression::Cast(Box::new(Cast {
28690 this: arg,
28691 to: str_type,
28692 double_colon_syntax: false,
28693 trailing_comments: Vec::new(),
28694 format: None,
28695 default: None,
28696 inferred_type: None,
28697 }));
28698 Ok(Expression::Substring(Box::new(
28699 crate::expressions::SubstringFunc {
28700 this: cast_expr,
28701 start: Expression::number(1),
28702 length: Some(Expression::number(10)),
28703 from_for_syntax: false,
28704 },
28705 )))
28706 } else {
28707 Ok(e)
28708 }
28709 }
28710
28711 Action::DateStrToDateConvert => {
28712 // DATE_STR_TO_DATE(x) -> dialect-specific
28713 if let Expression::Function(f) = e {
28714 let arg = f.args.into_iter().next().unwrap();
28715 match target {
28716 DialectType::SQLite => {
28717 // SQLite: just the bare expression (dates are strings)
28718 Ok(arg)
28719 }
28720 _ => Ok(Expression::Cast(Box::new(Cast {
28721 this: arg,
28722 to: DataType::Date,
28723 double_colon_syntax: false,
28724 trailing_comments: Vec::new(),
28725 format: None,
28726 default: None,
28727 inferred_type: None,
28728 }))),
28729 }
28730 } else {
28731 Ok(e)
28732 }
28733 }
28734
28735 Action::TimeStrToDateConvert => {
28736 // TIME_STR_TO_DATE(x) -> dialect-specific
28737 if let Expression::Function(f) = e {
28738 let arg = f.args.into_iter().next().unwrap();
28739 match target {
28740 DialectType::Hive
28741 | DialectType::Doris
28742 | DialectType::StarRocks
28743 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
28744 Function::new("TO_DATE".to_string(), vec![arg]),
28745 ))),
28746 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28747 // Presto: CAST(x AS TIMESTAMP)
28748 Ok(Expression::Cast(Box::new(Cast {
28749 this: arg,
28750 to: DataType::Timestamp {
28751 timezone: false,
28752 precision: None,
28753 },
28754 double_colon_syntax: false,
28755 trailing_comments: Vec::new(),
28756 format: None,
28757 default: None,
28758 inferred_type: None,
28759 })))
28760 }
28761 _ => {
28762 // Default: CAST(x AS DATE)
28763 Ok(Expression::Cast(Box::new(Cast {
28764 this: arg,
28765 to: DataType::Date,
28766 double_colon_syntax: false,
28767 trailing_comments: Vec::new(),
28768 format: None,
28769 default: None,
28770 inferred_type: None,
28771 })))
28772 }
28773 }
28774 } else {
28775 Ok(e)
28776 }
28777 }
28778
28779 Action::TimeStrToTimeConvert => {
28780 // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type
28781 if let Expression::Function(f) = e {
28782 let mut args = f.args;
28783 let this = args.remove(0);
28784 let zone = if !args.is_empty() {
28785 match &args[0] {
28786 Expression::Literal(lit)
28787 if matches!(lit.as_ref(), Literal::String(_)) =>
28788 {
28789 let Literal::String(s) = lit.as_ref() else {
28790 unreachable!()
28791 };
28792 Some(s.clone())
28793 }
28794 _ => None,
28795 }
28796 } else {
28797 None
28798 };
28799 let has_zone = zone.is_some();
28800
28801 match target {
28802 DialectType::SQLite => {
28803 // SQLite: just the bare expression
28804 Ok(this)
28805 }
28806 DialectType::MySQL => {
28807 if has_zone {
28808 // MySQL with zone: TIMESTAMP(x)
28809 Ok(Expression::Function(Box::new(Function::new(
28810 "TIMESTAMP".to_string(),
28811 vec![this],
28812 ))))
28813 } else {
28814 // MySQL: CAST(x AS DATETIME) or with precision
28815 // Use DataType::Custom to avoid MySQL's transform_cast converting
28816 // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
28817 let precision = if let Expression::Literal(ref lit) = this {
28818 if let Literal::String(ref s) = lit.as_ref() {
28819 if let Some(dot_pos) = s.rfind('.') {
28820 let frac = &s[dot_pos + 1..];
28821 let digit_count = frac
28822 .chars()
28823 .take_while(|c| c.is_ascii_digit())
28824 .count();
28825 if digit_count > 0 {
28826 Some(digit_count)
28827 } else {
28828 None
28829 }
28830 } else {
28831 None
28832 }
28833 } else {
28834 None
28835 }
28836 } else {
28837 None
28838 };
28839 let type_name = match precision {
28840 Some(p) => format!("DATETIME({})", p),
28841 None => "DATETIME".to_string(),
28842 };
28843 Ok(Expression::Cast(Box::new(Cast {
28844 this,
28845 to: DataType::Custom { name: type_name },
28846 double_colon_syntax: false,
28847 trailing_comments: Vec::new(),
28848 format: None,
28849 default: None,
28850 inferred_type: None,
28851 })))
28852 }
28853 }
28854 DialectType::ClickHouse => {
28855 if has_zone {
28856 // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
28857 // We need to strip the timezone offset from the literal if present
28858 let clean_this = if let Expression::Literal(ref lit) = this {
28859 if let Literal::String(ref s) = lit.as_ref() {
28860 // Strip timezone offset like "-08:00" or "+00:00"
28861 let re_offset = s.rfind(|c: char| c == '+' || c == '-');
28862 if let Some(offset_pos) = re_offset {
28863 if offset_pos > 10 {
28864 // After the date part
28865 let trimmed = s[..offset_pos].to_string();
28866 Expression::Literal(Box::new(Literal::String(
28867 trimmed,
28868 )))
28869 } else {
28870 this.clone()
28871 }
28872 } else {
28873 this.clone()
28874 }
28875 } else {
28876 this.clone()
28877 }
28878 } else {
28879 this.clone()
28880 };
28881 let zone_str = zone.unwrap();
28882 // Build: CAST(x AS DateTime64(6, 'zone'))
28883 let type_name = format!("DateTime64(6, '{}')", zone_str);
28884 Ok(Expression::Cast(Box::new(Cast {
28885 this: clean_this,
28886 to: DataType::Custom { name: type_name },
28887 double_colon_syntax: false,
28888 trailing_comments: Vec::new(),
28889 format: None,
28890 default: None,
28891 inferred_type: None,
28892 })))
28893 } else {
28894 Ok(Expression::Cast(Box::new(Cast {
28895 this,
28896 to: DataType::Custom {
28897 name: "DateTime64(6)".to_string(),
28898 },
28899 double_colon_syntax: false,
28900 trailing_comments: Vec::new(),
28901 format: None,
28902 default: None,
28903 inferred_type: None,
28904 })))
28905 }
28906 }
28907 DialectType::BigQuery => {
28908 if has_zone {
28909 // BigQuery with zone: CAST(x AS TIMESTAMP)
28910 Ok(Expression::Cast(Box::new(Cast {
28911 this,
28912 to: DataType::Timestamp {
28913 timezone: false,
28914 precision: None,
28915 },
28916 double_colon_syntax: false,
28917 trailing_comments: Vec::new(),
28918 format: None,
28919 default: None,
28920 inferred_type: None,
28921 })))
28922 } else {
28923 // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
28924 Ok(Expression::Cast(Box::new(Cast {
28925 this,
28926 to: DataType::Custom {
28927 name: "DATETIME".to_string(),
28928 },
28929 double_colon_syntax: false,
28930 trailing_comments: Vec::new(),
28931 format: None,
28932 default: None,
28933 inferred_type: None,
28934 })))
28935 }
28936 }
28937 DialectType::Doris => {
28938 // Doris: CAST(x AS DATETIME)
28939 Ok(Expression::Cast(Box::new(Cast {
28940 this,
28941 to: DataType::Custom {
28942 name: "DATETIME".to_string(),
28943 },
28944 double_colon_syntax: false,
28945 trailing_comments: Vec::new(),
28946 format: None,
28947 default: None,
28948 inferred_type: None,
28949 })))
28950 }
28951 DialectType::TSQL | DialectType::Fabric => {
28952 if has_zone {
28953 // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
28954 let cast_expr = Expression::Cast(Box::new(Cast {
28955 this,
28956 to: DataType::Custom {
28957 name: "DATETIMEOFFSET".to_string(),
28958 },
28959 double_colon_syntax: false,
28960 trailing_comments: Vec::new(),
28961 format: None,
28962 default: None,
28963 inferred_type: None,
28964 }));
28965 Ok(Expression::AtTimeZone(Box::new(
28966 crate::expressions::AtTimeZone {
28967 this: cast_expr,
28968 zone: Expression::Literal(Box::new(Literal::String(
28969 "UTC".to_string(),
28970 ))),
28971 },
28972 )))
28973 } else {
28974 // TSQL: CAST(x AS DATETIME2)
28975 Ok(Expression::Cast(Box::new(Cast {
28976 this,
28977 to: DataType::Custom {
28978 name: "DATETIME2".to_string(),
28979 },
28980 double_colon_syntax: false,
28981 trailing_comments: Vec::new(),
28982 format: None,
28983 default: None,
28984 inferred_type: None,
28985 })))
28986 }
28987 }
28988 DialectType::DuckDB => {
28989 if has_zone {
28990 // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
28991 Ok(Expression::Cast(Box::new(Cast {
28992 this,
28993 to: DataType::Timestamp {
28994 timezone: true,
28995 precision: None,
28996 },
28997 double_colon_syntax: false,
28998 trailing_comments: Vec::new(),
28999 format: None,
29000 default: None,
29001 inferred_type: None,
29002 })))
29003 } else {
29004 // DuckDB: CAST(x AS TIMESTAMP)
29005 Ok(Expression::Cast(Box::new(Cast {
29006 this,
29007 to: DataType::Timestamp {
29008 timezone: false,
29009 precision: None,
29010 },
29011 double_colon_syntax: false,
29012 trailing_comments: Vec::new(),
29013 format: None,
29014 default: None,
29015 inferred_type: None,
29016 })))
29017 }
29018 }
29019 DialectType::PostgreSQL
29020 | DialectType::Materialize
29021 | DialectType::RisingWave => {
29022 if has_zone {
29023 // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
29024 Ok(Expression::Cast(Box::new(Cast {
29025 this,
29026 to: DataType::Timestamp {
29027 timezone: true,
29028 precision: None,
29029 },
29030 double_colon_syntax: false,
29031 trailing_comments: Vec::new(),
29032 format: None,
29033 default: None,
29034 inferred_type: None,
29035 })))
29036 } else {
29037 // PostgreSQL: CAST(x AS TIMESTAMP)
29038 Ok(Expression::Cast(Box::new(Cast {
29039 this,
29040 to: DataType::Timestamp {
29041 timezone: false,
29042 precision: None,
29043 },
29044 double_colon_syntax: false,
29045 trailing_comments: Vec::new(),
29046 format: None,
29047 default: None,
29048 inferred_type: None,
29049 })))
29050 }
29051 }
29052 DialectType::Snowflake => {
29053 if has_zone {
29054 // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
29055 Ok(Expression::Cast(Box::new(Cast {
29056 this,
29057 to: DataType::Timestamp {
29058 timezone: true,
29059 precision: None,
29060 },
29061 double_colon_syntax: false,
29062 trailing_comments: Vec::new(),
29063 format: None,
29064 default: None,
29065 inferred_type: None,
29066 })))
29067 } else {
29068 // Snowflake: CAST(x AS TIMESTAMP)
29069 Ok(Expression::Cast(Box::new(Cast {
29070 this,
29071 to: DataType::Timestamp {
29072 timezone: false,
29073 precision: None,
29074 },
29075 double_colon_syntax: false,
29076 trailing_comments: Vec::new(),
29077 format: None,
29078 default: None,
29079 inferred_type: None,
29080 })))
29081 }
29082 }
29083 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29084 if has_zone {
29085 // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
29086 // Check for precision from sub-second digits
29087 let precision = if let Expression::Literal(ref lit) = this {
29088 if let Literal::String(ref s) = lit.as_ref() {
29089 if let Some(dot_pos) = s.rfind('.') {
29090 let frac = &s[dot_pos + 1..];
29091 let digit_count = frac
29092 .chars()
29093 .take_while(|c| c.is_ascii_digit())
29094 .count();
29095 if digit_count > 0
29096 && matches!(target, DialectType::Trino)
29097 {
29098 Some(digit_count as u32)
29099 } else {
29100 None
29101 }
29102 } else {
29103 None
29104 }
29105 } else {
29106 None
29107 }
29108 } else {
29109 None
29110 };
29111 let dt = if let Some(prec) = precision {
29112 DataType::Timestamp {
29113 timezone: true,
29114 precision: Some(prec),
29115 }
29116 } else {
29117 DataType::Timestamp {
29118 timezone: true,
29119 precision: None,
29120 }
29121 };
29122 Ok(Expression::Cast(Box::new(Cast {
29123 this,
29124 to: dt,
29125 double_colon_syntax: false,
29126 trailing_comments: Vec::new(),
29127 format: None,
29128 default: None,
29129 inferred_type: None,
29130 })))
29131 } else {
29132 // Check for sub-second precision for Trino
29133 let precision = if let Expression::Literal(ref lit) = this {
29134 if let Literal::String(ref s) = lit.as_ref() {
29135 if let Some(dot_pos) = s.rfind('.') {
29136 let frac = &s[dot_pos + 1..];
29137 let digit_count = frac
29138 .chars()
29139 .take_while(|c| c.is_ascii_digit())
29140 .count();
29141 if digit_count > 0
29142 && matches!(target, DialectType::Trino)
29143 {
29144 Some(digit_count as u32)
29145 } else {
29146 None
29147 }
29148 } else {
29149 None
29150 }
29151 } else {
29152 None
29153 }
29154 } else {
29155 None
29156 };
29157 let dt = DataType::Timestamp {
29158 timezone: false,
29159 precision,
29160 };
29161 Ok(Expression::Cast(Box::new(Cast {
29162 this,
29163 to: dt,
29164 double_colon_syntax: false,
29165 trailing_comments: Vec::new(),
29166 format: None,
29167 default: None,
29168 inferred_type: None,
29169 })))
29170 }
29171 }
29172 DialectType::Redshift => {
29173 if has_zone {
29174 // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
29175 Ok(Expression::Cast(Box::new(Cast {
29176 this,
29177 to: DataType::Timestamp {
29178 timezone: true,
29179 precision: None,
29180 },
29181 double_colon_syntax: false,
29182 trailing_comments: Vec::new(),
29183 format: None,
29184 default: None,
29185 inferred_type: None,
29186 })))
29187 } else {
29188 // Redshift: CAST(x AS TIMESTAMP)
29189 Ok(Expression::Cast(Box::new(Cast {
29190 this,
29191 to: DataType::Timestamp {
29192 timezone: false,
29193 precision: None,
29194 },
29195 double_colon_syntax: false,
29196 trailing_comments: Vec::new(),
29197 format: None,
29198 default: None,
29199 inferred_type: None,
29200 })))
29201 }
29202 }
29203 _ => {
29204 // Default: CAST(x AS TIMESTAMP)
29205 Ok(Expression::Cast(Box::new(Cast {
29206 this,
29207 to: DataType::Timestamp {
29208 timezone: false,
29209 precision: None,
29210 },
29211 double_colon_syntax: false,
29212 trailing_comments: Vec::new(),
29213 format: None,
29214 default: None,
29215 inferred_type: None,
29216 })))
29217 }
29218 }
29219 } else {
29220 Ok(e)
29221 }
29222 }
29223
29224 Action::DateToDateStrConvert => {
29225 // DATE_TO_DATE_STR(x) -> CAST(x AS text_type) per dialect
29226 if let Expression::Function(f) = e {
29227 let arg = f.args.into_iter().next().unwrap();
29228 let str_type = match target {
29229 DialectType::DuckDB => DataType::Text,
29230 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29231 DataType::Custom {
29232 name: "STRING".to_string(),
29233 }
29234 }
29235 DialectType::Presto
29236 | DialectType::Trino
29237 | DialectType::Athena
29238 | DialectType::Drill => DataType::VarChar {
29239 length: None,
29240 parenthesized_length: false,
29241 },
29242 _ => DataType::VarChar {
29243 length: None,
29244 parenthesized_length: false,
29245 },
29246 };
29247 Ok(Expression::Cast(Box::new(Cast {
29248 this: arg,
29249 to: str_type,
29250 double_colon_syntax: false,
29251 trailing_comments: Vec::new(),
29252 format: None,
29253 default: None,
29254 inferred_type: None,
29255 })))
29256 } else {
29257 Ok(e)
29258 }
29259 }
29260
29261 Action::DateToDiConvert => {
29262 // DATE_TO_DI(x) -> CAST(format_func(x, fmt) AS INT)
29263 if let Expression::Function(f) = e {
29264 let arg = f.args.into_iter().next().unwrap();
29265 let inner = match target {
29266 DialectType::DuckDB => {
29267 // STRFTIME(x, '%Y%m%d')
29268 Expression::Function(Box::new(Function::new(
29269 "STRFTIME".to_string(),
29270 vec![arg, Expression::string("%Y%m%d")],
29271 )))
29272 }
29273 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29274 // DATE_FORMAT(x, 'yyyyMMdd')
29275 Expression::Function(Box::new(Function::new(
29276 "DATE_FORMAT".to_string(),
29277 vec![arg, Expression::string("yyyyMMdd")],
29278 )))
29279 }
29280 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29281 // DATE_FORMAT(x, '%Y%m%d')
29282 Expression::Function(Box::new(Function::new(
29283 "DATE_FORMAT".to_string(),
29284 vec![arg, Expression::string("%Y%m%d")],
29285 )))
29286 }
29287 DialectType::Drill => {
29288 // TO_DATE(x, 'yyyyMMdd')
29289 Expression::Function(Box::new(Function::new(
29290 "TO_DATE".to_string(),
29291 vec![arg, Expression::string("yyyyMMdd")],
29292 )))
29293 }
29294 _ => {
29295 // Default: STRFTIME(x, '%Y%m%d')
29296 Expression::Function(Box::new(Function::new(
29297 "STRFTIME".to_string(),
29298 vec![arg, Expression::string("%Y%m%d")],
29299 )))
29300 }
29301 };
29302 // Use INT (not INTEGER) for Presto/Trino
29303 let int_type = match target {
29304 DialectType::Presto
29305 | DialectType::Trino
29306 | DialectType::Athena
29307 | DialectType::TSQL
29308 | DialectType::Fabric
29309 | DialectType::SQLite
29310 | DialectType::Redshift => DataType::Custom {
29311 name: "INT".to_string(),
29312 },
29313 _ => DataType::Int {
29314 length: None,
29315 integer_spelling: false,
29316 },
29317 };
29318 Ok(Expression::Cast(Box::new(Cast {
29319 this: inner,
29320 to: int_type,
29321 double_colon_syntax: false,
29322 trailing_comments: Vec::new(),
29323 format: None,
29324 default: None,
29325 inferred_type: None,
29326 })))
29327 } else {
29328 Ok(e)
29329 }
29330 }
29331
29332 Action::DiToDateConvert => {
29333 // DI_TO_DATE(x) -> dialect-specific integer-to-date conversion
29334 if let Expression::Function(f) = e {
29335 let arg = f.args.into_iter().next().unwrap();
29336 match target {
29337 DialectType::DuckDB => {
29338 // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
29339 let cast_text = Expression::Cast(Box::new(Cast {
29340 this: arg,
29341 to: DataType::Text,
29342 double_colon_syntax: false,
29343 trailing_comments: Vec::new(),
29344 format: None,
29345 default: None,
29346 inferred_type: None,
29347 }));
29348 let strptime = Expression::Function(Box::new(Function::new(
29349 "STRPTIME".to_string(),
29350 vec![cast_text, Expression::string("%Y%m%d")],
29351 )));
29352 Ok(Expression::Cast(Box::new(Cast {
29353 this: strptime,
29354 to: DataType::Date,
29355 double_colon_syntax: false,
29356 trailing_comments: Vec::new(),
29357 format: None,
29358 default: None,
29359 inferred_type: None,
29360 })))
29361 }
29362 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29363 // TO_DATE(CAST(x AS STRING), 'yyyyMMdd')
29364 let cast_str = Expression::Cast(Box::new(Cast {
29365 this: arg,
29366 to: DataType::Custom {
29367 name: "STRING".to_string(),
29368 },
29369 double_colon_syntax: false,
29370 trailing_comments: Vec::new(),
29371 format: None,
29372 default: None,
29373 inferred_type: None,
29374 }));
29375 Ok(Expression::Function(Box::new(Function::new(
29376 "TO_DATE".to_string(),
29377 vec![cast_str, Expression::string("yyyyMMdd")],
29378 ))))
29379 }
29380 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29381 // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)
29382 let cast_varchar = Expression::Cast(Box::new(Cast {
29383 this: arg,
29384 to: DataType::VarChar {
29385 length: None,
29386 parenthesized_length: false,
29387 },
29388 double_colon_syntax: false,
29389 trailing_comments: Vec::new(),
29390 format: None,
29391 default: None,
29392 inferred_type: None,
29393 }));
29394 let date_parse = Expression::Function(Box::new(Function::new(
29395 "DATE_PARSE".to_string(),
29396 vec![cast_varchar, Expression::string("%Y%m%d")],
29397 )));
29398 Ok(Expression::Cast(Box::new(Cast {
29399 this: date_parse,
29400 to: DataType::Date,
29401 double_colon_syntax: false,
29402 trailing_comments: Vec::new(),
29403 format: None,
29404 default: None,
29405 inferred_type: None,
29406 })))
29407 }
29408 DialectType::Drill => {
29409 // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
29410 let cast_varchar = Expression::Cast(Box::new(Cast {
29411 this: arg,
29412 to: DataType::VarChar {
29413 length: None,
29414 parenthesized_length: false,
29415 },
29416 double_colon_syntax: false,
29417 trailing_comments: Vec::new(),
29418 format: None,
29419 default: None,
29420 inferred_type: None,
29421 }));
29422 Ok(Expression::Function(Box::new(Function::new(
29423 "TO_DATE".to_string(),
29424 vec![cast_varchar, Expression::string("yyyyMMdd")],
29425 ))))
29426 }
29427 _ => Ok(Expression::Function(Box::new(Function::new(
29428 "DI_TO_DATE".to_string(),
29429 vec![arg],
29430 )))),
29431 }
29432 } else {
29433 Ok(e)
29434 }
29435 }
29436
29437 Action::TsOrDiToDiConvert => {
29438 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
29439 if let Expression::Function(f) = e {
29440 let arg = f.args.into_iter().next().unwrap();
29441 let str_type = match target {
29442 DialectType::DuckDB => DataType::Text,
29443 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29444 DataType::Custom {
29445 name: "STRING".to_string(),
29446 }
29447 }
29448 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29449 DataType::VarChar {
29450 length: None,
29451 parenthesized_length: false,
29452 }
29453 }
29454 _ => DataType::VarChar {
29455 length: None,
29456 parenthesized_length: false,
29457 },
29458 };
29459 let cast_str = Expression::Cast(Box::new(Cast {
29460 this: arg,
29461 to: str_type,
29462 double_colon_syntax: false,
29463 trailing_comments: Vec::new(),
29464 format: None,
29465 default: None,
29466 inferred_type: None,
29467 }));
29468 let replace_expr = Expression::Function(Box::new(Function::new(
29469 "REPLACE".to_string(),
29470 vec![cast_str, Expression::string("-"), Expression::string("")],
29471 )));
29472 let substr_name = match target {
29473 DialectType::DuckDB
29474 | DialectType::Hive
29475 | DialectType::Spark
29476 | DialectType::Databricks => "SUBSTR",
29477 _ => "SUBSTR",
29478 };
29479 let substr = Expression::Function(Box::new(Function::new(
29480 substr_name.to_string(),
29481 vec![replace_expr, Expression::number(1), Expression::number(8)],
29482 )));
29483 // Use INT (not INTEGER) for Presto/Trino etc.
29484 let int_type = match target {
29485 DialectType::Presto
29486 | DialectType::Trino
29487 | DialectType::Athena
29488 | DialectType::TSQL
29489 | DialectType::Fabric
29490 | DialectType::SQLite
29491 | DialectType::Redshift => DataType::Custom {
29492 name: "INT".to_string(),
29493 },
29494 _ => DataType::Int {
29495 length: None,
29496 integer_spelling: false,
29497 },
29498 };
29499 Ok(Expression::Cast(Box::new(Cast {
29500 this: substr,
29501 to: int_type,
29502 double_colon_syntax: false,
29503 trailing_comments: Vec::new(),
29504 format: None,
29505 default: None,
29506 inferred_type: None,
29507 })))
29508 } else {
29509 Ok(e)
29510 }
29511 }
29512
29513 Action::UnixToStrConvert => {
29514 // UNIX_TO_STR(x, fmt) -> convert to Expression::UnixToStr for generator
29515 if let Expression::Function(f) = e {
29516 let mut args = f.args;
29517 let this = args.remove(0);
29518 let fmt_expr = if !args.is_empty() {
29519 Some(args.remove(0))
29520 } else {
29521 None
29522 };
29523
29524 // Check if format is a string literal
29525 let fmt_str = fmt_expr.as_ref().and_then(|f| {
29526 if let Expression::Literal(lit) = f {
29527 if let Literal::String(s) = lit.as_ref() {
29528 Some(s.clone())
29529 } else {
29530 None
29531 }
29532 } else {
29533 None
29534 }
29535 });
29536
29537 if let Some(fmt_string) = fmt_str {
29538 // String literal format -> use UnixToStr expression (generator handles it)
29539 Ok(Expression::UnixToStr(Box::new(
29540 crate::expressions::UnixToStr {
29541 this: Box::new(this),
29542 format: Some(fmt_string),
29543 },
29544 )))
29545 } else if let Some(fmt_e) = fmt_expr {
29546 // Non-literal format (e.g., identifier `y`) -> build target expression directly
29547 match target {
29548 DialectType::DuckDB => {
29549 // STRFTIME(TO_TIMESTAMP(x), y)
29550 let to_ts = Expression::Function(Box::new(Function::new(
29551 "TO_TIMESTAMP".to_string(),
29552 vec![this],
29553 )));
29554 Ok(Expression::Function(Box::new(Function::new(
29555 "STRFTIME".to_string(),
29556 vec![to_ts, fmt_e],
29557 ))))
29558 }
29559 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29560 // DATE_FORMAT(FROM_UNIXTIME(x), y)
29561 let from_unix = Expression::Function(Box::new(Function::new(
29562 "FROM_UNIXTIME".to_string(),
29563 vec![this],
29564 )));
29565 Ok(Expression::Function(Box::new(Function::new(
29566 "DATE_FORMAT".to_string(),
29567 vec![from_unix, fmt_e],
29568 ))))
29569 }
29570 DialectType::Hive
29571 | DialectType::Spark
29572 | DialectType::Databricks
29573 | DialectType::Doris
29574 | DialectType::StarRocks => {
29575 // FROM_UNIXTIME(x, y)
29576 Ok(Expression::Function(Box::new(Function::new(
29577 "FROM_UNIXTIME".to_string(),
29578 vec![this, fmt_e],
29579 ))))
29580 }
29581 _ => {
29582 // Default: keep as UNIX_TO_STR(x, y)
29583 Ok(Expression::Function(Box::new(Function::new(
29584 "UNIX_TO_STR".to_string(),
29585 vec![this, fmt_e],
29586 ))))
29587 }
29588 }
29589 } else {
29590 Ok(Expression::UnixToStr(Box::new(
29591 crate::expressions::UnixToStr {
29592 this: Box::new(this),
29593 format: None,
29594 },
29595 )))
29596 }
29597 } else {
29598 Ok(e)
29599 }
29600 }
29601
29602 Action::UnixToTimeConvert => {
29603 // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
29604 if let Expression::Function(f) = e {
29605 let arg = f.args.into_iter().next().unwrap();
29606 Ok(Expression::UnixToTime(Box::new(
29607 crate::expressions::UnixToTime {
29608 this: Box::new(arg),
29609 scale: None,
29610 zone: None,
29611 hours: None,
29612 minutes: None,
29613 format: None,
29614 target_type: None,
29615 },
29616 )))
29617 } else {
29618 Ok(e)
29619 }
29620 }
29621
29622 Action::UnixToTimeStrConvert => {
29623 // UNIX_TO_TIME_STR(x) -> dialect-specific
29624 if let Expression::Function(f) = e {
29625 let arg = f.args.into_iter().next().unwrap();
29626 match target {
29627 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29628 // FROM_UNIXTIME(x)
29629 Ok(Expression::Function(Box::new(Function::new(
29630 "FROM_UNIXTIME".to_string(),
29631 vec![arg],
29632 ))))
29633 }
29634 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29635 // CAST(FROM_UNIXTIME(x) AS VARCHAR)
29636 let from_unix = Expression::Function(Box::new(Function::new(
29637 "FROM_UNIXTIME".to_string(),
29638 vec![arg],
29639 )));
29640 Ok(Expression::Cast(Box::new(Cast {
29641 this: from_unix,
29642 to: DataType::VarChar {
29643 length: None,
29644 parenthesized_length: false,
29645 },
29646 double_colon_syntax: false,
29647 trailing_comments: Vec::new(),
29648 format: None,
29649 default: None,
29650 inferred_type: None,
29651 })))
29652 }
29653 DialectType::DuckDB => {
29654 // CAST(TO_TIMESTAMP(x) AS TEXT)
29655 let to_ts = Expression::Function(Box::new(Function::new(
29656 "TO_TIMESTAMP".to_string(),
29657 vec![arg],
29658 )));
29659 Ok(Expression::Cast(Box::new(Cast {
29660 this: to_ts,
29661 to: DataType::Text,
29662 double_colon_syntax: false,
29663 trailing_comments: Vec::new(),
29664 format: None,
29665 default: None,
29666 inferred_type: None,
29667 })))
29668 }
29669 _ => Ok(Expression::Function(Box::new(Function::new(
29670 "UNIX_TO_TIME_STR".to_string(),
29671 vec![arg],
29672 )))),
29673 }
29674 } else {
29675 Ok(e)
29676 }
29677 }
29678
29679 Action::TimeToUnixConvert => {
29680 // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
29681 if let Expression::Function(f) = e {
29682 let arg = f.args.into_iter().next().unwrap();
29683 Ok(Expression::TimeToUnix(Box::new(
29684 crate::expressions::UnaryFunc {
29685 this: arg,
29686 original_name: None,
29687 inferred_type: None,
29688 },
29689 )))
29690 } else {
29691 Ok(e)
29692 }
29693 }
29694
29695 Action::TimeToStrConvert => {
29696 // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
29697 if let Expression::Function(f) = e {
29698 let mut args = f.args;
29699 let this = args.remove(0);
29700 let fmt = match args.remove(0) {
29701 Expression::Literal(lit)
29702 if matches!(lit.as_ref(), Literal::String(_)) =>
29703 {
29704 let Literal::String(s) = lit.as_ref() else {
29705 unreachable!()
29706 };
29707 s.clone()
29708 }
29709 other => {
29710 return Ok(Expression::Function(Box::new(Function::new(
29711 "TIME_TO_STR".to_string(),
29712 vec![this, other],
29713 ))));
29714 }
29715 };
29716 Ok(Expression::TimeToStr(Box::new(
29717 crate::expressions::TimeToStr {
29718 this: Box::new(this),
29719 format: fmt,
29720 culture: None,
29721 zone: None,
29722 },
29723 )))
29724 } else {
29725 Ok(e)
29726 }
29727 }
29728
29729 Action::StrToUnixConvert => {
29730 // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
29731 if let Expression::Function(f) = e {
29732 let mut args = f.args;
29733 let this = args.remove(0);
29734 let fmt = match args.remove(0) {
29735 Expression::Literal(lit)
29736 if matches!(lit.as_ref(), Literal::String(_)) =>
29737 {
29738 let Literal::String(s) = lit.as_ref() else {
29739 unreachable!()
29740 };
29741 s.clone()
29742 }
29743 other => {
29744 return Ok(Expression::Function(Box::new(Function::new(
29745 "STR_TO_UNIX".to_string(),
29746 vec![this, other],
29747 ))));
29748 }
29749 };
29750 Ok(Expression::StrToUnix(Box::new(
29751 crate::expressions::StrToUnix {
29752 this: Some(Box::new(this)),
29753 format: Some(fmt),
29754 },
29755 )))
29756 } else {
29757 Ok(e)
29758 }
29759 }
29760
29761 Action::TimeStrToUnixConvert => {
29762 // TIME_STR_TO_UNIX(x) -> dialect-specific
29763 if let Expression::Function(f) = e {
29764 let arg = f.args.into_iter().next().unwrap();
29765 match target {
29766 DialectType::DuckDB => {
29767 // EPOCH(CAST(x AS TIMESTAMP))
29768 let cast_ts = Expression::Cast(Box::new(Cast {
29769 this: arg,
29770 to: DataType::Timestamp {
29771 timezone: false,
29772 precision: None,
29773 },
29774 double_colon_syntax: false,
29775 trailing_comments: Vec::new(),
29776 format: None,
29777 default: None,
29778 inferred_type: None,
29779 }));
29780 Ok(Expression::Function(Box::new(Function::new(
29781 "EPOCH".to_string(),
29782 vec![cast_ts],
29783 ))))
29784 }
29785 DialectType::Hive
29786 | DialectType::Doris
29787 | DialectType::StarRocks
29788 | DialectType::MySQL => {
29789 // UNIX_TIMESTAMP(x)
29790 Ok(Expression::Function(Box::new(Function::new(
29791 "UNIX_TIMESTAMP".to_string(),
29792 vec![arg],
29793 ))))
29794 }
29795 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29796 // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
29797 let date_parse = Expression::Function(Box::new(Function::new(
29798 "DATE_PARSE".to_string(),
29799 vec![arg, Expression::string("%Y-%m-%d %T")],
29800 )));
29801 Ok(Expression::Function(Box::new(Function::new(
29802 "TO_UNIXTIME".to_string(),
29803 vec![date_parse],
29804 ))))
29805 }
29806 _ => Ok(Expression::Function(Box::new(Function::new(
29807 "TIME_STR_TO_UNIX".to_string(),
29808 vec![arg],
29809 )))),
29810 }
29811 } else {
29812 Ok(e)
29813 }
29814 }
29815
29816 Action::TimeToTimeStrConvert => {
29817 // TIME_TO_TIME_STR(x) -> CAST(x AS str_type) per dialect
29818 if let Expression::Function(f) = e {
29819 let arg = f.args.into_iter().next().unwrap();
29820 let str_type = match target {
29821 DialectType::DuckDB => DataType::Text,
29822 DialectType::Hive
29823 | DialectType::Spark
29824 | DialectType::Databricks
29825 | DialectType::Doris
29826 | DialectType::StarRocks => DataType::Custom {
29827 name: "STRING".to_string(),
29828 },
29829 DialectType::Redshift => DataType::Custom {
29830 name: "VARCHAR(MAX)".to_string(),
29831 },
29832 _ => DataType::VarChar {
29833 length: None,
29834 parenthesized_length: false,
29835 },
29836 };
29837 Ok(Expression::Cast(Box::new(Cast {
29838 this: arg,
29839 to: str_type,
29840 double_colon_syntax: false,
29841 trailing_comments: Vec::new(),
29842 format: None,
29843 default: None,
29844 inferred_type: None,
29845 })))
29846 } else {
29847 Ok(e)
29848 }
29849 }
29850
29851 Action::DateTruncSwapArgs => {
29852 // DATE_TRUNC('unit', x) from Generic -> target-specific
29853 if let Expression::Function(f) = e {
29854 if f.args.len() == 2 {
29855 let unit_arg = f.args[0].clone();
29856 let expr_arg = f.args[1].clone();
29857 // Extract unit string from the first arg
29858 let unit_str = match &unit_arg {
29859 Expression::Literal(lit)
29860 if matches!(lit.as_ref(), Literal::String(_)) =>
29861 {
29862 let Literal::String(s) = lit.as_ref() else {
29863 unreachable!()
29864 };
29865 s.to_ascii_uppercase()
29866 }
29867 _ => return Ok(Expression::Function(f)),
29868 };
29869 match target {
29870 DialectType::BigQuery => {
29871 // BigQuery: DATE_TRUNC(x, UNIT) - unquoted unit
29872 let unit_ident =
29873 Expression::Column(Box::new(crate::expressions::Column {
29874 name: crate::expressions::Identifier::new(unit_str),
29875 table: None,
29876 join_mark: false,
29877 trailing_comments: Vec::new(),
29878 span: None,
29879 inferred_type: None,
29880 }));
29881 Ok(Expression::Function(Box::new(Function::new(
29882 "DATE_TRUNC".to_string(),
29883 vec![expr_arg, unit_ident],
29884 ))))
29885 }
29886 DialectType::Doris => {
29887 // Doris: DATE_TRUNC(x, 'UNIT')
29888 Ok(Expression::Function(Box::new(Function::new(
29889 "DATE_TRUNC".to_string(),
29890 vec![expr_arg, Expression::string(&unit_str)],
29891 ))))
29892 }
29893 DialectType::StarRocks => {
29894 // StarRocks: DATE_TRUNC('UNIT', x) - keep standard order
29895 Ok(Expression::Function(Box::new(Function::new(
29896 "DATE_TRUNC".to_string(),
29897 vec![Expression::string(&unit_str), expr_arg],
29898 ))))
29899 }
29900 DialectType::Spark | DialectType::Databricks => {
29901 // Spark: TRUNC(x, 'UNIT')
29902 Ok(Expression::Function(Box::new(Function::new(
29903 "TRUNC".to_string(),
29904 vec![expr_arg, Expression::string(&unit_str)],
29905 ))))
29906 }
29907 DialectType::MySQL => {
29908 // MySQL: complex expansion based on unit
29909 Self::date_trunc_to_mysql(&unit_str, &expr_arg)
29910 }
29911 _ => Ok(Expression::Function(f)),
29912 }
29913 } else {
29914 Ok(Expression::Function(f))
29915 }
29916 } else {
29917 Ok(e)
29918 }
29919 }
29920
29921 Action::TimestampTruncConvert => {
29922 // TIMESTAMP_TRUNC(x, UNIT[, tz]) from Generic -> target-specific
29923 if let Expression::Function(f) = e {
29924 if f.args.len() >= 2 {
29925 let expr_arg = f.args[0].clone();
29926 let unit_arg = f.args[1].clone();
29927 let tz_arg = if f.args.len() >= 3 {
29928 Some(f.args[2].clone())
29929 } else {
29930 None
29931 };
29932 // Extract unit string
29933 let unit_str = match &unit_arg {
29934 Expression::Literal(lit)
29935 if matches!(lit.as_ref(), Literal::String(_)) =>
29936 {
29937 let Literal::String(s) = lit.as_ref() else {
29938 unreachable!()
29939 };
29940 s.to_ascii_uppercase()
29941 }
29942 Expression::Column(c) => c.name.name.to_ascii_uppercase(),
29943 _ => {
29944 return Ok(Expression::Function(f));
29945 }
29946 };
29947 match target {
29948 DialectType::Spark | DialectType::Databricks => {
29949 // Spark: DATE_TRUNC('UNIT', x)
29950 Ok(Expression::Function(Box::new(Function::new(
29951 "DATE_TRUNC".to_string(),
29952 vec![Expression::string(&unit_str), expr_arg],
29953 ))))
29954 }
29955 DialectType::Doris | DialectType::StarRocks => {
29956 // Doris: DATE_TRUNC(x, 'UNIT')
29957 Ok(Expression::Function(Box::new(Function::new(
29958 "DATE_TRUNC".to_string(),
29959 vec![expr_arg, Expression::string(&unit_str)],
29960 ))))
29961 }
29962 DialectType::BigQuery => {
29963 // BigQuery: TIMESTAMP_TRUNC(x, UNIT) - keep but with unquoted unit
29964 let unit_ident =
29965 Expression::Column(Box::new(crate::expressions::Column {
29966 name: crate::expressions::Identifier::new(unit_str),
29967 table: None,
29968 join_mark: false,
29969 trailing_comments: Vec::new(),
29970 span: None,
29971 inferred_type: None,
29972 }));
29973 let mut args = vec![expr_arg, unit_ident];
29974 if let Some(tz) = tz_arg {
29975 args.push(tz);
29976 }
29977 Ok(Expression::Function(Box::new(Function::new(
29978 "TIMESTAMP_TRUNC".to_string(),
29979 args,
29980 ))))
29981 }
29982 DialectType::DuckDB => {
29983 // DuckDB with timezone: DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
29984 if let Some(tz) = tz_arg {
29985 let tz_str = match &tz {
29986 Expression::Literal(lit)
29987 if matches!(lit.as_ref(), Literal::String(_)) =>
29988 {
29989 let Literal::String(s) = lit.as_ref() else {
29990 unreachable!()
29991 };
29992 s.clone()
29993 }
29994 _ => "UTC".to_string(),
29995 };
29996 // x AT TIME ZONE 'tz'
29997 let at_tz = Expression::AtTimeZone(Box::new(
29998 crate::expressions::AtTimeZone {
29999 this: expr_arg,
30000 zone: Expression::string(&tz_str),
30001 },
30002 ));
30003 // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
30004 let trunc = Expression::Function(Box::new(Function::new(
30005 "DATE_TRUNC".to_string(),
30006 vec![Expression::string(&unit_str), at_tz],
30007 )));
30008 // DATE_TRUNC(...) AT TIME ZONE 'tz'
30009 Ok(Expression::AtTimeZone(Box::new(
30010 crate::expressions::AtTimeZone {
30011 this: trunc,
30012 zone: Expression::string(&tz_str),
30013 },
30014 )))
30015 } else {
30016 Ok(Expression::Function(Box::new(Function::new(
30017 "DATE_TRUNC".to_string(),
30018 vec![Expression::string(&unit_str), expr_arg],
30019 ))))
30020 }
30021 }
30022 DialectType::Presto
30023 | DialectType::Trino
30024 | DialectType::Athena
30025 | DialectType::Snowflake => {
30026 // Presto/Snowflake: DATE_TRUNC('UNIT', x) - drop timezone
30027 Ok(Expression::Function(Box::new(Function::new(
30028 "DATE_TRUNC".to_string(),
30029 vec![Expression::string(&unit_str), expr_arg],
30030 ))))
30031 }
30032 _ => {
30033 // For most dialects: DATE_TRUNC('UNIT', x) + tz handling
30034 let mut args = vec![Expression::string(&unit_str), expr_arg];
30035 if let Some(tz) = tz_arg {
30036 args.push(tz);
30037 }
30038 Ok(Expression::Function(Box::new(Function::new(
30039 "DATE_TRUNC".to_string(),
30040 args,
30041 ))))
30042 }
30043 }
30044 } else {
30045 Ok(Expression::Function(f))
30046 }
30047 } else {
30048 Ok(e)
30049 }
30050 }
30051
30052 Action::StrToDateConvert => {
30053 // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date parsing
30054 if let Expression::Function(f) = e {
30055 if f.args.len() == 2 {
30056 let mut args = f.args;
30057 let this = args.remove(0);
30058 let fmt_expr = args.remove(0);
30059 let fmt_str = match &fmt_expr {
30060 Expression::Literal(lit)
30061 if matches!(lit.as_ref(), Literal::String(_)) =>
30062 {
30063 let Literal::String(s) = lit.as_ref() else {
30064 unreachable!()
30065 };
30066 Some(s.clone())
30067 }
30068 _ => None,
30069 };
30070 let default_date = "%Y-%m-%d";
30071 let default_time = "%Y-%m-%d %H:%M:%S";
30072 let is_default = fmt_str
30073 .as_ref()
30074 .map_or(false, |f| f == default_date || f == default_time);
30075
30076 if is_default {
30077 // Default format: handle per-dialect
30078 match target {
30079 DialectType::MySQL
30080 | DialectType::Doris
30081 | DialectType::StarRocks => {
30082 // Keep STR_TO_DATE(x, fmt) as-is
30083 Ok(Expression::Function(Box::new(Function::new(
30084 "STR_TO_DATE".to_string(),
30085 vec![this, fmt_expr],
30086 ))))
30087 }
30088 DialectType::Hive => {
30089 // Hive: CAST(x AS DATE)
30090 Ok(Expression::Cast(Box::new(Cast {
30091 this,
30092 to: DataType::Date,
30093 double_colon_syntax: false,
30094 trailing_comments: Vec::new(),
30095 format: None,
30096 default: None,
30097 inferred_type: None,
30098 })))
30099 }
30100 DialectType::Presto
30101 | DialectType::Trino
30102 | DialectType::Athena => {
30103 // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
30104 let date_parse =
30105 Expression::Function(Box::new(Function::new(
30106 "DATE_PARSE".to_string(),
30107 vec![this, fmt_expr],
30108 )));
30109 Ok(Expression::Cast(Box::new(Cast {
30110 this: date_parse,
30111 to: DataType::Date,
30112 double_colon_syntax: false,
30113 trailing_comments: Vec::new(),
30114 format: None,
30115 default: None,
30116 inferred_type: None,
30117 })))
30118 }
30119 _ => {
30120 // Others: TsOrDsToDate (delegates to generator)
30121 Ok(Expression::TsOrDsToDate(Box::new(
30122 crate::expressions::TsOrDsToDate {
30123 this: Box::new(this),
30124 format: None,
30125 safe: None,
30126 },
30127 )))
30128 }
30129 }
30130 } else if let Some(fmt) = fmt_str {
30131 match target {
30132 DialectType::Doris
30133 | DialectType::StarRocks
30134 | DialectType::MySQL => {
30135 // Keep STR_TO_DATE but with normalized format (%H:%M:%S -> %T, %-d -> %e)
30136 let mut normalized = fmt.clone();
30137 normalized = normalized.replace("%-d", "%e");
30138 normalized = normalized.replace("%-m", "%c");
30139 normalized = normalized.replace("%H:%M:%S", "%T");
30140 Ok(Expression::Function(Box::new(Function::new(
30141 "STR_TO_DATE".to_string(),
30142 vec![this, Expression::string(&normalized)],
30143 ))))
30144 }
30145 DialectType::Hive => {
30146 // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
30147 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
30148 let unix_ts =
30149 Expression::Function(Box::new(Function::new(
30150 "UNIX_TIMESTAMP".to_string(),
30151 vec![this, Expression::string(&java_fmt)],
30152 )));
30153 let from_unix =
30154 Expression::Function(Box::new(Function::new(
30155 "FROM_UNIXTIME".to_string(),
30156 vec![unix_ts],
30157 )));
30158 Ok(Expression::Cast(Box::new(Cast {
30159 this: from_unix,
30160 to: DataType::Date,
30161 double_colon_syntax: false,
30162 trailing_comments: Vec::new(),
30163 format: None,
30164 default: None,
30165 inferred_type: None,
30166 })))
30167 }
30168 DialectType::Spark | DialectType::Databricks => {
30169 // Spark: TO_DATE(x, java_fmt)
30170 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
30171 Ok(Expression::Function(Box::new(Function::new(
30172 "TO_DATE".to_string(),
30173 vec![this, Expression::string(&java_fmt)],
30174 ))))
30175 }
30176 DialectType::Drill => {
30177 // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
30178 // The generator's string literal escaping will double the quotes: 'T' -> ''T''
30179 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
30180 let java_fmt = java_fmt.replace('T', "'T'");
30181 Ok(Expression::Function(Box::new(Function::new(
30182 "TO_DATE".to_string(),
30183 vec![this, Expression::string(&java_fmt)],
30184 ))))
30185 }
30186 _ => {
30187 // For other dialects: use TsOrDsToDate which delegates to generator
30188 Ok(Expression::TsOrDsToDate(Box::new(
30189 crate::expressions::TsOrDsToDate {
30190 this: Box::new(this),
30191 format: Some(fmt),
30192 safe: None,
30193 },
30194 )))
30195 }
30196 }
30197 } else {
30198 // Non-string format - keep as-is
30199 let mut new_args = Vec::new();
30200 new_args.push(this);
30201 new_args.push(fmt_expr);
30202 Ok(Expression::Function(Box::new(Function::new(
30203 "STR_TO_DATE".to_string(),
30204 new_args,
30205 ))))
30206 }
30207 } else {
30208 Ok(Expression::Function(f))
30209 }
30210 } else {
30211 Ok(e)
30212 }
30213 }
30214
30215 Action::TsOrDsAddConvert => {
30216 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
30217 if let Expression::Function(f) = e {
30218 if f.args.len() == 3 {
30219 let mut args = f.args;
30220 let x = args.remove(0);
30221 let n = args.remove(0);
30222 let unit_expr = args.remove(0);
30223 let unit_str = match &unit_expr {
30224 Expression::Literal(lit)
30225 if matches!(lit.as_ref(), Literal::String(_)) =>
30226 {
30227 let Literal::String(s) = lit.as_ref() else {
30228 unreachable!()
30229 };
30230 s.to_ascii_uppercase()
30231 }
30232 _ => "DAY".to_string(),
30233 };
30234
30235 match target {
30236 DialectType::Hive
30237 | DialectType::Spark
30238 | DialectType::Databricks => {
30239 // DATE_ADD(x, n) - only supports DAY unit
30240 Ok(Expression::Function(Box::new(Function::new(
30241 "DATE_ADD".to_string(),
30242 vec![x, n],
30243 ))))
30244 }
30245 DialectType::MySQL => {
30246 // DATE_ADD(x, INTERVAL n UNIT)
30247 let iu = match unit_str.as_str() {
30248 "YEAR" => crate::expressions::IntervalUnit::Year,
30249 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
30250 "MONTH" => crate::expressions::IntervalUnit::Month,
30251 "WEEK" => crate::expressions::IntervalUnit::Week,
30252 "HOUR" => crate::expressions::IntervalUnit::Hour,
30253 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30254 "SECOND" => crate::expressions::IntervalUnit::Second,
30255 _ => crate::expressions::IntervalUnit::Day,
30256 };
30257 let interval = Expression::Interval(Box::new(
30258 crate::expressions::Interval {
30259 this: Some(n),
30260 unit: Some(
30261 crate::expressions::IntervalUnitSpec::Simple {
30262 unit: iu,
30263 use_plural: false,
30264 },
30265 ),
30266 },
30267 ));
30268 Ok(Expression::Function(Box::new(Function::new(
30269 "DATE_ADD".to_string(),
30270 vec![x, interval],
30271 ))))
30272 }
30273 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30274 // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
30275 let cast_ts = Expression::Cast(Box::new(Cast {
30276 this: x,
30277 to: DataType::Timestamp {
30278 precision: None,
30279 timezone: false,
30280 },
30281 double_colon_syntax: false,
30282 trailing_comments: Vec::new(),
30283 format: None,
30284 default: None,
30285 inferred_type: None,
30286 }));
30287 let cast_date = Expression::Cast(Box::new(Cast {
30288 this: cast_ts,
30289 to: DataType::Date,
30290 double_colon_syntax: false,
30291 trailing_comments: Vec::new(),
30292 format: None,
30293 default: None,
30294 inferred_type: None,
30295 }));
30296 Ok(Expression::Function(Box::new(Function::new(
30297 "DATE_ADD".to_string(),
30298 vec![Expression::string(&unit_str), n, cast_date],
30299 ))))
30300 }
30301 DialectType::DuckDB => {
30302 // CAST(x AS DATE) + INTERVAL n UNIT
30303 let cast_date = Expression::Cast(Box::new(Cast {
30304 this: x,
30305 to: DataType::Date,
30306 double_colon_syntax: false,
30307 trailing_comments: Vec::new(),
30308 format: None,
30309 default: None,
30310 inferred_type: None,
30311 }));
30312 let iu = match unit_str.as_str() {
30313 "YEAR" => crate::expressions::IntervalUnit::Year,
30314 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
30315 "MONTH" => crate::expressions::IntervalUnit::Month,
30316 "WEEK" => crate::expressions::IntervalUnit::Week,
30317 "HOUR" => crate::expressions::IntervalUnit::Hour,
30318 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30319 "SECOND" => crate::expressions::IntervalUnit::Second,
30320 _ => crate::expressions::IntervalUnit::Day,
30321 };
30322 let interval = Expression::Interval(Box::new(
30323 crate::expressions::Interval {
30324 this: Some(n),
30325 unit: Some(
30326 crate::expressions::IntervalUnitSpec::Simple {
30327 unit: iu,
30328 use_plural: false,
30329 },
30330 ),
30331 },
30332 ));
30333 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
30334 left: cast_date,
30335 right: interval,
30336 left_comments: Vec::new(),
30337 operator_comments: Vec::new(),
30338 trailing_comments: Vec::new(),
30339 inferred_type: None,
30340 })))
30341 }
30342 DialectType::Drill => {
30343 // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
30344 let cast_date = Expression::Cast(Box::new(Cast {
30345 this: x,
30346 to: DataType::Date,
30347 double_colon_syntax: false,
30348 trailing_comments: Vec::new(),
30349 format: None,
30350 default: None,
30351 inferred_type: None,
30352 }));
30353 let iu = match unit_str.as_str() {
30354 "YEAR" => crate::expressions::IntervalUnit::Year,
30355 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
30356 "MONTH" => crate::expressions::IntervalUnit::Month,
30357 "WEEK" => crate::expressions::IntervalUnit::Week,
30358 "HOUR" => crate::expressions::IntervalUnit::Hour,
30359 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30360 "SECOND" => crate::expressions::IntervalUnit::Second,
30361 _ => crate::expressions::IntervalUnit::Day,
30362 };
30363 let interval = Expression::Interval(Box::new(
30364 crate::expressions::Interval {
30365 this: Some(n),
30366 unit: Some(
30367 crate::expressions::IntervalUnitSpec::Simple {
30368 unit: iu,
30369 use_plural: false,
30370 },
30371 ),
30372 },
30373 ));
30374 Ok(Expression::Function(Box::new(Function::new(
30375 "DATE_ADD".to_string(),
30376 vec![cast_date, interval],
30377 ))))
30378 }
30379 _ => {
30380 // Default: keep as TS_OR_DS_ADD
30381 Ok(Expression::Function(Box::new(Function::new(
30382 "TS_OR_DS_ADD".to_string(),
30383 vec![x, n, unit_expr],
30384 ))))
30385 }
30386 }
30387 } else {
30388 Ok(Expression::Function(f))
30389 }
30390 } else {
30391 Ok(e)
30392 }
30393 }
30394
30395 Action::DateFromUnixDateConvert => {
30396 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
30397 if let Expression::Function(f) = e {
30398 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
30399 if matches!(
30400 target,
30401 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
30402 ) {
30403 return Ok(Expression::Function(Box::new(Function::new(
30404 "DATE_FROM_UNIX_DATE".to_string(),
30405 f.args,
30406 ))));
30407 }
30408 let n = f.args.into_iter().next().unwrap();
30409 let epoch_date = Expression::Cast(Box::new(Cast {
30410 this: Expression::string("1970-01-01"),
30411 to: DataType::Date,
30412 double_colon_syntax: false,
30413 trailing_comments: Vec::new(),
30414 format: None,
30415 default: None,
30416 inferred_type: None,
30417 }));
30418 match target {
30419 DialectType::DuckDB => {
30420 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
30421 let interval =
30422 Expression::Interval(Box::new(crate::expressions::Interval {
30423 this: Some(n),
30424 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30425 unit: crate::expressions::IntervalUnit::Day,
30426 use_plural: false,
30427 }),
30428 }));
30429 Ok(Expression::Add(Box::new(
30430 crate::expressions::BinaryOp::new(epoch_date, interval),
30431 )))
30432 }
30433 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30434 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
30435 Ok(Expression::Function(Box::new(Function::new(
30436 "DATE_ADD".to_string(),
30437 vec![Expression::string("DAY"), n, epoch_date],
30438 ))))
30439 }
30440 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
30441 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
30442 Ok(Expression::Function(Box::new(Function::new(
30443 "DATEADD".to_string(),
30444 vec![
30445 Expression::Identifier(Identifier::new("DAY")),
30446 n,
30447 epoch_date,
30448 ],
30449 ))))
30450 }
30451 DialectType::BigQuery => {
30452 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
30453 let interval =
30454 Expression::Interval(Box::new(crate::expressions::Interval {
30455 this: Some(n),
30456 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30457 unit: crate::expressions::IntervalUnit::Day,
30458 use_plural: false,
30459 }),
30460 }));
30461 Ok(Expression::Function(Box::new(Function::new(
30462 "DATE_ADD".to_string(),
30463 vec![epoch_date, interval],
30464 ))))
30465 }
30466 DialectType::MySQL
30467 | DialectType::Doris
30468 | DialectType::StarRocks
30469 | DialectType::Drill => {
30470 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
30471 let interval =
30472 Expression::Interval(Box::new(crate::expressions::Interval {
30473 this: Some(n),
30474 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30475 unit: crate::expressions::IntervalUnit::Day,
30476 use_plural: false,
30477 }),
30478 }));
30479 Ok(Expression::Function(Box::new(Function::new(
30480 "DATE_ADD".to_string(),
30481 vec![epoch_date, interval],
30482 ))))
30483 }
30484 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
30485 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
30486 Ok(Expression::Function(Box::new(Function::new(
30487 "DATE_ADD".to_string(),
30488 vec![epoch_date, n],
30489 ))))
30490 }
30491 DialectType::PostgreSQL
30492 | DialectType::Materialize
30493 | DialectType::RisingWave => {
30494 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
30495 let n_str = match &n {
30496 Expression::Literal(lit)
30497 if matches!(lit.as_ref(), Literal::Number(_)) =>
30498 {
30499 let Literal::Number(s) = lit.as_ref() else {
30500 unreachable!()
30501 };
30502 s.clone()
30503 }
30504 _ => Self::expr_to_string_static(&n),
30505 };
30506 let interval =
30507 Expression::Interval(Box::new(crate::expressions::Interval {
30508 this: Some(Expression::string(&format!("{} DAY", n_str))),
30509 unit: None,
30510 }));
30511 Ok(Expression::Add(Box::new(
30512 crate::expressions::BinaryOp::new(epoch_date, interval),
30513 )))
30514 }
30515 _ => {
30516 // Default: keep as-is
30517 Ok(Expression::Function(Box::new(Function::new(
30518 "DATE_FROM_UNIX_DATE".to_string(),
30519 vec![n],
30520 ))))
30521 }
30522 }
30523 } else {
30524 Ok(e)
30525 }
30526 }
30527
30528 Action::ArrayRemoveConvert => {
30529 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter
30530 if let Expression::ArrayRemove(bf) = e {
30531 let arr = bf.this;
30532 let target_val = bf.expression;
30533 match target {
30534 DialectType::DuckDB => {
30535 let u_id = crate::expressions::Identifier::new("_u");
30536 let lambda =
30537 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
30538 parameters: vec![u_id.clone()],
30539 body: Expression::Neq(Box::new(BinaryOp {
30540 left: Expression::Identifier(u_id),
30541 right: target_val,
30542 left_comments: Vec::new(),
30543 operator_comments: Vec::new(),
30544 trailing_comments: Vec::new(),
30545 inferred_type: None,
30546 })),
30547 colon: false,
30548 parameter_types: Vec::new(),
30549 }));
30550 Ok(Expression::Function(Box::new(Function::new(
30551 "LIST_FILTER".to_string(),
30552 vec![arr, lambda],
30553 ))))
30554 }
30555 DialectType::ClickHouse => {
30556 let u_id = crate::expressions::Identifier::new("_u");
30557 let lambda =
30558 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
30559 parameters: vec![u_id.clone()],
30560 body: Expression::Neq(Box::new(BinaryOp {
30561 left: Expression::Identifier(u_id),
30562 right: target_val,
30563 left_comments: Vec::new(),
30564 operator_comments: Vec::new(),
30565 trailing_comments: Vec::new(),
30566 inferred_type: None,
30567 })),
30568 colon: false,
30569 parameter_types: Vec::new(),
30570 }));
30571 Ok(Expression::Function(Box::new(Function::new(
30572 "arrayFilter".to_string(),
30573 vec![lambda, arr],
30574 ))))
30575 }
30576 DialectType::BigQuery => {
30577 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
30578 let u_id = crate::expressions::Identifier::new("_u");
30579 let u_col =
30580 Expression::Column(Box::new(crate::expressions::Column {
30581 name: u_id.clone(),
30582 table: None,
30583 join_mark: false,
30584 trailing_comments: Vec::new(),
30585 span: None,
30586 inferred_type: None,
30587 }));
30588 let unnest_expr =
30589 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
30590 this: arr,
30591 expressions: Vec::new(),
30592 with_ordinality: false,
30593 alias: None,
30594 offset_alias: None,
30595 }));
30596 let aliased_unnest =
30597 Expression::Alias(Box::new(crate::expressions::Alias {
30598 this: unnest_expr,
30599 alias: u_id.clone(),
30600 column_aliases: Vec::new(),
30601 alias_explicit_as: false,
30602 alias_keyword: None,
30603 pre_alias_comments: Vec::new(),
30604 trailing_comments: Vec::new(),
30605 inferred_type: None,
30606 }));
30607 let where_cond = Expression::Neq(Box::new(BinaryOp {
30608 left: u_col.clone(),
30609 right: target_val,
30610 left_comments: Vec::new(),
30611 operator_comments: Vec::new(),
30612 trailing_comments: Vec::new(),
30613 inferred_type: None,
30614 }));
30615 let subquery = Expression::Select(Box::new(
30616 crate::expressions::Select::new()
30617 .column(u_col)
30618 .from(aliased_unnest)
30619 .where_(where_cond),
30620 ));
30621 Ok(Expression::ArrayFunc(Box::new(
30622 crate::expressions::ArrayConstructor {
30623 expressions: vec![subquery],
30624 bracket_notation: false,
30625 use_list_keyword: false,
30626 },
30627 )))
30628 }
30629 _ => Ok(Expression::ArrayRemove(Box::new(
30630 crate::expressions::BinaryFunc {
30631 original_name: None,
30632 this: arr,
30633 expression: target_val,
30634 inferred_type: None,
30635 },
30636 ))),
30637 }
30638 } else {
30639 Ok(e)
30640 }
30641 }
30642
30643 Action::ArrayReverseConvert => {
30644 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
30645 if let Expression::ArrayReverse(af) = e {
30646 Ok(Expression::Function(Box::new(Function::new(
30647 "arrayReverse".to_string(),
30648 vec![af.this],
30649 ))))
30650 } else {
30651 Ok(e)
30652 }
30653 }
30654
30655 Action::JsonKeysConvert => {
30656 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
30657 if let Expression::JsonKeys(uf) = e {
30658 match target {
30659 DialectType::Spark | DialectType::Databricks => {
30660 Ok(Expression::Function(Box::new(Function::new(
30661 "JSON_OBJECT_KEYS".to_string(),
30662 vec![uf.this],
30663 ))))
30664 }
30665 DialectType::Snowflake => Ok(Expression::Function(Box::new(
30666 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
30667 ))),
30668 _ => Ok(Expression::JsonKeys(uf)),
30669 }
30670 } else {
30671 Ok(e)
30672 }
30673 }
30674
30675 Action::ParseJsonStrip => {
30676 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
30677 if let Expression::ParseJson(uf) = e {
30678 Ok(uf.this)
30679 } else {
30680 Ok(e)
30681 }
30682 }
30683
30684 Action::ArraySizeDrill => {
30685 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
30686 if let Expression::ArraySize(uf) = e {
30687 Ok(Expression::Function(Box::new(Function::new(
30688 "REPEATED_COUNT".to_string(),
30689 vec![uf.this],
30690 ))))
30691 } else {
30692 Ok(e)
30693 }
30694 }
30695
30696 Action::WeekOfYearToWeekIso => {
30697 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
30698 if let Expression::WeekOfYear(uf) = e {
30699 Ok(Expression::Function(Box::new(Function::new(
30700 "WEEKISO".to_string(),
30701 vec![uf.this],
30702 ))))
30703 } else {
30704 Ok(e)
30705 }
30706 }
30707 }
30708 })
30709 }
30710
30711 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
30712 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
30713 use crate::expressions::Function;
30714 match unit {
30715 "DAY" => {
30716 // DATE(x)
30717 Ok(Expression::Function(Box::new(Function::new(
30718 "DATE".to_string(),
30719 vec![expr.clone()],
30720 ))))
30721 }
30722 "WEEK" => {
30723 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
30724 let year_x = Expression::Function(Box::new(Function::new(
30725 "YEAR".to_string(),
30726 vec![expr.clone()],
30727 )));
30728 let week_x = Expression::Function(Box::new(Function::new(
30729 "WEEK".to_string(),
30730 vec![expr.clone(), Expression::number(1)],
30731 )));
30732 let concat_args = vec![
30733 year_x,
30734 Expression::string(" "),
30735 week_x,
30736 Expression::string(" 1"),
30737 ];
30738 let concat = Expression::Function(Box::new(Function::new(
30739 "CONCAT".to_string(),
30740 concat_args,
30741 )));
30742 Ok(Expression::Function(Box::new(Function::new(
30743 "STR_TO_DATE".to_string(),
30744 vec![concat, Expression::string("%Y %u %w")],
30745 ))))
30746 }
30747 "MONTH" => {
30748 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
30749 let year_x = Expression::Function(Box::new(Function::new(
30750 "YEAR".to_string(),
30751 vec![expr.clone()],
30752 )));
30753 let month_x = Expression::Function(Box::new(Function::new(
30754 "MONTH".to_string(),
30755 vec![expr.clone()],
30756 )));
30757 let concat_args = vec![
30758 year_x,
30759 Expression::string(" "),
30760 month_x,
30761 Expression::string(" 1"),
30762 ];
30763 let concat = Expression::Function(Box::new(Function::new(
30764 "CONCAT".to_string(),
30765 concat_args,
30766 )));
30767 Ok(Expression::Function(Box::new(Function::new(
30768 "STR_TO_DATE".to_string(),
30769 vec![concat, Expression::string("%Y %c %e")],
30770 ))))
30771 }
30772 "QUARTER" => {
30773 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
30774 let year_x = Expression::Function(Box::new(Function::new(
30775 "YEAR".to_string(),
30776 vec![expr.clone()],
30777 )));
30778 let quarter_x = Expression::Function(Box::new(Function::new(
30779 "QUARTER".to_string(),
30780 vec![expr.clone()],
30781 )));
30782 // QUARTER(x) * 3 - 2
30783 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
30784 left: quarter_x,
30785 right: Expression::number(3),
30786 left_comments: Vec::new(),
30787 operator_comments: Vec::new(),
30788 trailing_comments: Vec::new(),
30789 inferred_type: None,
30790 }));
30791 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
30792 left: mul,
30793 right: Expression::number(2),
30794 left_comments: Vec::new(),
30795 operator_comments: Vec::new(),
30796 trailing_comments: Vec::new(),
30797 inferred_type: None,
30798 }));
30799 let concat_args = vec![
30800 year_x,
30801 Expression::string(" "),
30802 sub,
30803 Expression::string(" 1"),
30804 ];
30805 let concat = Expression::Function(Box::new(Function::new(
30806 "CONCAT".to_string(),
30807 concat_args,
30808 )));
30809 Ok(Expression::Function(Box::new(Function::new(
30810 "STR_TO_DATE".to_string(),
30811 vec![concat, Expression::string("%Y %c %e")],
30812 ))))
30813 }
30814 "YEAR" => {
30815 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
30816 let year_x = Expression::Function(Box::new(Function::new(
30817 "YEAR".to_string(),
30818 vec![expr.clone()],
30819 )));
30820 let concat_args = vec![year_x, Expression::string(" 1 1")];
30821 let concat = Expression::Function(Box::new(Function::new(
30822 "CONCAT".to_string(),
30823 concat_args,
30824 )));
30825 Ok(Expression::Function(Box::new(Function::new(
30826 "STR_TO_DATE".to_string(),
30827 vec![concat, Expression::string("%Y %c %e")],
30828 ))))
30829 }
30830 _ => {
30831 // Unsupported unit -> keep as DATE_TRUNC
30832 Ok(Expression::Function(Box::new(Function::new(
30833 "DATE_TRUNC".to_string(),
30834 vec![Expression::string(unit), expr.clone()],
30835 ))))
30836 }
30837 }
30838 }
30839
30840 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
30841 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
30842 use crate::expressions::DataType;
30843 match dt {
30844 DataType::VarChar { .. } | DataType::Char { .. } => true,
30845 DataType::Struct { fields, .. } => fields
30846 .iter()
30847 .any(|f| Self::has_varchar_char_type(&f.data_type)),
30848 _ => false,
30849 }
30850 }
30851
30852 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
30853 fn normalize_varchar_to_string(
30854 dt: crate::expressions::DataType,
30855 ) -> crate::expressions::DataType {
30856 use crate::expressions::DataType;
30857 match dt {
30858 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
30859 name: "STRING".to_string(),
30860 },
30861 DataType::Struct { fields, nested } => {
30862 let fields = fields
30863 .into_iter()
30864 .map(|mut f| {
30865 f.data_type = Self::normalize_varchar_to_string(f.data_type);
30866 f
30867 })
30868 .collect();
30869 DataType::Struct { fields, nested }
30870 }
30871 other => other,
30872 }
30873 }
30874
30875 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
30876 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
30877 if let Expression::Literal(ref lit) = expr {
30878 if let crate::expressions::Literal::String(ref s) = lit.as_ref() {
30879 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
30880 let trimmed = s.trim();
30881
30882 // Find where digits end and unit text begins
30883 let digit_end = trimmed
30884 .find(|c: char| !c.is_ascii_digit())
30885 .unwrap_or(trimmed.len());
30886 if digit_end == 0 || digit_end == trimmed.len() {
30887 return expr;
30888 }
30889 let num = &trimmed[..digit_end];
30890 let unit_text = trimmed[digit_end..].trim().to_ascii_uppercase();
30891 if unit_text.is_empty() {
30892 return expr;
30893 }
30894
30895 let known_units = [
30896 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS",
30897 "WEEK", "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
30898 ];
30899 if !known_units.contains(&unit_text.as_str()) {
30900 return expr;
30901 }
30902
30903 let unit_str = unit_text.clone();
30904 // Singularize
30905 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
30906 &unit_str[..unit_str.len() - 1]
30907 } else {
30908 &unit_str
30909 };
30910 let unit = unit_singular;
30911
30912 match target {
30913 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30914 // INTERVAL '2' DAY
30915 let iu = match unit {
30916 "DAY" => crate::expressions::IntervalUnit::Day,
30917 "HOUR" => crate::expressions::IntervalUnit::Hour,
30918 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30919 "SECOND" => crate::expressions::IntervalUnit::Second,
30920 "WEEK" => crate::expressions::IntervalUnit::Week,
30921 "MONTH" => crate::expressions::IntervalUnit::Month,
30922 "YEAR" => crate::expressions::IntervalUnit::Year,
30923 _ => return expr,
30924 };
30925 return Expression::Interval(Box::new(crate::expressions::Interval {
30926 this: Some(Expression::string(num)),
30927 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30928 unit: iu,
30929 use_plural: false,
30930 }),
30931 }));
30932 }
30933 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
30934 // INTERVAL '2 DAYS'
30935 let plural = if num != "1" && !unit_str.ends_with('S') {
30936 format!("{} {}S", num, unit)
30937 } else if unit_str.ends_with('S') {
30938 format!("{} {}", num, unit_str)
30939 } else {
30940 format!("{} {}", num, unit)
30941 };
30942 return Expression::Interval(Box::new(crate::expressions::Interval {
30943 this: Some(Expression::string(&plural)),
30944 unit: None,
30945 }));
30946 }
30947 _ => {
30948 // Spark/Databricks/Hive: INTERVAL '1' DAY
30949 let iu = match unit {
30950 "DAY" => crate::expressions::IntervalUnit::Day,
30951 "HOUR" => crate::expressions::IntervalUnit::Hour,
30952 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30953 "SECOND" => crate::expressions::IntervalUnit::Second,
30954 "WEEK" => crate::expressions::IntervalUnit::Week,
30955 "MONTH" => crate::expressions::IntervalUnit::Month,
30956 "YEAR" => crate::expressions::IntervalUnit::Year,
30957 _ => return expr,
30958 };
30959 return Expression::Interval(Box::new(crate::expressions::Interval {
30960 this: Some(Expression::string(num)),
30961 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30962 unit: iu,
30963 use_plural: false,
30964 }),
30965 }));
30966 }
30967 }
30968 }
30969 }
30970 // If it's already an INTERVAL expression, pass through
30971 expr
30972 }
30973
30974 /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
30975 /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
30976 /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
30977 /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
30978 fn rewrite_unnest_expansion(
30979 select: &crate::expressions::Select,
30980 target: DialectType,
30981 ) -> Option<crate::expressions::Select> {
30982 use crate::expressions::{
30983 Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
30984 UnnestFunc,
30985 };
30986
30987 let index_offset: i64 = match target {
30988 DialectType::Presto | DialectType::Trino => 1,
30989 _ => 0, // BigQuery, Snowflake
30990 };
30991
30992 let if_func_name = match target {
30993 DialectType::Snowflake => "IFF",
30994 _ => "IF",
30995 };
30996
30997 let array_length_func = match target {
30998 DialectType::BigQuery => "ARRAY_LENGTH",
30999 DialectType::Presto | DialectType::Trino => "CARDINALITY",
31000 DialectType::Snowflake => "ARRAY_SIZE",
31001 _ => "ARRAY_LENGTH",
31002 };
31003
31004 let use_table_aliases = matches!(
31005 target,
31006 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
31007 );
31008 let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);
31009
31010 fn make_col(name: &str, table: Option<&str>) -> Expression {
31011 if let Some(tbl) = table {
31012 Expression::boxed_column(Column {
31013 name: Identifier::new(name.to_string()),
31014 table: Some(Identifier::new(tbl.to_string())),
31015 join_mark: false,
31016 trailing_comments: Vec::new(),
31017 span: None,
31018 inferred_type: None,
31019 })
31020 } else {
31021 Expression::Identifier(Identifier::new(name.to_string()))
31022 }
31023 }
31024
31025 fn make_join(this: Expression) -> Join {
31026 Join {
31027 this,
31028 on: None,
31029 using: Vec::new(),
31030 kind: JoinKind::Cross,
31031 use_inner_keyword: false,
31032 use_outer_keyword: false,
31033 deferred_condition: false,
31034 join_hint: None,
31035 match_condition: None,
31036 pivots: Vec::new(),
31037 comments: Vec::new(),
31038 nesting_group: 0,
31039 directed: false,
31040 }
31041 }
31042
31043 // Collect UNNEST info from SELECT expressions
31044 struct UnnestInfo {
31045 arr_expr: Expression,
31046 col_alias: String,
31047 pos_alias: String,
31048 source_alias: String,
31049 original_expr: Expression,
31050 has_outer_alias: Option<String>,
31051 }
31052
31053 let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
31054 let mut col_counter = 0usize;
31055 let mut pos_counter = 1usize;
31056 let mut source_counter = 1usize;
31057
31058 fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
31059 match expr {
31060 Expression::Unnest(u) => Some(u.this.clone()),
31061 Expression::Function(f)
31062 if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
31063 {
31064 Some(f.args[0].clone())
31065 }
31066 Expression::Alias(a) => extract_unnest_arg(&a.this),
31067 Expression::Add(op)
31068 | Expression::Sub(op)
31069 | Expression::Mul(op)
31070 | Expression::Div(op) => {
31071 extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
31072 }
31073 _ => None,
31074 }
31075 }
31076
31077 fn get_alias_name(expr: &Expression) -> Option<String> {
31078 if let Expression::Alias(a) = expr {
31079 Some(a.alias.name.clone())
31080 } else {
31081 None
31082 }
31083 }
31084
31085 for sel_expr in &select.expressions {
31086 if let Some(arr) = extract_unnest_arg(sel_expr) {
31087 col_counter += 1;
31088 pos_counter += 1;
31089 source_counter += 1;
31090
31091 let col_alias = if col_counter == 1 {
31092 "col".to_string()
31093 } else {
31094 format!("col_{}", col_counter)
31095 };
31096 let pos_alias = format!("pos_{}", pos_counter);
31097 let source_alias = format!("_u_{}", source_counter);
31098 let has_outer_alias = get_alias_name(sel_expr);
31099
31100 unnest_infos.push(UnnestInfo {
31101 arr_expr: arr,
31102 col_alias,
31103 pos_alias,
31104 source_alias,
31105 original_expr: sel_expr.clone(),
31106 has_outer_alias,
31107 });
31108 }
31109 }
31110
31111 if unnest_infos.is_empty() {
31112 return None;
31113 }
31114
31115 let series_alias = "pos".to_string();
31116 let series_source_alias = "_u".to_string();
31117 let tbl_ref = if use_table_aliases {
31118 Some(series_source_alias.as_str())
31119 } else {
31120 None
31121 };
31122
31123 // Build new SELECT expressions
31124 let mut new_select_exprs = Vec::new();
31125 for info in &unnest_infos {
31126 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
31127 let src_ref = if use_table_aliases {
31128 Some(info.source_alias.as_str())
31129 } else {
31130 None
31131 };
31132
31133 let pos_col = make_col(&series_alias, tbl_ref);
31134 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
31135 let col_ref = make_col(actual_col_name, src_ref);
31136
31137 let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
31138 pos_col.clone(),
31139 unnest_pos_col.clone(),
31140 )));
31141 let mut if_args = vec![eq_cond, col_ref];
31142 if null_third_arg {
31143 if_args.push(Expression::Null(crate::expressions::Null));
31144 }
31145
31146 let if_expr =
31147 Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
31148 let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);
31149
31150 new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
31151 final_expr,
31152 Identifier::new(actual_col_name.clone()),
31153 ))));
31154 }
31155
31156 // Build array size expressions for GREATEST
31157 let size_exprs: Vec<Expression> = unnest_infos
31158 .iter()
31159 .map(|info| {
31160 Expression::Function(Box::new(Function::new(
31161 array_length_func.to_string(),
31162 vec![info.arr_expr.clone()],
31163 )))
31164 })
31165 .collect();
31166
31167 let greatest =
31168 Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));
31169
31170 let series_end = if index_offset == 0 {
31171 Expression::Sub(Box::new(BinaryOp::new(
31172 greatest,
31173 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
31174 )))
31175 } else {
31176 greatest
31177 };
31178
31179 // Build the position array source
31180 let series_unnest_expr = match target {
31181 DialectType::BigQuery => {
31182 let gen_array = Expression::Function(Box::new(Function::new(
31183 "GENERATE_ARRAY".to_string(),
31184 vec![
31185 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
31186 series_end,
31187 ],
31188 )));
31189 Expression::Unnest(Box::new(UnnestFunc {
31190 this: gen_array,
31191 expressions: Vec::new(),
31192 with_ordinality: false,
31193 alias: None,
31194 offset_alias: None,
31195 }))
31196 }
31197 DialectType::Presto | DialectType::Trino => {
31198 let sequence = Expression::Function(Box::new(Function::new(
31199 "SEQUENCE".to_string(),
31200 vec![
31201 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
31202 series_end,
31203 ],
31204 )));
31205 Expression::Unnest(Box::new(UnnestFunc {
31206 this: sequence,
31207 expressions: Vec::new(),
31208 with_ordinality: false,
31209 alias: None,
31210 offset_alias: None,
31211 }))
31212 }
31213 DialectType::Snowflake => {
31214 let range_end = Expression::Add(Box::new(BinaryOp::new(
31215 Expression::Paren(Box::new(crate::expressions::Paren {
31216 this: series_end,
31217 trailing_comments: Vec::new(),
31218 })),
31219 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
31220 )));
31221 let gen_range = Expression::Function(Box::new(Function::new(
31222 "ARRAY_GENERATE_RANGE".to_string(),
31223 vec![
31224 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
31225 range_end,
31226 ],
31227 )));
31228 let flatten_arg =
31229 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
31230 name: Identifier::new("INPUT".to_string()),
31231 value: gen_range,
31232 separator: crate::expressions::NamedArgSeparator::DArrow,
31233 }));
31234 let flatten = Expression::Function(Box::new(Function::new(
31235 "FLATTEN".to_string(),
31236 vec![flatten_arg],
31237 )));
31238 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
31239 }
31240 _ => return None,
31241 };
31242
31243 // Build series alias expression
31244 let series_alias_expr = if use_table_aliases {
31245 let col_aliases = if matches!(target, DialectType::Snowflake) {
31246 vec![
31247 Identifier::new("seq".to_string()),
31248 Identifier::new("key".to_string()),
31249 Identifier::new("path".to_string()),
31250 Identifier::new("index".to_string()),
31251 Identifier::new(series_alias.clone()),
31252 Identifier::new("this".to_string()),
31253 ]
31254 } else {
31255 vec![Identifier::new(series_alias.clone())]
31256 };
31257 Expression::Alias(Box::new(Alias {
31258 this: series_unnest_expr,
31259 alias: Identifier::new(series_source_alias.clone()),
31260 column_aliases: col_aliases,
31261 alias_explicit_as: false,
31262 alias_keyword: None,
31263 pre_alias_comments: Vec::new(),
31264 trailing_comments: Vec::new(),
31265 inferred_type: None,
31266 }))
31267 } else {
31268 Expression::Alias(Box::new(Alias::new(
31269 series_unnest_expr,
31270 Identifier::new(series_alias.clone()),
31271 )))
31272 };
31273
31274 // Build CROSS JOINs for each UNNEST
31275 let mut joins = Vec::new();
31276 for info in &unnest_infos {
31277 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
31278
31279 let unnest_join_expr = match target {
31280 DialectType::BigQuery => {
31281 // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
31282 let unnest = UnnestFunc {
31283 this: info.arr_expr.clone(),
31284 expressions: Vec::new(),
31285 with_ordinality: true,
31286 alias: Some(Identifier::new(actual_col_name.clone())),
31287 offset_alias: Some(Identifier::new(info.pos_alias.clone())),
31288 };
31289 Expression::Unnest(Box::new(unnest))
31290 }
31291 DialectType::Presto | DialectType::Trino => {
31292 let unnest = UnnestFunc {
31293 this: info.arr_expr.clone(),
31294 expressions: Vec::new(),
31295 with_ordinality: true,
31296 alias: None,
31297 offset_alias: None,
31298 };
31299 Expression::Alias(Box::new(Alias {
31300 this: Expression::Unnest(Box::new(unnest)),
31301 alias: Identifier::new(info.source_alias.clone()),
31302 column_aliases: vec![
31303 Identifier::new(actual_col_name.clone()),
31304 Identifier::new(info.pos_alias.clone()),
31305 ],
31306 alias_explicit_as: false,
31307 alias_keyword: None,
31308 pre_alias_comments: Vec::new(),
31309 trailing_comments: Vec::new(),
31310 inferred_type: None,
31311 }))
31312 }
31313 DialectType::Snowflake => {
31314 let flatten_arg =
31315 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
31316 name: Identifier::new("INPUT".to_string()),
31317 value: info.arr_expr.clone(),
31318 separator: crate::expressions::NamedArgSeparator::DArrow,
31319 }));
31320 let flatten = Expression::Function(Box::new(Function::new(
31321 "FLATTEN".to_string(),
31322 vec![flatten_arg],
31323 )));
31324 let table_fn = Expression::Function(Box::new(Function::new(
31325 "TABLE".to_string(),
31326 vec![flatten],
31327 )));
31328 Expression::Alias(Box::new(Alias {
31329 this: table_fn,
31330 alias: Identifier::new(info.source_alias.clone()),
31331 column_aliases: vec![
31332 Identifier::new("seq".to_string()),
31333 Identifier::new("key".to_string()),
31334 Identifier::new("path".to_string()),
31335 Identifier::new(info.pos_alias.clone()),
31336 Identifier::new(actual_col_name.clone()),
31337 Identifier::new("this".to_string()),
31338 ],
31339 alias_explicit_as: false,
31340 alias_keyword: None,
31341 pre_alias_comments: Vec::new(),
31342 trailing_comments: Vec::new(),
31343 inferred_type: None,
31344 }))
31345 }
31346 _ => return None,
31347 };
31348
31349 joins.push(make_join(unnest_join_expr));
31350 }
31351
31352 // Build WHERE clause
31353 let mut where_conditions: Vec<Expression> = Vec::new();
31354 for info in &unnest_infos {
31355 let src_ref = if use_table_aliases {
31356 Some(info.source_alias.as_str())
31357 } else {
31358 None
31359 };
31360 let pos_col = make_col(&series_alias, tbl_ref);
31361 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
31362
31363 let arr_size = Expression::Function(Box::new(Function::new(
31364 array_length_func.to_string(),
31365 vec![info.arr_expr.clone()],
31366 )));
31367
31368 let size_ref = if index_offset == 0 {
31369 Expression::Paren(Box::new(crate::expressions::Paren {
31370 this: Expression::Sub(Box::new(BinaryOp::new(
31371 arr_size,
31372 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
31373 ))),
31374 trailing_comments: Vec::new(),
31375 }))
31376 } else {
31377 arr_size
31378 };
31379
31380 let eq = Expression::Eq(Box::new(BinaryOp::new(
31381 pos_col.clone(),
31382 unnest_pos_col.clone(),
31383 )));
31384 let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
31385 let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
31386 let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
31387 let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
31388 this: and_cond,
31389 trailing_comments: Vec::new(),
31390 }));
31391 let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));
31392
31393 where_conditions.push(or_cond);
31394 }
31395
31396 let where_expr = if where_conditions.len() == 1 {
31397 // Single condition: no parens needed
31398 where_conditions.into_iter().next().unwrap()
31399 } else {
31400 // Multiple conditions: wrap each OR in parens, then combine with AND
31401 let wrap = |e: Expression| {
31402 Expression::Paren(Box::new(crate::expressions::Paren {
31403 this: e,
31404 trailing_comments: Vec::new(),
31405 }))
31406 };
31407 let mut iter = where_conditions.into_iter();
31408 let first = wrap(iter.next().unwrap());
31409 let second = wrap(iter.next().unwrap());
31410 let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
31411 this: Expression::And(Box::new(BinaryOp::new(first, second))),
31412 trailing_comments: Vec::new(),
31413 }));
31414 for cond in iter {
31415 combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
31416 }
31417 combined
31418 };
31419
31420 // Build the new SELECT
31421 let mut new_select = select.clone();
31422 new_select.expressions = new_select_exprs;
31423
31424 if new_select.from.is_some() {
31425 let mut all_joins = vec![make_join(series_alias_expr)];
31426 all_joins.extend(joins);
31427 new_select.joins.extend(all_joins);
31428 } else {
31429 new_select.from = Some(From {
31430 expressions: vec![series_alias_expr],
31431 });
31432 new_select.joins.extend(joins);
31433 }
31434
31435 if let Some(ref existing_where) = new_select.where_clause {
31436 let combined = Expression::And(Box::new(BinaryOp::new(
31437 existing_where.this.clone(),
31438 where_expr,
31439 )));
31440 new_select.where_clause = Some(crate::expressions::Where { this: combined });
31441 } else {
31442 new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
31443 }
31444
31445 Some(new_select)
31446 }
31447
31448 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
31449 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
31450 match original {
31451 Expression::Unnest(_) => replacement.clone(),
31452 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
31453 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
31454 Expression::Add(op) => {
31455 let left = Self::replace_unnest_with_if(&op.left, replacement);
31456 let right = Self::replace_unnest_with_if(&op.right, replacement);
31457 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
31458 }
31459 Expression::Sub(op) => {
31460 let left = Self::replace_unnest_with_if(&op.left, replacement);
31461 let right = Self::replace_unnest_with_if(&op.right, replacement);
31462 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
31463 }
31464 Expression::Mul(op) => {
31465 let left = Self::replace_unnest_with_if(&op.left, replacement);
31466 let right = Self::replace_unnest_with_if(&op.right, replacement);
31467 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
31468 }
31469 Expression::Div(op) => {
31470 let left = Self::replace_unnest_with_if(&op.left, replacement);
31471 let right = Self::replace_unnest_with_if(&op.right, replacement);
31472 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
31473 }
31474 _ => original.clone(),
31475 }
31476 }
31477
31478 /// Decompose a JSON path like `$.y[0].z` into individual parts: `["y", "0", "z"]`.
31479 /// Strips `$` prefix, handles bracket notation, quoted strings, and removes `[*]` wildcards.
31480 fn decompose_json_path(path: &str) -> Vec<String> {
31481 let mut parts = Vec::new();
31482 let path = if path.starts_with("$.") {
31483 &path[2..]
31484 } else if path.starts_with('$') {
31485 &path[1..]
31486 } else {
31487 path
31488 };
31489 if path.is_empty() {
31490 return parts;
31491 }
31492 let mut current = String::new();
31493 let chars: Vec<char> = path.chars().collect();
31494 let mut i = 0;
31495 while i < chars.len() {
31496 match chars[i] {
31497 '.' => {
31498 if !current.is_empty() {
31499 parts.push(current.clone());
31500 current.clear();
31501 }
31502 i += 1;
31503 }
31504 '[' => {
31505 if !current.is_empty() {
31506 parts.push(current.clone());
31507 current.clear();
31508 }
31509 i += 1;
31510 let mut bracket_content = String::new();
31511 while i < chars.len() && chars[i] != ']' {
31512 if chars[i] == '"' || chars[i] == '\'' {
31513 let quote = chars[i];
31514 i += 1;
31515 while i < chars.len() && chars[i] != quote {
31516 bracket_content.push(chars[i]);
31517 i += 1;
31518 }
31519 if i < chars.len() {
31520 i += 1;
31521 }
31522 } else {
31523 bracket_content.push(chars[i]);
31524 i += 1;
31525 }
31526 }
31527 if i < chars.len() {
31528 i += 1;
31529 }
31530 if bracket_content != "*" {
31531 parts.push(bracket_content);
31532 }
31533 }
31534 _ => {
31535 current.push(chars[i]);
31536 i += 1;
31537 }
31538 }
31539 }
31540 if !current.is_empty() {
31541 parts.push(current);
31542 }
31543 parts
31544 }
31545
31546 /// Strip `$` prefix from a JSON path, keeping the rest.
31547 /// `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`
31548 fn strip_json_dollar_prefix(path: &str) -> String {
31549 if path.starts_with("$.") {
31550 path[2..].to_string()
31551 } else if path.starts_with('$') {
31552 path[1..].to_string()
31553 } else {
31554 path.to_string()
31555 }
31556 }
31557
31558 /// Strip `[*]` wildcards from a JSON path.
31559 /// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`
31560 fn strip_json_wildcards(path: &str) -> String {
31561 path.replace("[*]", "")
31562 .replace("..", ".") // Clean double dots from `$.y[*].z` -> `$.y..z`
31563 .trim_end_matches('.')
31564 .to_string()
31565 }
31566
31567 /// Convert bracket notation to dot notation for JSON paths.
31568 /// `$["a b"]` -> `$."a b"`, `$["key"]` -> `$.key`
31569 fn bracket_to_dot_notation(path: &str) -> String {
31570 let mut result = String::new();
31571 let chars: Vec<char> = path.chars().collect();
31572 let mut i = 0;
31573 while i < chars.len() {
31574 if chars[i] == '[' {
31575 // Read bracket content
31576 i += 1;
31577 let mut bracket_content = String::new();
31578 let mut is_quoted = false;
31579 let mut _quote_char = '"';
31580 while i < chars.len() && chars[i] != ']' {
31581 if chars[i] == '"' || chars[i] == '\'' {
31582 is_quoted = true;
31583 _quote_char = chars[i];
31584 i += 1;
31585 while i < chars.len() && chars[i] != _quote_char {
31586 bracket_content.push(chars[i]);
31587 i += 1;
31588 }
31589 if i < chars.len() {
31590 i += 1;
31591 }
31592 } else {
31593 bracket_content.push(chars[i]);
31594 i += 1;
31595 }
31596 }
31597 if i < chars.len() {
31598 i += 1;
31599 } // skip ]
31600 if bracket_content == "*" {
31601 // Keep wildcard as-is
31602 result.push_str("[*]");
31603 } else if is_quoted {
31604 // Quoted bracket -> dot notation with quotes
31605 result.push('.');
31606 result.push('"');
31607 result.push_str(&bracket_content);
31608 result.push('"');
31609 } else {
31610 // Numeric index -> keep as bracket
31611 result.push('[');
31612 result.push_str(&bracket_content);
31613 result.push(']');
31614 }
31615 } else {
31616 result.push(chars[i]);
31617 i += 1;
31618 }
31619 }
31620 result
31621 }
31622
31623 /// Convert JSON path bracket quoted strings to use single quotes instead of double quotes.
31624 /// `$["a b"]` -> `$['a b']`
31625 fn bracket_to_single_quotes(path: &str) -> String {
31626 let mut result = String::new();
31627 let chars: Vec<char> = path.chars().collect();
31628 let mut i = 0;
31629 while i < chars.len() {
31630 if chars[i] == '[' && i + 1 < chars.len() && chars[i + 1] == '"' {
31631 result.push('[');
31632 result.push('\'');
31633 i += 2; // skip [ and "
31634 while i < chars.len() && chars[i] != '"' {
31635 result.push(chars[i]);
31636 i += 1;
31637 }
31638 if i < chars.len() {
31639 i += 1;
31640 } // skip closing "
31641 result.push('\'');
31642 } else {
31643 result.push(chars[i]);
31644 i += 1;
31645 }
31646 }
31647 result
31648 }
31649
31650 /// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
31651 /// or PostgreSQL #temp -> TEMPORARY.
31652 /// Also strips # from INSERT INTO #table for non-TSQL targets.
31653 fn transform_select_into(
31654 expr: Expression,
31655 _source: DialectType,
31656 target: DialectType,
31657 ) -> Expression {
31658 use crate::expressions::{CreateTable, Expression, TableRef};
31659
31660 // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets
31661 if let Expression::Insert(ref insert) = expr {
31662 if insert.table.name.name.starts_with('#')
31663 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
31664 {
31665 let mut new_insert = insert.clone();
31666 new_insert.table.name.name =
31667 insert.table.name.name.trim_start_matches('#').to_string();
31668 return Expression::Insert(new_insert);
31669 }
31670 return expr;
31671 }
31672
31673 if let Expression::Select(ref select) = expr {
31674 if let Some(ref into) = select.into {
31675 let table_name_raw = match &into.this {
31676 Expression::Table(tr) => tr.name.name.clone(),
31677 Expression::Identifier(id) => id.name.clone(),
31678 _ => String::new(),
31679 };
31680 let is_temp = table_name_raw.starts_with('#') || into.temporary;
31681 let clean_name = table_name_raw.trim_start_matches('#').to_string();
31682
31683 match target {
31684 DialectType::DuckDB | DialectType::Snowflake => {
31685 // SELECT INTO -> CREATE TABLE AS SELECT
31686 let mut new_select = select.clone();
31687 new_select.into = None;
31688 let ct = CreateTable {
31689 name: TableRef::new(clean_name),
31690 on_cluster: None,
31691 columns: Vec::new(),
31692 constraints: Vec::new(),
31693 if_not_exists: false,
31694 temporary: is_temp,
31695 or_replace: false,
31696 table_modifier: None,
31697 as_select: Some(Expression::Select(new_select)),
31698 as_select_parenthesized: false,
31699 on_commit: None,
31700 clone_source: None,
31701 clone_at_clause: None,
31702 shallow_clone: false,
31703 deep_clone: false,
31704 is_copy: false,
31705 leading_comments: Vec::new(),
31706 with_properties: Vec::new(),
31707 teradata_post_name_options: Vec::new(),
31708 with_data: None,
31709 with_statistics: None,
31710 teradata_indexes: Vec::new(),
31711 with_cte: None,
31712 properties: Vec::new(),
31713 partition_of: None,
31714 post_table_properties: Vec::new(),
31715 mysql_table_options: Vec::new(),
31716 inherits: Vec::new(),
31717 on_property: None,
31718 copy_grants: false,
31719 using_template: None,
31720 rollup: None,
31721 uuid: None,
31722 with_partition_columns: Vec::new(),
31723 with_connection: None,
31724 };
31725 return Expression::CreateTable(Box::new(ct));
31726 }
31727 DialectType::PostgreSQL | DialectType::Redshift => {
31728 // PostgreSQL: #foo -> INTO TEMPORARY foo
31729 if is_temp && !into.temporary {
31730 let mut new_select = select.clone();
31731 let mut new_into = into.clone();
31732 new_into.temporary = true;
31733 new_into.unlogged = false;
31734 new_into.this = Expression::Table(Box::new(TableRef::new(clean_name)));
31735 new_select.into = Some(new_into);
31736 Expression::Select(new_select)
31737 } else {
31738 expr
31739 }
31740 }
31741 _ => expr,
31742 }
31743 } else {
31744 expr
31745 }
31746 } else {
31747 expr
31748 }
31749 }
31750
31751 /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
31752 /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
31753 fn transform_create_table_properties(
31754 ct: &mut crate::expressions::CreateTable,
31755 _source: DialectType,
31756 target: DialectType,
31757 ) {
31758 use crate::expressions::{
31759 BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
31760 Properties,
31761 };
31762
31763 // Helper to convert a raw property value string to the correct Expression
31764 let value_to_expr = |v: &str| -> Expression {
31765 let trimmed = v.trim();
31766 // Check if it's a quoted string (starts and ends with ')
31767 if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
31768 Expression::Literal(Box::new(Literal::String(
31769 trimmed[1..trimmed.len() - 1].to_string(),
31770 )))
31771 }
31772 // Check if it's a number
31773 else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
31774 Expression::Literal(Box::new(Literal::Number(trimmed.to_string())))
31775 }
31776 // Check if it's ARRAY[...] or ARRAY(...)
31777 else if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
31778 // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
31779 let inner = trimmed
31780 .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
31781 .trim_start_matches('[')
31782 .trim_start_matches('(')
31783 .trim_end_matches(']')
31784 .trim_end_matches(')');
31785 let elements: Vec<Expression> = inner
31786 .split(',')
31787 .map(|e| {
31788 let elem = e.trim().trim_matches('\'');
31789 Expression::Literal(Box::new(Literal::String(elem.to_string())))
31790 })
31791 .collect();
31792 Expression::Function(Box::new(crate::expressions::Function::new(
31793 "ARRAY".to_string(),
31794 elements,
31795 )))
31796 }
31797 // Otherwise, just output as identifier (unquoted)
31798 else {
31799 Expression::Identifier(Identifier::new(trimmed.to_string()))
31800 }
31801 };
31802
31803 if ct.with_properties.is_empty() && ct.properties.is_empty() {
31804 return;
31805 }
31806
31807 // Handle Presto-style WITH properties
31808 if !ct.with_properties.is_empty() {
31809 // Extract FORMAT property and remaining properties
31810 let mut format_value: Option<String> = None;
31811 let mut partitioned_by: Option<String> = None;
31812 let mut other_props: Vec<(String, String)> = Vec::new();
31813
31814 for (key, value) in ct.with_properties.drain(..) {
31815 if key.eq_ignore_ascii_case("FORMAT") {
31816 // Strip surrounding quotes from value if present
31817 format_value = Some(value.trim_matches('\'').to_string());
31818 } else if key.eq_ignore_ascii_case("PARTITIONED_BY") {
31819 partitioned_by = Some(value);
31820 } else {
31821 other_props.push((key, value));
31822 }
31823 }
31824
31825 match target {
31826 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
31827 // Presto: keep WITH properties but lowercase 'format' key
31828 if let Some(fmt) = format_value {
31829 ct.with_properties
31830 .push(("format".to_string(), format!("'{}'", fmt)));
31831 }
31832 if let Some(part) = partitioned_by {
31833 // Convert (col1, col2) to ARRAY['col1', 'col2'] format
31834 let trimmed = part.trim();
31835 let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
31836 // Also handle ARRAY['...'] format - keep as-is
31837 if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
31838 ct.with_properties
31839 .push(("PARTITIONED_BY".to_string(), part));
31840 } else {
31841 // Parse column names from the parenthesized list
31842 let cols: Vec<&str> = inner
31843 .split(',')
31844 .map(|c| c.trim().trim_matches('"').trim_matches('\''))
31845 .collect();
31846 let array_val = format!(
31847 "ARRAY[{}]",
31848 cols.iter()
31849 .map(|c| format!("'{}'", c))
31850 .collect::<Vec<_>>()
31851 .join(", ")
31852 );
31853 ct.with_properties
31854 .push(("PARTITIONED_BY".to_string(), array_val));
31855 }
31856 }
31857 ct.with_properties.extend(other_props);
31858 }
31859 DialectType::Hive => {
31860 // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
31861 if let Some(fmt) = format_value {
31862 ct.properties.push(Expression::FileFormatProperty(Box::new(
31863 FileFormatProperty {
31864 this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
31865 expressions: vec![],
31866 hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
31867 value: true,
31868 }))),
31869 },
31870 )));
31871 }
31872 if let Some(_part) = partitioned_by {
31873 // PARTITIONED_BY handling is complex - move columns to partitioned by
31874 // For now, the partition columns are extracted from the column list
31875 Self::apply_partitioned_by(ct, &_part, target);
31876 }
31877 if !other_props.is_empty() {
31878 let eq_exprs: Vec<Expression> = other_props
31879 .into_iter()
31880 .map(|(k, v)| {
31881 Expression::Eq(Box::new(BinaryOp::new(
31882 Expression::Literal(Box::new(Literal::String(k))),
31883 value_to_expr(&v),
31884 )))
31885 })
31886 .collect();
31887 ct.properties
31888 .push(Expression::Properties(Box::new(Properties {
31889 expressions: eq_exprs,
31890 })));
31891 }
31892 }
31893 DialectType::Spark | DialectType::Databricks => {
31894 // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
31895 if let Some(fmt) = format_value {
31896 ct.properties.push(Expression::FileFormatProperty(Box::new(
31897 FileFormatProperty {
31898 this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
31899 expressions: vec![],
31900 hive_format: None, // None means USING syntax
31901 },
31902 )));
31903 }
31904 if let Some(_part) = partitioned_by {
31905 Self::apply_partitioned_by(ct, &_part, target);
31906 }
31907 if !other_props.is_empty() {
31908 let eq_exprs: Vec<Expression> = other_props
31909 .into_iter()
31910 .map(|(k, v)| {
31911 Expression::Eq(Box::new(BinaryOp::new(
31912 Expression::Literal(Box::new(Literal::String(k))),
31913 value_to_expr(&v),
31914 )))
31915 })
31916 .collect();
31917 ct.properties
31918 .push(Expression::Properties(Box::new(Properties {
31919 expressions: eq_exprs,
31920 })));
31921 }
31922 }
31923 DialectType::DuckDB => {
31924 // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
31925 // Keep nothing
31926 }
31927 _ => {
31928 // For other dialects, keep WITH properties as-is
31929 if let Some(fmt) = format_value {
31930 ct.with_properties
31931 .push(("FORMAT".to_string(), format!("'{}'", fmt)));
31932 }
31933 if let Some(part) = partitioned_by {
31934 ct.with_properties
31935 .push(("PARTITIONED_BY".to_string(), part));
31936 }
31937 ct.with_properties.extend(other_props);
31938 }
31939 }
31940 }
31941
31942 // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
31943 // and Hive STORED AS -> Presto WITH (format=...) conversion
31944 if !ct.properties.is_empty() {
31945 let is_presto_target = matches!(
31946 target,
31947 DialectType::Presto | DialectType::Trino | DialectType::Athena
31948 );
31949 let is_duckdb_target = matches!(target, DialectType::DuckDB);
31950
31951 if is_presto_target || is_duckdb_target {
31952 let mut new_properties = Vec::new();
31953 for prop in ct.properties.drain(..) {
31954 match &prop {
31955 Expression::FileFormatProperty(ffp) => {
31956 if is_presto_target {
31957 // Convert STORED AS/USING to WITH (format=...)
31958 if let Some(ref fmt_expr) = ffp.this {
31959 let fmt_str = match fmt_expr.as_ref() {
31960 Expression::Identifier(id) => id.name.clone(),
31961 Expression::Literal(lit)
31962 if matches!(lit.as_ref(), Literal::String(_)) =>
31963 {
31964 let Literal::String(s) = lit.as_ref() else {
31965 unreachable!()
31966 };
31967 s.clone()
31968 }
31969 _ => {
31970 new_properties.push(prop);
31971 continue;
31972 }
31973 };
31974 ct.with_properties
31975 .push(("format".to_string(), format!("'{}'", fmt_str)));
31976 }
31977 }
31978 // DuckDB: just strip file format properties
31979 }
31980 // Convert TBLPROPERTIES to WITH properties for Presto target
31981 Expression::Properties(props) if is_presto_target => {
31982 for expr in &props.expressions {
31983 if let Expression::Eq(eq) = expr {
31984 // Extract key and value from the Eq expression
31985 let key = match &eq.left {
31986 Expression::Literal(lit)
31987 if matches!(lit.as_ref(), Literal::String(_)) =>
31988 {
31989 let Literal::String(s) = lit.as_ref() else {
31990 unreachable!()
31991 };
31992 s.clone()
31993 }
31994 Expression::Identifier(id) => id.name.clone(),
31995 _ => continue,
31996 };
31997 let value = match &eq.right {
31998 Expression::Literal(lit)
31999 if matches!(lit.as_ref(), Literal::String(_)) =>
32000 {
32001 let Literal::String(s) = lit.as_ref() else {
32002 unreachable!()
32003 };
32004 format!("'{}'", s)
32005 }
32006 Expression::Literal(lit)
32007 if matches!(lit.as_ref(), Literal::Number(_)) =>
32008 {
32009 let Literal::Number(n) = lit.as_ref() else {
32010 unreachable!()
32011 };
32012 n.clone()
32013 }
32014 Expression::Identifier(id) => id.name.clone(),
32015 _ => continue,
32016 };
32017 ct.with_properties.push((key, value));
32018 }
32019 }
32020 }
32021 // Convert PartitionedByProperty for Presto target
32022 Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
32023 // Check if it contains ColumnDef expressions (Hive-style with types)
32024 if let Expression::Tuple(ref tuple) = *pbp.this {
32025 let mut col_names: Vec<String> = Vec::new();
32026 let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
32027 let mut has_col_defs = false;
32028 for expr in &tuple.expressions {
32029 if let Expression::ColumnDef(ref cd) = expr {
32030 has_col_defs = true;
32031 col_names.push(cd.name.name.clone());
32032 col_defs.push(*cd.clone());
32033 } else if let Expression::Column(ref col) = expr {
32034 col_names.push(col.name.name.clone());
32035 } else if let Expression::Identifier(ref id) = expr {
32036 col_names.push(id.name.clone());
32037 } else {
32038 // For function expressions like MONTHS(y), serialize to SQL
32039 let generic = Dialect::get(DialectType::Generic);
32040 if let Ok(sql) = generic.generate(expr) {
32041 col_names.push(sql);
32042 }
32043 }
32044 }
32045 if has_col_defs {
32046 // Merge partition column defs into the main column list
32047 for cd in col_defs {
32048 ct.columns.push(cd);
32049 }
32050 }
32051 if !col_names.is_empty() {
32052 // Add PARTITIONED_BY property
32053 let array_val = format!(
32054 "ARRAY[{}]",
32055 col_names
32056 .iter()
32057 .map(|n| format!("'{}'", n))
32058 .collect::<Vec<_>>()
32059 .join(", ")
32060 );
32061 ct.with_properties
32062 .push(("PARTITIONED_BY".to_string(), array_val));
32063 }
32064 }
32065 // Skip - don't keep in properties
32066 }
32067 _ => {
32068 if !is_duckdb_target {
32069 new_properties.push(prop);
32070 }
32071 }
32072 }
32073 }
32074 ct.properties = new_properties;
32075 } else {
32076 // For Hive/Spark targets, unquote format names in STORED AS
32077 for prop in &mut ct.properties {
32078 if let Expression::FileFormatProperty(ref mut ffp) = prop {
32079 if let Some(ref mut fmt_expr) = ffp.this {
32080 if let Expression::Literal(lit) = fmt_expr.as_ref() {
32081 if let Literal::String(s) = lit.as_ref() {
32082 // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
32083 let unquoted = s.clone();
32084 *fmt_expr =
32085 Box::new(Expression::Identifier(Identifier::new(unquoted)));
32086 }
32087 }
32088 }
32089 }
32090 }
32091 }
32092 }
32093 }
32094
32095 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
32096 fn apply_partitioned_by(
32097 ct: &mut crate::expressions::CreateTable,
32098 partitioned_by_value: &str,
32099 target: DialectType,
32100 ) {
32101 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
32102
32103 // Parse the ARRAY['col1', 'col2'] value to extract column names
32104 let mut col_names: Vec<String> = Vec::new();
32105 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
32106 let inner = partitioned_by_value
32107 .trim()
32108 .trim_start_matches("ARRAY")
32109 .trim_start_matches('[')
32110 .trim_start_matches('(')
32111 .trim_end_matches(']')
32112 .trim_end_matches(')');
32113 for part in inner.split(',') {
32114 let col = part.trim().trim_matches('\'').trim_matches('"');
32115 if !col.is_empty() {
32116 col_names.push(col.to_string());
32117 }
32118 }
32119
32120 if col_names.is_empty() {
32121 return;
32122 }
32123
32124 if matches!(target, DialectType::Hive) {
32125 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
32126 let mut partition_col_defs = Vec::new();
32127 for col_name in &col_names {
32128 // Find and remove from columns
32129 if let Some(pos) = ct
32130 .columns
32131 .iter()
32132 .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
32133 {
32134 let col_def = ct.columns.remove(pos);
32135 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
32136 }
32137 }
32138 if !partition_col_defs.is_empty() {
32139 ct.properties
32140 .push(Expression::PartitionedByProperty(Box::new(
32141 PartitionedByProperty {
32142 this: Box::new(Expression::Tuple(Box::new(Tuple {
32143 expressions: partition_col_defs,
32144 }))),
32145 },
32146 )));
32147 }
32148 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
32149 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
32150 // Use quoted identifiers to match the quoting style of the original column definitions
32151 let partition_exprs: Vec<Expression> = col_names
32152 .iter()
32153 .map(|name| {
32154 // Check if the column exists in the column list and use its quoting
32155 let is_quoted = ct
32156 .columns
32157 .iter()
32158 .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
32159 let ident = if is_quoted {
32160 Identifier::quoted(name.clone())
32161 } else {
32162 Identifier::new(name.clone())
32163 };
32164 Expression::boxed_column(Column {
32165 name: ident,
32166 table: None,
32167 join_mark: false,
32168 trailing_comments: Vec::new(),
32169 span: None,
32170 inferred_type: None,
32171 })
32172 })
32173 .collect();
32174 ct.properties
32175 .push(Expression::PartitionedByProperty(Box::new(
32176 PartitionedByProperty {
32177 this: Box::new(Expression::Tuple(Box::new(Tuple {
32178 expressions: partition_exprs,
32179 }))),
32180 },
32181 )));
32182 }
32183 // DuckDB: strip partitioned_by entirely (already handled)
32184 }
32185
32186 /// Convert a DataType to Spark's type string format (using angle brackets)
32187 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
32188 use crate::expressions::DataType;
32189 match dt {
32190 DataType::Int { .. } => "INT".to_string(),
32191 DataType::BigInt { .. } => "BIGINT".to_string(),
32192 DataType::SmallInt { .. } => "SMALLINT".to_string(),
32193 DataType::TinyInt { .. } => "TINYINT".to_string(),
32194 DataType::Float { .. } => "FLOAT".to_string(),
32195 DataType::Double { .. } => "DOUBLE".to_string(),
32196 DataType::Decimal {
32197 precision: Some(p),
32198 scale: Some(s),
32199 } => format!("DECIMAL({}, {})", p, s),
32200 DataType::Decimal {
32201 precision: Some(p), ..
32202 } => format!("DECIMAL({})", p),
32203 DataType::Decimal { .. } => "DECIMAL".to_string(),
32204 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
32205 "STRING".to_string()
32206 }
32207 DataType::Char { .. } => "STRING".to_string(),
32208 DataType::Boolean => "BOOLEAN".to_string(),
32209 DataType::Date => "DATE".to_string(),
32210 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
32211 DataType::Json | DataType::JsonB => "STRING".to_string(),
32212 DataType::Binary { .. } => "BINARY".to_string(),
32213 DataType::Array { element_type, .. } => {
32214 format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
32215 }
32216 DataType::Map {
32217 key_type,
32218 value_type,
32219 } => format!(
32220 "MAP<{}, {}>",
32221 Self::data_type_to_spark_string(key_type),
32222 Self::data_type_to_spark_string(value_type)
32223 ),
32224 DataType::Struct { fields, .. } => {
32225 let field_strs: Vec<String> = fields
32226 .iter()
32227 .map(|f| {
32228 if f.name.is_empty() {
32229 Self::data_type_to_spark_string(&f.data_type)
32230 } else {
32231 format!(
32232 "{}: {}",
32233 f.name,
32234 Self::data_type_to_spark_string(&f.data_type)
32235 )
32236 }
32237 })
32238 .collect();
32239 format!("STRUCT<{}>", field_strs.join(", "))
32240 }
32241 DataType::Custom { name } => name.clone(),
32242 _ => format!("{:?}", dt),
32243 }
32244 }
32245
32246 /// Extract value and unit from an Interval expression
32247 /// Returns (value_expression, IntervalUnit)
32248 fn extract_interval_parts(
32249 interval_expr: &Expression,
32250 ) -> Option<(Expression, crate::expressions::IntervalUnit)> {
32251 use crate::expressions::{DataType, IntervalUnit, IntervalUnitSpec, Literal};
32252
32253 fn unit_from_str(unit: &str) -> Option<IntervalUnit> {
32254 match unit.trim().to_ascii_uppercase().as_str() {
32255 "YEAR" | "YEARS" => Some(IntervalUnit::Year),
32256 "QUARTER" | "QUARTERS" => Some(IntervalUnit::Quarter),
32257 "MONTH" | "MONTHS" | "MON" | "MONS" | "MM" => Some(IntervalUnit::Month),
32258 "WEEK" | "WEEKS" | "ISOWEEK" => Some(IntervalUnit::Week),
32259 "DAY" | "DAYS" => Some(IntervalUnit::Day),
32260 "HOUR" | "HOURS" => Some(IntervalUnit::Hour),
32261 "MINUTE" | "MINUTES" => Some(IntervalUnit::Minute),
32262 "SECOND" | "SECONDS" => Some(IntervalUnit::Second),
32263 "MILLISECOND" | "MILLISECONDS" => Some(IntervalUnit::Millisecond),
32264 "MICROSECOND" | "MICROSECONDS" => Some(IntervalUnit::Microsecond),
32265 "NANOSECOND" | "NANOSECONDS" => Some(IntervalUnit::Nanosecond),
32266 _ => None,
32267 }
32268 }
32269
32270 fn parts_from_literal_string(s: &str) -> Option<(Expression, IntervalUnit)> {
32271 let mut parts = s.split_whitespace();
32272 let value = parts.next()?;
32273 let unit = unit_from_str(parts.next()?)?;
32274 Some((
32275 Expression::Literal(Box::new(Literal::String(value.to_string()))),
32276 unit,
32277 ))
32278 }
32279
32280 fn unit_from_spec(unit: &IntervalUnitSpec) -> Option<IntervalUnit> {
32281 match unit {
32282 IntervalUnitSpec::Simple { unit, .. } => Some(*unit),
32283 IntervalUnitSpec::Expr(expr) => match expr.as_ref() {
32284 Expression::Day(_) => Some(IntervalUnit::Day),
32285 Expression::Month(_) => Some(IntervalUnit::Month),
32286 Expression::Year(_) => Some(IntervalUnit::Year),
32287 Expression::Identifier(id) => unit_from_str(&id.name),
32288 Expression::Var(v) => unit_from_str(&v.this),
32289 Expression::Column(col) => unit_from_str(&col.name.name),
32290 _ => None,
32291 },
32292 _ => None,
32293 }
32294 }
32295
32296 match interval_expr {
32297 Expression::Interval(iv) => {
32298 let val = iv.this.clone().unwrap_or(Expression::number(0));
32299 if let Expression::Literal(lit) = &val {
32300 if let Literal::String(s) = lit.as_ref() {
32301 if let Some(parts) = parts_from_literal_string(s) {
32302 return Some(parts);
32303 }
32304 }
32305 }
32306 let unit = iv
32307 .unit
32308 .as_ref()
32309 .and_then(unit_from_spec)
32310 .unwrap_or(IntervalUnit::Day);
32311 Some((val, unit))
32312 }
32313 Expression::Cast(cast) if matches!(cast.to, DataType::Interval { .. }) => {
32314 if let Expression::Literal(lit) = &cast.this {
32315 if let Literal::String(s) = lit.as_ref() {
32316 if let Some(parts) = parts_from_literal_string(s) {
32317 return Some(parts);
32318 }
32319 }
32320 }
32321 let unit = match &cast.to {
32322 DataType::Interval {
32323 unit: Some(unit), ..
32324 } => unit_from_str(unit).unwrap_or(IntervalUnit::Day),
32325 _ => IntervalUnit::Day,
32326 };
32327 Some((cast.this.clone(), unit))
32328 }
32329 _ => None,
32330 }
32331 }
32332
32333 fn rewrite_tsql_interval_arithmetic(expr: &Expression) -> Option<Expression> {
32334 match expr {
32335 Expression::Add(op) => {
32336 Self::extract_interval_parts(&op.right)?;
32337 Some(Self::build_tsql_dateadd_from_interval(
32338 op.left.clone(),
32339 &op.right,
32340 false,
32341 ))
32342 }
32343 Expression::Sub(op) => {
32344 Self::extract_interval_parts(&op.right)?;
32345 Some(Self::build_tsql_dateadd_from_interval(
32346 op.left.clone(),
32347 &op.right,
32348 true,
32349 ))
32350 }
32351 _ => None,
32352 }
32353 }
32354
32355 fn build_tsql_dateadd_from_interval(
32356 date: Expression,
32357 interval: &Expression,
32358 subtract: bool,
32359 ) -> Expression {
32360 let (value, unit) = Self::extract_interval_parts(interval)
32361 .unwrap_or_else(|| (interval.clone(), crate::expressions::IntervalUnit::Day));
32362 let unit = Self::interval_unit_to_string(&unit);
32363 let amount = Self::tsql_dateadd_amount(value, subtract);
32364
32365 Expression::Function(Box::new(Function::new(
32366 "DATEADD".to_string(),
32367 vec![Expression::Identifier(Identifier::new(unit)), amount, date],
32368 )))
32369 }
32370
32371 fn tsql_dateadd_amount(value: Expression, negate: bool) -> Expression {
32372 use crate::expressions::{Parameter, ParameterStyle, UnaryOp};
32373
32374 fn numeric_literal_value(value: &Expression) -> Option<&str> {
32375 match value {
32376 Expression::Literal(lit) => match lit.as_ref() {
32377 crate::expressions::Literal::Number(n)
32378 | crate::expressions::Literal::String(n) => Some(n.as_str()),
32379 _ => None,
32380 },
32381 _ => None,
32382 }
32383 }
32384
32385 fn colon_parameter(value: &Expression) -> Option<Expression> {
32386 let Expression::Literal(lit) = value else {
32387 return None;
32388 };
32389 let crate::expressions::Literal::String(s) = lit.as_ref() else {
32390 return None;
32391 };
32392 let name = s.strip_prefix(':')?;
32393 if name.is_empty()
32394 || !name
32395 .chars()
32396 .all(|ch| ch.is_ascii_alphanumeric() || ch == '_')
32397 {
32398 return None;
32399 }
32400
32401 Some(Expression::Parameter(Box::new(Parameter {
32402 name: if name.chars().all(|ch| ch.is_ascii_digit()) {
32403 None
32404 } else {
32405 Some(name.to_string())
32406 },
32407 index: name.parse::<u32>().ok(),
32408 style: ParameterStyle::Colon,
32409 quoted: false,
32410 string_quoted: false,
32411 expression: None,
32412 })))
32413 }
32414
32415 let value = colon_parameter(&value).unwrap_or(value);
32416
32417 if let Some(n) = numeric_literal_value(&value) {
32418 if let Ok(parsed) = n.parse::<f64>() {
32419 let normalized = if negate { -parsed } else { parsed };
32420 let rendered = if normalized.fract() == 0.0 {
32421 format!("{}", normalized as i64)
32422 } else {
32423 normalized.to_string()
32424 };
32425 return Expression::Literal(Box::new(crate::expressions::Literal::Number(
32426 rendered,
32427 )));
32428 }
32429 }
32430
32431 if !negate {
32432 return value;
32433 }
32434
32435 match value {
32436 Expression::Neg(op) => op.this,
32437 other => Expression::Neg(Box::new(UnaryOp {
32438 this: other,
32439 inferred_type: None,
32440 })),
32441 }
32442 }
32443
32444 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
32445 fn normalize_bigquery_function(
32446 e: Expression,
32447 source: DialectType,
32448 target: DialectType,
32449 ) -> Result<Expression> {
32450 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
32451
32452 let f = if let Expression::Function(f) = e {
32453 *f
32454 } else {
32455 return Ok(e);
32456 };
32457 let name = f.name.to_ascii_uppercase();
32458 let mut args = f.args;
32459
32460 /// Helper to extract unit string from an identifier, column, or literal expression
32461 fn get_unit_str(expr: &Expression) -> String {
32462 match expr {
32463 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
32464 Expression::Var(v) => v.this.to_ascii_uppercase(),
32465 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
32466 let Literal::String(s) = lit.as_ref() else {
32467 unreachable!()
32468 };
32469 s.to_ascii_uppercase()
32470 }
32471 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
32472 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
32473 Expression::Function(f) => {
32474 let base = f.name.to_ascii_uppercase();
32475 if !f.args.is_empty() {
32476 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
32477 let inner = get_unit_str(&f.args[0]);
32478 format!("{}({})", base, inner)
32479 } else {
32480 base
32481 }
32482 }
32483 _ => "DAY".to_string(),
32484 }
32485 }
32486
32487 /// Parse unit string to IntervalUnit
32488 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
32489 match s {
32490 "YEAR" => crate::expressions::IntervalUnit::Year,
32491 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
32492 "MONTH" => crate::expressions::IntervalUnit::Month,
32493 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
32494 "DAY" => crate::expressions::IntervalUnit::Day,
32495 "HOUR" => crate::expressions::IntervalUnit::Hour,
32496 "MINUTE" => crate::expressions::IntervalUnit::Minute,
32497 "SECOND" => crate::expressions::IntervalUnit::Second,
32498 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
32499 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
32500 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
32501 _ => crate::expressions::IntervalUnit::Day,
32502 }
32503 }
32504
32505 match name.as_str() {
32506 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
32507 // (BigQuery: result = date1 - date2, Standard: result = end - start)
32508 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
32509 let date1 = args.remove(0);
32510 let date2 = args.remove(0);
32511 let unit_expr = args.remove(0);
32512 let unit_str = get_unit_str(&unit_expr);
32513
32514 if matches!(target, DialectType::BigQuery) {
32515 // BigQuery -> BigQuery: just uppercase the unit
32516 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
32517 return Ok(Expression::Function(Box::new(Function::new(
32518 f.name,
32519 vec![date1, date2, unit],
32520 ))));
32521 }
32522
32523 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
32524 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
32525 if matches!(target, DialectType::Snowflake) {
32526 return Ok(Expression::TimestampDiff(Box::new(
32527 crate::expressions::TimestampDiff {
32528 this: Box::new(date2),
32529 expression: Box::new(date1),
32530 unit: Some(unit_str),
32531 },
32532 )));
32533 }
32534
32535 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
32536 if matches!(target, DialectType::DuckDB) {
32537 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
32538 // CAST to TIME
32539 let cast_fn = |e: Expression| -> Expression {
32540 match e {
32541 Expression::Literal(lit)
32542 if matches!(lit.as_ref(), Literal::String(_)) =>
32543 {
32544 let Literal::String(s) = lit.as_ref() else {
32545 unreachable!()
32546 };
32547 Expression::Cast(Box::new(Cast {
32548 this: Expression::Literal(Box::new(Literal::String(
32549 s.clone(),
32550 ))),
32551 to: DataType::Custom {
32552 name: "TIME".to_string(),
32553 },
32554 trailing_comments: vec![],
32555 double_colon_syntax: false,
32556 format: None,
32557 default: None,
32558 inferred_type: None,
32559 }))
32560 }
32561 other => other,
32562 }
32563 };
32564 (cast_fn(date1), cast_fn(date2))
32565 } else if name == "DATETIME_DIFF" {
32566 // CAST to TIMESTAMP
32567 (
32568 Self::ensure_cast_timestamp(date1),
32569 Self::ensure_cast_timestamp(date2),
32570 )
32571 } else {
32572 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
32573 (
32574 Self::ensure_cast_timestamptz(date1),
32575 Self::ensure_cast_timestamptz(date2),
32576 )
32577 };
32578 return Ok(Expression::Function(Box::new(Function::new(
32579 "DATE_DIFF".to_string(),
32580 vec![
32581 Expression::Literal(Box::new(Literal::String(unit_str))),
32582 cast_d2,
32583 cast_d1,
32584 ],
32585 ))));
32586 }
32587
32588 // Convert to standard TIMESTAMPDIFF(unit, start, end)
32589 let unit = Expression::Identifier(Identifier::new(unit_str));
32590 Ok(Expression::Function(Box::new(Function::new(
32591 "TIMESTAMPDIFF".to_string(),
32592 vec![unit, date2, date1],
32593 ))))
32594 }
32595
32596 // DATEDIFF(unit, start, end) -> target-specific form
32597 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
32598 "DATEDIFF" if args.len() == 3 => {
32599 let arg0 = args.remove(0);
32600 let arg1 = args.remove(0);
32601 let arg2 = args.remove(0);
32602 let unit_str = get_unit_str(&arg0);
32603
32604 // Redshift DATEDIFF(unit, start, end) order: result = end - start
32605 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
32606 // TSQL DATEDIFF(unit, start, end) order: result = end - start
32607
32608 if matches!(target, DialectType::Snowflake) {
32609 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
32610 let unit = Expression::Identifier(Identifier::new(unit_str));
32611 return Ok(Expression::Function(Box::new(Function::new(
32612 "DATEDIFF".to_string(),
32613 vec![unit, arg1, arg2],
32614 ))));
32615 }
32616
32617 if matches!(target, DialectType::DuckDB) {
32618 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
32619 let cast_d1 = Self::ensure_cast_timestamp(arg1);
32620 let cast_d2 = Self::ensure_cast_timestamp(arg2);
32621 return Ok(Expression::Function(Box::new(Function::new(
32622 "DATE_DIFF".to_string(),
32623 vec![
32624 Expression::Literal(Box::new(Literal::String(unit_str))),
32625 cast_d1,
32626 cast_d2,
32627 ],
32628 ))));
32629 }
32630
32631 if matches!(target, DialectType::BigQuery) {
32632 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
32633 let cast_d1 = Self::ensure_cast_datetime(arg1);
32634 let cast_d2 = Self::ensure_cast_datetime(arg2);
32635 let unit = Expression::Identifier(Identifier::new(unit_str));
32636 return Ok(Expression::Function(Box::new(Function::new(
32637 "DATE_DIFF".to_string(),
32638 vec![cast_d2, cast_d1, unit],
32639 ))));
32640 }
32641
32642 if matches!(target, DialectType::Spark | DialectType::Databricks) {
32643 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
32644 let unit = Expression::Identifier(Identifier::new(unit_str));
32645 return Ok(Expression::Function(Box::new(Function::new(
32646 "DATEDIFF".to_string(),
32647 vec![unit, arg1, arg2],
32648 ))));
32649 }
32650
32651 if matches!(target, DialectType::Hive) {
32652 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
32653 match unit_str.as_str() {
32654 "MONTH" => {
32655 return Ok(Expression::Function(Box::new(Function::new(
32656 "CAST".to_string(),
32657 vec![Expression::Function(Box::new(Function::new(
32658 "MONTHS_BETWEEN".to_string(),
32659 vec![arg2, arg1],
32660 )))],
32661 ))));
32662 }
32663 "WEEK" => {
32664 return Ok(Expression::Cast(Box::new(Cast {
32665 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
32666 Expression::Function(Box::new(Function::new(
32667 "DATEDIFF".to_string(),
32668 vec![arg2, arg1],
32669 ))),
32670 Expression::Literal(Box::new(Literal::Number("7".to_string()))),
32671 ))),
32672 to: DataType::Int {
32673 length: None,
32674 integer_spelling: false,
32675 },
32676 trailing_comments: vec![],
32677 double_colon_syntax: false,
32678 format: None,
32679 default: None,
32680 inferred_type: None,
32681 })));
32682 }
32683 _ => {
32684 // Default: DATEDIFF(end, start) for DAY
32685 return Ok(Expression::Function(Box::new(Function::new(
32686 "DATEDIFF".to_string(),
32687 vec![arg2, arg1],
32688 ))));
32689 }
32690 }
32691 }
32692
32693 if matches!(
32694 target,
32695 DialectType::Presto | DialectType::Trino | DialectType::Athena
32696 ) {
32697 // Presto/Trino: DATE_DIFF('UNIT', start, end)
32698 return Ok(Expression::Function(Box::new(Function::new(
32699 "DATE_DIFF".to_string(),
32700 vec![
32701 Expression::Literal(Box::new(Literal::String(unit_str))),
32702 arg1,
32703 arg2,
32704 ],
32705 ))));
32706 }
32707
32708 if matches!(target, DialectType::TSQL) {
32709 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
32710 let cast_d2 = Self::ensure_cast_datetime2(arg2);
32711 let unit = Expression::Identifier(Identifier::new(unit_str));
32712 return Ok(Expression::Function(Box::new(Function::new(
32713 "DATEDIFF".to_string(),
32714 vec![unit, arg1, cast_d2],
32715 ))));
32716 }
32717
32718 if matches!(target, DialectType::PostgreSQL) {
32719 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
32720 // For now, use DATEDIFF (passthrough) with uppercased unit
32721 let unit = Expression::Identifier(Identifier::new(unit_str));
32722 return Ok(Expression::Function(Box::new(Function::new(
32723 "DATEDIFF".to_string(),
32724 vec![unit, arg1, arg2],
32725 ))));
32726 }
32727
32728 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
32729 let unit = Expression::Identifier(Identifier::new(unit_str));
32730 Ok(Expression::Function(Box::new(Function::new(
32731 "DATEDIFF".to_string(),
32732 vec![unit, arg1, arg2],
32733 ))))
32734 }
32735
32736 // DATE_DIFF(date1, date2, unit) -> standard form
32737 "DATE_DIFF" if args.len() == 3 => {
32738 let date1 = args.remove(0);
32739 let date2 = args.remove(0);
32740 let unit_expr = args.remove(0);
32741 let unit_str = get_unit_str(&unit_expr);
32742
32743 if matches!(target, DialectType::BigQuery) {
32744 // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
32745 let norm_unit = if unit_str == "WEEK(SUNDAY)" {
32746 "WEEK".to_string()
32747 } else {
32748 unit_str
32749 };
32750 let norm_d1 = Self::date_literal_to_cast(date1);
32751 let norm_d2 = Self::date_literal_to_cast(date2);
32752 let unit = Expression::Identifier(Identifier::new(norm_unit));
32753 return Ok(Expression::Function(Box::new(Function::new(
32754 f.name,
32755 vec![norm_d1, norm_d2, unit],
32756 ))));
32757 }
32758
32759 if matches!(target, DialectType::MySQL) {
32760 // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
32761 let norm_d1 = Self::date_literal_to_cast(date1);
32762 let norm_d2 = Self::date_literal_to_cast(date2);
32763 return Ok(Expression::Function(Box::new(Function::new(
32764 "DATEDIFF".to_string(),
32765 vec![norm_d1, norm_d2],
32766 ))));
32767 }
32768
32769 if matches!(target, DialectType::StarRocks) {
32770 // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
32771 let norm_d1 = Self::date_literal_to_cast(date1);
32772 let norm_d2 = Self::date_literal_to_cast(date2);
32773 return Ok(Expression::Function(Box::new(Function::new(
32774 "DATE_DIFF".to_string(),
32775 vec![
32776 Expression::Literal(Box::new(Literal::String(unit_str))),
32777 norm_d1,
32778 norm_d2,
32779 ],
32780 ))));
32781 }
32782
32783 if matches!(target, DialectType::DuckDB) {
32784 // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
32785 let norm_d1 = Self::ensure_cast_date(date1);
32786 let norm_d2 = Self::ensure_cast_date(date2);
32787
32788 // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
32789 let is_week_variant = unit_str == "WEEK"
32790 || unit_str.starts_with("WEEK(")
32791 || unit_str == "ISOWEEK";
32792 if is_week_variant {
32793 // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
32794 // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
32795 // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
32796 // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
32797 let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
32798 None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
32799 } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
32800 Some("1") // Shift Sunday to Monday alignment
32801 } else if unit_str == "WEEK(SATURDAY)" {
32802 Some("-5")
32803 } else if unit_str == "WEEK(TUESDAY)" {
32804 Some("-1")
32805 } else if unit_str == "WEEK(WEDNESDAY)" {
32806 Some("-2")
32807 } else if unit_str == "WEEK(THURSDAY)" {
32808 Some("-3")
32809 } else if unit_str == "WEEK(FRIDAY)" {
32810 Some("-4")
32811 } else {
32812 Some("1") // default to Sunday
32813 };
32814
32815 let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
32816 let shifted = if let Some(off) = offset {
32817 let interval =
32818 Expression::Interval(Box::new(crate::expressions::Interval {
32819 this: Some(Expression::Literal(Box::new(Literal::String(
32820 off.to_string(),
32821 )))),
32822 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32823 unit: crate::expressions::IntervalUnit::Day,
32824 use_plural: false,
32825 }),
32826 }));
32827 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
32828 date, interval,
32829 )))
32830 } else {
32831 date
32832 };
32833 Expression::Function(Box::new(Function::new(
32834 "DATE_TRUNC".to_string(),
32835 vec![
32836 Expression::Literal(Box::new(Literal::String(
32837 "WEEK".to_string(),
32838 ))),
32839 shifted,
32840 ],
32841 )))
32842 };
32843
32844 let trunc_d2 = make_trunc(norm_d2, day_offset);
32845 let trunc_d1 = make_trunc(norm_d1, day_offset);
32846 return Ok(Expression::Function(Box::new(Function::new(
32847 "DATE_DIFF".to_string(),
32848 vec![
32849 Expression::Literal(Box::new(Literal::String("WEEK".to_string()))),
32850 trunc_d2,
32851 trunc_d1,
32852 ],
32853 ))));
32854 }
32855
32856 return Ok(Expression::Function(Box::new(Function::new(
32857 "DATE_DIFF".to_string(),
32858 vec![
32859 Expression::Literal(Box::new(Literal::String(unit_str))),
32860 norm_d2,
32861 norm_d1,
32862 ],
32863 ))));
32864 }
32865
32866 // Default: DATEDIFF(unit, date2, date1)
32867 let unit = Expression::Identifier(Identifier::new(unit_str));
32868 Ok(Expression::Function(Box::new(Function::new(
32869 "DATEDIFF".to_string(),
32870 vec![unit, date2, date1],
32871 ))))
32872 }
32873
32874 // TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
32875 "TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
32876 let ts = args.remove(0);
32877 let interval_expr = args.remove(0);
32878 let (val, unit) =
32879 Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
32880 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
32881 });
32882
32883 match target {
32884 DialectType::Snowflake => {
32885 // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
32886 // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
32887 // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
32888 let unit_str = Self::interval_unit_to_string(&unit);
32889 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
32890 Ok(Expression::TimestampAdd(Box::new(
32891 crate::expressions::TimestampAdd {
32892 this: Box::new(val),
32893 expression: Box::new(cast_ts),
32894 unit: Some(unit_str.to_string()),
32895 },
32896 )))
32897 }
32898 DialectType::Spark | DialectType::Databricks => {
32899 if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
32900 // Spark DATETIME_ADD: ts + INTERVAL val UNIT
32901 let interval =
32902 Expression::Interval(Box::new(crate::expressions::Interval {
32903 this: Some(val),
32904 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32905 unit,
32906 use_plural: false,
32907 }),
32908 }));
32909 Ok(Expression::Add(Box::new(
32910 crate::expressions::BinaryOp::new(ts, interval),
32911 )))
32912 } else if name == "DATETIME_ADD"
32913 && matches!(target, DialectType::Databricks)
32914 {
32915 // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
32916 let unit_str = Self::interval_unit_to_string(&unit);
32917 Ok(Expression::Function(Box::new(Function::new(
32918 "TIMESTAMPADD".to_string(),
32919 vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
32920 ))))
32921 } else {
32922 // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
32923 let unit_str = Self::interval_unit_to_string(&unit);
32924 let cast_ts =
32925 if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
32926 Self::maybe_cast_ts(ts)
32927 } else {
32928 ts
32929 };
32930 Ok(Expression::Function(Box::new(Function::new(
32931 "DATE_ADD".to_string(),
32932 vec![
32933 Expression::Identifier(Identifier::new(unit_str)),
32934 val,
32935 cast_ts,
32936 ],
32937 ))))
32938 }
32939 }
32940 DialectType::MySQL => {
32941 // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
32942 let mysql_ts = if name.starts_with("TIMESTAMP") {
32943 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
32944 match &ts {
32945 Expression::Function(ref inner_f)
32946 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
32947 {
32948 // Already wrapped, keep as-is
32949 ts
32950 }
32951 _ => {
32952 // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
32953 let unwrapped = match ts {
32954 Expression::Literal(lit)
32955 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
32956 {
32957 let Literal::Timestamp(s) = lit.as_ref() else {
32958 unreachable!()
32959 };
32960 Expression::Literal(Box::new(Literal::String(
32961 s.clone(),
32962 )))
32963 }
32964 other => other,
32965 };
32966 Expression::Function(Box::new(Function::new(
32967 "TIMESTAMP".to_string(),
32968 vec![unwrapped],
32969 )))
32970 }
32971 }
32972 } else {
32973 ts
32974 };
32975 Ok(Expression::DateAdd(Box::new(
32976 crate::expressions::DateAddFunc {
32977 this: mysql_ts,
32978 interval: val,
32979 unit,
32980 },
32981 )))
32982 }
32983 _ => {
32984 // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
32985 let cast_ts = if matches!(target, DialectType::DuckDB) {
32986 if name == "DATETIME_ADD" {
32987 Self::ensure_cast_timestamp(ts)
32988 } else if name.starts_with("TIMESTAMP") {
32989 Self::maybe_cast_ts_to_tz(ts, &name)
32990 } else {
32991 ts
32992 }
32993 } else {
32994 ts
32995 };
32996 Ok(Expression::DateAdd(Box::new(
32997 crate::expressions::DateAddFunc {
32998 this: cast_ts,
32999 interval: val,
33000 unit,
33001 },
33002 )))
33003 }
33004 }
33005 }
33006
33007 // TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
33008 "TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
33009 let ts = args.remove(0);
33010 let interval_expr = args.remove(0);
33011 let (val, unit) =
33012 Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
33013 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
33014 });
33015
33016 match target {
33017 DialectType::Snowflake => {
33018 // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
33019 let unit_str = Self::interval_unit_to_string(&unit);
33020 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
33021 let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
33022 val,
33023 Expression::Neg(Box::new(crate::expressions::UnaryOp {
33024 this: Expression::number(1),
33025 inferred_type: None,
33026 })),
33027 )));
33028 Ok(Expression::TimestampAdd(Box::new(
33029 crate::expressions::TimestampAdd {
33030 this: Box::new(neg_val),
33031 expression: Box::new(cast_ts),
33032 unit: Some(unit_str.to_string()),
33033 },
33034 )))
33035 }
33036 DialectType::Spark | DialectType::Databricks => {
33037 if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
33038 || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
33039 {
33040 // Spark: ts - INTERVAL val UNIT
33041 let cast_ts = if name.starts_with("TIMESTAMP") {
33042 Self::maybe_cast_ts(ts)
33043 } else {
33044 ts
33045 };
33046 let interval =
33047 Expression::Interval(Box::new(crate::expressions::Interval {
33048 this: Some(val),
33049 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33050 unit,
33051 use_plural: false,
33052 }),
33053 }));
33054 Ok(Expression::Sub(Box::new(
33055 crate::expressions::BinaryOp::new(cast_ts, interval),
33056 )))
33057 } else {
33058 // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
33059 let unit_str = Self::interval_unit_to_string(&unit);
33060 let neg_val =
33061 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
33062 val,
33063 Expression::Neg(Box::new(crate::expressions::UnaryOp {
33064 this: Expression::number(1),
33065 inferred_type: None,
33066 })),
33067 )));
33068 Ok(Expression::Function(Box::new(Function::new(
33069 "TIMESTAMPADD".to_string(),
33070 vec![
33071 Expression::Identifier(Identifier::new(unit_str)),
33072 neg_val,
33073 ts,
33074 ],
33075 ))))
33076 }
33077 }
33078 DialectType::MySQL => {
33079 let mysql_ts = if name.starts_with("TIMESTAMP") {
33080 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
33081 match &ts {
33082 Expression::Function(ref inner_f)
33083 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
33084 {
33085 // Already wrapped, keep as-is
33086 ts
33087 }
33088 _ => {
33089 let unwrapped = match ts {
33090 Expression::Literal(lit)
33091 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
33092 {
33093 let Literal::Timestamp(s) = lit.as_ref() else {
33094 unreachable!()
33095 };
33096 Expression::Literal(Box::new(Literal::String(
33097 s.clone(),
33098 )))
33099 }
33100 other => other,
33101 };
33102 Expression::Function(Box::new(Function::new(
33103 "TIMESTAMP".to_string(),
33104 vec![unwrapped],
33105 )))
33106 }
33107 }
33108 } else {
33109 ts
33110 };
33111 Ok(Expression::DateSub(Box::new(
33112 crate::expressions::DateAddFunc {
33113 this: mysql_ts,
33114 interval: val,
33115 unit,
33116 },
33117 )))
33118 }
33119 _ => {
33120 let cast_ts = if matches!(target, DialectType::DuckDB) {
33121 if name == "DATETIME_SUB" {
33122 Self::ensure_cast_timestamp(ts)
33123 } else if name.starts_with("TIMESTAMP") {
33124 Self::maybe_cast_ts_to_tz(ts, &name)
33125 } else {
33126 ts
33127 }
33128 } else {
33129 ts
33130 };
33131 Ok(Expression::DateSub(Box::new(
33132 crate::expressions::DateAddFunc {
33133 this: cast_ts,
33134 interval: val,
33135 unit,
33136 },
33137 )))
33138 }
33139 }
33140 }
33141
33142 // DATE_SUB(date, INTERVAL n UNIT) -> target-specific
33143 "DATE_SUB" if args.len() == 2 => {
33144 let date = args.remove(0);
33145 let interval_expr = args.remove(0);
33146 let (val, unit) =
33147 Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
33148 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
33149 });
33150
33151 match target {
33152 DialectType::Databricks | DialectType::Spark => {
33153 // Databricks/Spark: DATE_ADD(date, -val)
33154 // Use DateAdd expression with negative val so it generates correctly
33155 // The generator will output DATE_ADD(date, INTERVAL -val DAY)
33156 // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
33157 // Instead, we directly output as a simple negated DateSub
33158 Ok(Expression::DateSub(Box::new(
33159 crate::expressions::DateAddFunc {
33160 this: date,
33161 interval: val,
33162 unit,
33163 },
33164 )))
33165 }
33166 DialectType::DuckDB => {
33167 // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
33168 let cast_date = Self::ensure_cast_date(date);
33169 let interval =
33170 Expression::Interval(Box::new(crate::expressions::Interval {
33171 this: Some(val),
33172 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33173 unit,
33174 use_plural: false,
33175 }),
33176 }));
33177 Ok(Expression::Sub(Box::new(
33178 crate::expressions::BinaryOp::new(cast_date, interval),
33179 )))
33180 }
33181 DialectType::Snowflake => {
33182 // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
33183 // Just ensure the date is cast properly
33184 let cast_date = Self::ensure_cast_date(date);
33185 Ok(Expression::DateSub(Box::new(
33186 crate::expressions::DateAddFunc {
33187 this: cast_date,
33188 interval: val,
33189 unit,
33190 },
33191 )))
33192 }
33193 DialectType::PostgreSQL => {
33194 // PostgreSQL: date - INTERVAL 'val UNIT'
33195 let unit_str = Self::interval_unit_to_string(&unit);
33196 let interval =
33197 Expression::Interval(Box::new(crate::expressions::Interval {
33198 this: Some(Expression::Literal(Box::new(Literal::String(
33199 format!("{} {}", Self::expr_to_string(&val), unit_str),
33200 )))),
33201 unit: None,
33202 }));
33203 Ok(Expression::Sub(Box::new(
33204 crate::expressions::BinaryOp::new(date, interval),
33205 )))
33206 }
33207 _ => Ok(Expression::DateSub(Box::new(
33208 crate::expressions::DateAddFunc {
33209 this: date,
33210 interval: val,
33211 unit,
33212 },
33213 ))),
33214 }
33215 }
33216
33217 // DATEADD(unit, val, date) -> target-specific form
33218 // Used by: Redshift, Snowflake, TSQL, ClickHouse
33219 "DATEADD" if args.len() == 3 => {
33220 let arg0 = args.remove(0);
33221 let arg1 = args.remove(0);
33222 let arg2 = args.remove(0);
33223 let unit_str = get_unit_str(&arg0);
33224
33225 if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
33226 // Keep DATEADD(UNIT, val, date) with uppercased unit
33227 let unit = Expression::Identifier(Identifier::new(unit_str));
33228 // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
33229 let date = if matches!(target, DialectType::TSQL)
33230 && !matches!(
33231 source,
33232 DialectType::Spark | DialectType::Databricks | DialectType::Hive
33233 ) {
33234 Self::ensure_cast_datetime2(arg2)
33235 } else {
33236 arg2
33237 };
33238 return Ok(Expression::Function(Box::new(Function::new(
33239 "DATEADD".to_string(),
33240 vec![unit, arg1, date],
33241 ))));
33242 }
33243
33244 if matches!(target, DialectType::DuckDB) {
33245 // DuckDB: date + INTERVAL 'val' UNIT
33246 let iu = parse_interval_unit(&unit_str);
33247 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
33248 this: Some(arg1),
33249 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33250 unit: iu,
33251 use_plural: false,
33252 }),
33253 }));
33254 let cast_date = Self::ensure_cast_timestamp(arg2);
33255 return Ok(Expression::Add(Box::new(
33256 crate::expressions::BinaryOp::new(cast_date, interval),
33257 )));
33258 }
33259
33260 if matches!(target, DialectType::BigQuery) {
33261 // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
33262 let iu = parse_interval_unit(&unit_str);
33263 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
33264 this: Some(arg1),
33265 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33266 unit: iu,
33267 use_plural: false,
33268 }),
33269 }));
33270 return Ok(Expression::Function(Box::new(Function::new(
33271 "DATE_ADD".to_string(),
33272 vec![arg2, interval],
33273 ))));
33274 }
33275
33276 if matches!(target, DialectType::Databricks) {
33277 // Databricks: keep DATEADD(UNIT, val, date) format
33278 let unit = Expression::Identifier(Identifier::new(unit_str));
33279 return Ok(Expression::Function(Box::new(Function::new(
33280 "DATEADD".to_string(),
33281 vec![unit, arg1, arg2],
33282 ))));
33283 }
33284
33285 if matches!(target, DialectType::Spark) {
33286 // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
33287 fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
33288 if let Expression::Literal(lit) = &expr {
33289 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
33290 if let Ok(val) = n.parse::<i64>() {
33291 return Expression::Literal(Box::new(
33292 crate::expressions::Literal::Number(
33293 (val * factor).to_string(),
33294 ),
33295 ));
33296 }
33297 }
33298 }
33299 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
33300 expr,
33301 Expression::Literal(Box::new(crate::expressions::Literal::Number(
33302 factor.to_string(),
33303 ))),
33304 )))
33305 }
33306 match unit_str.as_str() {
33307 "YEAR" => {
33308 let months = multiply_expr_dateadd(arg1, 12);
33309 return Ok(Expression::Function(Box::new(Function::new(
33310 "ADD_MONTHS".to_string(),
33311 vec![arg2, months],
33312 ))));
33313 }
33314 "QUARTER" => {
33315 let months = multiply_expr_dateadd(arg1, 3);
33316 return Ok(Expression::Function(Box::new(Function::new(
33317 "ADD_MONTHS".to_string(),
33318 vec![arg2, months],
33319 ))));
33320 }
33321 "MONTH" => {
33322 return Ok(Expression::Function(Box::new(Function::new(
33323 "ADD_MONTHS".to_string(),
33324 vec![arg2, arg1],
33325 ))));
33326 }
33327 "WEEK" => {
33328 let days = multiply_expr_dateadd(arg1, 7);
33329 return Ok(Expression::Function(Box::new(Function::new(
33330 "DATE_ADD".to_string(),
33331 vec![arg2, days],
33332 ))));
33333 }
33334 "DAY" => {
33335 return Ok(Expression::Function(Box::new(Function::new(
33336 "DATE_ADD".to_string(),
33337 vec![arg2, arg1],
33338 ))));
33339 }
33340 _ => {
33341 let unit = Expression::Identifier(Identifier::new(unit_str));
33342 return Ok(Expression::Function(Box::new(Function::new(
33343 "DATE_ADD".to_string(),
33344 vec![unit, arg1, arg2],
33345 ))));
33346 }
33347 }
33348 }
33349
33350 if matches!(target, DialectType::Hive) {
33351 // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
33352 match unit_str.as_str() {
33353 "DAY" => {
33354 return Ok(Expression::Function(Box::new(Function::new(
33355 "DATE_ADD".to_string(),
33356 vec![arg2, arg1],
33357 ))));
33358 }
33359 "MONTH" => {
33360 return Ok(Expression::Function(Box::new(Function::new(
33361 "ADD_MONTHS".to_string(),
33362 vec![arg2, arg1],
33363 ))));
33364 }
33365 _ => {
33366 let iu = parse_interval_unit(&unit_str);
33367 let interval =
33368 Expression::Interval(Box::new(crate::expressions::Interval {
33369 this: Some(arg1),
33370 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33371 unit: iu,
33372 use_plural: false,
33373 }),
33374 }));
33375 return Ok(Expression::Add(Box::new(
33376 crate::expressions::BinaryOp::new(arg2, interval),
33377 )));
33378 }
33379 }
33380 }
33381
33382 if matches!(target, DialectType::PostgreSQL) {
33383 // PostgreSQL: date + INTERVAL 'val UNIT'
33384 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
33385 this: Some(Expression::Literal(Box::new(Literal::String(format!(
33386 "{} {}",
33387 Self::expr_to_string(&arg1),
33388 unit_str
33389 ))))),
33390 unit: None,
33391 }));
33392 return Ok(Expression::Add(Box::new(
33393 crate::expressions::BinaryOp::new(arg2, interval),
33394 )));
33395 }
33396
33397 if matches!(
33398 target,
33399 DialectType::Presto | DialectType::Trino | DialectType::Athena
33400 ) {
33401 // Presto/Trino: DATE_ADD('UNIT', val, date)
33402 return Ok(Expression::Function(Box::new(Function::new(
33403 "DATE_ADD".to_string(),
33404 vec![
33405 Expression::Literal(Box::new(Literal::String(unit_str))),
33406 arg1,
33407 arg2,
33408 ],
33409 ))));
33410 }
33411
33412 if matches!(target, DialectType::ClickHouse) {
33413 // ClickHouse: DATE_ADD(UNIT, val, date)
33414 let unit = Expression::Identifier(Identifier::new(unit_str));
33415 return Ok(Expression::Function(Box::new(Function::new(
33416 "DATE_ADD".to_string(),
33417 vec![unit, arg1, arg2],
33418 ))));
33419 }
33420
33421 // Default: keep DATEADD with uppercased unit
33422 let unit = Expression::Identifier(Identifier::new(unit_str));
33423 Ok(Expression::Function(Box::new(Function::new(
33424 "DATEADD".to_string(),
33425 vec![unit, arg1, arg2],
33426 ))))
33427 }
33428
33429 // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
33430 "DATE_ADD" if args.len() == 3 => {
33431 let arg0 = args.remove(0);
33432 let arg1 = args.remove(0);
33433 let arg2 = args.remove(0);
33434 let unit_str = get_unit_str(&arg0);
33435
33436 if matches!(
33437 target,
33438 DialectType::Presto | DialectType::Trino | DialectType::Athena
33439 ) {
33440 // Presto/Trino: DATE_ADD('UNIT', val, date)
33441 return Ok(Expression::Function(Box::new(Function::new(
33442 "DATE_ADD".to_string(),
33443 vec![
33444 Expression::Literal(Box::new(Literal::String(unit_str))),
33445 arg1,
33446 arg2,
33447 ],
33448 ))));
33449 }
33450
33451 if matches!(
33452 target,
33453 DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
33454 ) {
33455 // DATEADD(UNIT, val, date)
33456 let unit = Expression::Identifier(Identifier::new(unit_str));
33457 let date = if matches!(target, DialectType::TSQL) {
33458 Self::ensure_cast_datetime2(arg2)
33459 } else {
33460 arg2
33461 };
33462 return Ok(Expression::Function(Box::new(Function::new(
33463 "DATEADD".to_string(),
33464 vec![unit, arg1, date],
33465 ))));
33466 }
33467
33468 if matches!(target, DialectType::DuckDB) {
33469 // DuckDB: date + INTERVAL val UNIT
33470 let iu = parse_interval_unit(&unit_str);
33471 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
33472 this: Some(arg1),
33473 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33474 unit: iu,
33475 use_plural: false,
33476 }),
33477 }));
33478 return Ok(Expression::Add(Box::new(
33479 crate::expressions::BinaryOp::new(arg2, interval),
33480 )));
33481 }
33482
33483 if matches!(target, DialectType::Spark | DialectType::Databricks) {
33484 // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
33485 let unit = Expression::Identifier(Identifier::new(unit_str));
33486 return Ok(Expression::Function(Box::new(Function::new(
33487 "DATE_ADD".to_string(),
33488 vec![unit, arg1, arg2],
33489 ))));
33490 }
33491
33492 // Default: DATE_ADD(UNIT, val, date)
33493 let unit = Expression::Identifier(Identifier::new(unit_str));
33494 Ok(Expression::Function(Box::new(Function::new(
33495 "DATE_ADD".to_string(),
33496 vec![unit, arg1, arg2],
33497 ))))
33498 }
33499
33500 // DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
33501 "DATE_ADD" if args.len() == 2 => {
33502 let date = args.remove(0);
33503 let interval_expr = args.remove(0);
33504 let (val, unit) =
33505 Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
33506 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
33507 });
33508 let unit_str = Self::interval_unit_to_string(&unit);
33509
33510 match target {
33511 DialectType::DuckDB => {
33512 // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
33513 let cast_date = Self::ensure_cast_date(date);
33514 let quoted_val = Self::quote_interval_val(&val);
33515 let interval =
33516 Expression::Interval(Box::new(crate::expressions::Interval {
33517 this: Some(quoted_val),
33518 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33519 unit,
33520 use_plural: false,
33521 }),
33522 }));
33523 Ok(Expression::Add(Box::new(
33524 crate::expressions::BinaryOp::new(cast_date, interval),
33525 )))
33526 }
33527 DialectType::PostgreSQL => {
33528 // PostgreSQL: date + INTERVAL 'val UNIT'
33529 let interval =
33530 Expression::Interval(Box::new(crate::expressions::Interval {
33531 this: Some(Expression::Literal(Box::new(Literal::String(
33532 format!("{} {}", Self::expr_to_string(&val), unit_str),
33533 )))),
33534 unit: None,
33535 }));
33536 Ok(Expression::Add(Box::new(
33537 crate::expressions::BinaryOp::new(date, interval),
33538 )))
33539 }
33540 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
33541 // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
33542 let val_str = Self::expr_to_string(&val);
33543 Ok(Expression::Function(Box::new(Function::new(
33544 "DATE_ADD".to_string(),
33545 vec![
33546 Expression::Literal(Box::new(Literal::String(
33547 unit_str.to_string(),
33548 ))),
33549 Expression::Cast(Box::new(Cast {
33550 this: Expression::Literal(Box::new(Literal::String(val_str))),
33551 to: DataType::BigInt { length: None },
33552 trailing_comments: vec![],
33553 double_colon_syntax: false,
33554 format: None,
33555 default: None,
33556 inferred_type: None,
33557 })),
33558 date,
33559 ],
33560 ))))
33561 }
33562 DialectType::Spark | DialectType::Hive => {
33563 // Spark/Hive: DATE_ADD(date, val) for DAY
33564 match unit_str {
33565 "DAY" => Ok(Expression::Function(Box::new(Function::new(
33566 "DATE_ADD".to_string(),
33567 vec![date, val],
33568 )))),
33569 "MONTH" => Ok(Expression::Function(Box::new(Function::new(
33570 "ADD_MONTHS".to_string(),
33571 vec![date, val],
33572 )))),
33573 _ => {
33574 let iu = parse_interval_unit(&unit_str);
33575 let interval =
33576 Expression::Interval(Box::new(crate::expressions::Interval {
33577 this: Some(val),
33578 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33579 unit: iu,
33580 use_plural: false,
33581 }),
33582 }));
33583 Ok(Expression::Function(Box::new(Function::new(
33584 "DATE_ADD".to_string(),
33585 vec![date, interval],
33586 ))))
33587 }
33588 }
33589 }
33590 DialectType::Snowflake => {
33591 // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
33592 let cast_date = Self::ensure_cast_date(date);
33593 let val_str = Self::expr_to_string(&val);
33594 Ok(Expression::Function(Box::new(Function::new(
33595 "DATEADD".to_string(),
33596 vec![
33597 Expression::Identifier(Identifier::new(unit_str)),
33598 Expression::Literal(Box::new(Literal::String(val_str))),
33599 cast_date,
33600 ],
33601 ))))
33602 }
33603 DialectType::TSQL | DialectType::Fabric => {
33604 let cast_date = Self::ensure_cast_datetime2(date);
33605 Ok(Expression::Function(Box::new(Function::new(
33606 "DATEADD".to_string(),
33607 vec![
33608 Expression::Identifier(Identifier::new(unit_str)),
33609 val,
33610 cast_date,
33611 ],
33612 ))))
33613 }
33614 DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
33615 "DATEADD".to_string(),
33616 vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
33617 )))),
33618 DialectType::MySQL => {
33619 // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
33620 let quoted_val = Self::quote_interval_val(&val);
33621 let iu = parse_interval_unit(&unit_str);
33622 let interval =
33623 Expression::Interval(Box::new(crate::expressions::Interval {
33624 this: Some(quoted_val),
33625 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33626 unit: iu,
33627 use_plural: false,
33628 }),
33629 }));
33630 Ok(Expression::Function(Box::new(Function::new(
33631 "DATE_ADD".to_string(),
33632 vec![date, interval],
33633 ))))
33634 }
33635 DialectType::BigQuery => {
33636 // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT)
33637 let quoted_val = Self::quote_interval_val(&val);
33638 let iu = parse_interval_unit(&unit_str);
33639 let interval =
33640 Expression::Interval(Box::new(crate::expressions::Interval {
33641 this: Some(quoted_val),
33642 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33643 unit: iu,
33644 use_plural: false,
33645 }),
33646 }));
33647 Ok(Expression::Function(Box::new(Function::new(
33648 "DATE_ADD".to_string(),
33649 vec![date, interval],
33650 ))))
33651 }
33652 DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
33653 "DATEADD".to_string(),
33654 vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
33655 )))),
33656 _ => {
33657 // Default: keep as DATE_ADD with decomposed interval
33658 Ok(Expression::DateAdd(Box::new(
33659 crate::expressions::DateAddFunc {
33660 this: date,
33661 interval: val,
33662 unit,
33663 },
33664 )))
33665 }
33666 }
33667 }
33668
33669 // ADD_MONTHS(date, val) -> target-specific form
33670 "ADD_MONTHS" if args.len() == 2 => {
33671 let date = args.remove(0);
33672 let val = args.remove(0);
33673
33674 if matches!(target, DialectType::TSQL) {
33675 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
33676 let cast_date = Self::ensure_cast_datetime2(date);
33677 return Ok(Expression::Function(Box::new(Function::new(
33678 "DATEADD".to_string(),
33679 vec![
33680 Expression::Identifier(Identifier::new("MONTH")),
33681 val,
33682 cast_date,
33683 ],
33684 ))));
33685 }
33686
33687 if matches!(target, DialectType::DuckDB) {
33688 // DuckDB: date + INTERVAL val MONTH
33689 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
33690 this: Some(val),
33691 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33692 unit: crate::expressions::IntervalUnit::Month,
33693 use_plural: false,
33694 }),
33695 }));
33696 return Ok(Expression::Add(Box::new(
33697 crate::expressions::BinaryOp::new(date, interval),
33698 )));
33699 }
33700
33701 if matches!(target, DialectType::Snowflake) {
33702 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
33703 if matches!(source, DialectType::Snowflake) {
33704 return Ok(Expression::Function(Box::new(Function::new(
33705 "ADD_MONTHS".to_string(),
33706 vec![date, val],
33707 ))));
33708 }
33709 return Ok(Expression::Function(Box::new(Function::new(
33710 "DATEADD".to_string(),
33711 vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
33712 ))));
33713 }
33714
33715 if matches!(target, DialectType::Spark | DialectType::Databricks) {
33716 // Spark: ADD_MONTHS(date, val) - keep as is
33717 return Ok(Expression::Function(Box::new(Function::new(
33718 "ADD_MONTHS".to_string(),
33719 vec![date, val],
33720 ))));
33721 }
33722
33723 if matches!(target, DialectType::Hive) {
33724 return Ok(Expression::Function(Box::new(Function::new(
33725 "ADD_MONTHS".to_string(),
33726 vec![date, val],
33727 ))));
33728 }
33729
33730 if matches!(
33731 target,
33732 DialectType::Presto | DialectType::Trino | DialectType::Athena
33733 ) {
33734 // Presto: DATE_ADD('MONTH', val, date)
33735 return Ok(Expression::Function(Box::new(Function::new(
33736 "DATE_ADD".to_string(),
33737 vec![
33738 Expression::Literal(Box::new(Literal::String("MONTH".to_string()))),
33739 val,
33740 date,
33741 ],
33742 ))));
33743 }
33744
33745 // Default: keep ADD_MONTHS
33746 Ok(Expression::Function(Box::new(Function::new(
33747 "ADD_MONTHS".to_string(),
33748 vec![date, val],
33749 ))))
33750 }
33751
33752 // SAFE_DIVIDE(x, y) -> target-specific form directly
33753 "SAFE_DIVIDE" if args.len() == 2 => {
33754 let x = args.remove(0);
33755 let y = args.remove(0);
33756 // Wrap x and y in parens if they're complex expressions
33757 let y_ref = match &y {
33758 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
33759 y.clone()
33760 }
33761 _ => Expression::Paren(Box::new(Paren {
33762 this: y.clone(),
33763 trailing_comments: vec![],
33764 })),
33765 };
33766 let x_ref = match &x {
33767 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
33768 x.clone()
33769 }
33770 _ => Expression::Paren(Box::new(Paren {
33771 this: x.clone(),
33772 trailing_comments: vec![],
33773 })),
33774 };
33775 let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
33776 y_ref.clone(),
33777 Expression::number(0),
33778 )));
33779 let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
33780 x_ref.clone(),
33781 y_ref.clone(),
33782 )));
33783
33784 match target {
33785 DialectType::Spark | DialectType::Databricks => Ok(Expression::Function(
33786 Box::new(Function::new("TRY_DIVIDE".to_string(), vec![x, y])),
33787 )),
33788 DialectType::DuckDB | DialectType::PostgreSQL => {
33789 // CASE WHEN y <> 0 THEN x / y ELSE NULL END
33790 let result_div = if matches!(target, DialectType::PostgreSQL) {
33791 let cast_x = Expression::Cast(Box::new(Cast {
33792 this: x_ref,
33793 to: DataType::Custom {
33794 name: "DOUBLE PRECISION".to_string(),
33795 },
33796 trailing_comments: vec![],
33797 double_colon_syntax: false,
33798 format: None,
33799 default: None,
33800 inferred_type: None,
33801 }));
33802 Expression::Div(Box::new(crate::expressions::BinaryOp::new(
33803 cast_x, y_ref,
33804 )))
33805 } else {
33806 div_expr
33807 };
33808 Ok(Expression::Case(Box::new(crate::expressions::Case {
33809 operand: None,
33810 whens: vec![(condition, result_div)],
33811 else_: Some(Expression::Null(crate::expressions::Null)),
33812 comments: Vec::new(),
33813 inferred_type: None,
33814 })))
33815 }
33816 DialectType::Snowflake => {
33817 // IFF(y <> 0, x / y, NULL)
33818 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
33819 condition,
33820 true_value: div_expr,
33821 false_value: Some(Expression::Null(crate::expressions::Null)),
33822 original_name: Some("IFF".to_string()),
33823 inferred_type: None,
33824 })))
33825 }
33826 DialectType::Presto | DialectType::Trino => {
33827 // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
33828 let cast_x = Expression::Cast(Box::new(Cast {
33829 this: x_ref,
33830 to: DataType::Double {
33831 precision: None,
33832 scale: None,
33833 },
33834 trailing_comments: vec![],
33835 double_colon_syntax: false,
33836 format: None,
33837 default: None,
33838 inferred_type: None,
33839 }));
33840 let cast_div = Expression::Div(Box::new(
33841 crate::expressions::BinaryOp::new(cast_x, y_ref),
33842 ));
33843 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
33844 condition,
33845 true_value: cast_div,
33846 false_value: Some(Expression::Null(crate::expressions::Null)),
33847 original_name: None,
33848 inferred_type: None,
33849 })))
33850 }
33851 _ => {
33852 // IF(y <> 0, x / y, NULL)
33853 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
33854 condition,
33855 true_value: div_expr,
33856 false_value: Some(Expression::Null(crate::expressions::Null)),
33857 original_name: None,
33858 inferred_type: None,
33859 })))
33860 }
33861 }
33862 }
33863
33864 // GENERATE_UUID() -> UUID() with CAST to string
33865 "GENERATE_UUID" => {
33866 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
33867 this: None,
33868 name: None,
33869 is_string: None,
33870 }));
33871 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
33872 let cast_type = match target {
33873 DialectType::DuckDB => Some(DataType::Text),
33874 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
33875 length: None,
33876 parenthesized_length: false,
33877 }),
33878 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
33879 Some(DataType::String { length: None })
33880 }
33881 _ => None,
33882 };
33883 if let Some(dt) = cast_type {
33884 Ok(Expression::Cast(Box::new(Cast {
33885 this: uuid_expr,
33886 to: dt,
33887 trailing_comments: vec![],
33888 double_colon_syntax: false,
33889 format: None,
33890 default: None,
33891 inferred_type: None,
33892 })))
33893 } else {
33894 Ok(uuid_expr)
33895 }
33896 }
33897
33898 // COUNTIF(x) -> CountIf expression
33899 "COUNTIF" if args.len() == 1 => {
33900 let arg = args.remove(0);
33901 Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
33902 this: arg,
33903 distinct: false,
33904 filter: None,
33905 order_by: vec![],
33906 name: None,
33907 ignore_nulls: None,
33908 having_max: None,
33909 limit: None,
33910 inferred_type: None,
33911 })))
33912 }
33913
33914 // EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression
33915 "EDIT_DISTANCE" => {
33916 // Strip named arguments (max_distance => N) and pass as positional
33917 let mut positional_args: Vec<Expression> = vec![];
33918 for arg in args {
33919 match arg {
33920 Expression::NamedArgument(na) => {
33921 positional_args.push(na.value);
33922 }
33923 other => positional_args.push(other),
33924 }
33925 }
33926 if positional_args.len() >= 2 {
33927 let col1 = positional_args.remove(0);
33928 let col2 = positional_args.remove(0);
33929 let levenshtein = crate::expressions::BinaryFunc {
33930 this: col1,
33931 expression: col2,
33932 original_name: None,
33933 inferred_type: None,
33934 };
33935 // Pass extra args through a function wrapper with all args
33936 if !positional_args.is_empty() {
33937 let max_dist = positional_args.remove(0);
33938 // DuckDB: CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(a, b), max) END
33939 if matches!(target, DialectType::DuckDB) {
33940 let lev = Expression::Function(Box::new(Function::new(
33941 "LEVENSHTEIN".to_string(),
33942 vec![levenshtein.this, levenshtein.expression],
33943 )));
33944 let lev_is_null =
33945 Expression::IsNull(Box::new(crate::expressions::IsNull {
33946 this: lev.clone(),
33947 not: false,
33948 postfix_form: false,
33949 }));
33950 let max_is_null =
33951 Expression::IsNull(Box::new(crate::expressions::IsNull {
33952 this: max_dist.clone(),
33953 not: false,
33954 postfix_form: false,
33955 }));
33956 let null_check =
33957 Expression::Or(Box::new(crate::expressions::BinaryOp {
33958 left: lev_is_null,
33959 right: max_is_null,
33960 left_comments: Vec::new(),
33961 operator_comments: Vec::new(),
33962 trailing_comments: Vec::new(),
33963 inferred_type: None,
33964 }));
33965 let least =
33966 Expression::Least(Box::new(crate::expressions::VarArgFunc {
33967 expressions: vec![lev, max_dist],
33968 original_name: None,
33969 inferred_type: None,
33970 }));
33971 return Ok(Expression::Case(Box::new(crate::expressions::Case {
33972 operand: None,
33973 whens: vec![(
33974 null_check,
33975 Expression::Null(crate::expressions::Null),
33976 )],
33977 else_: Some(least),
33978 comments: Vec::new(),
33979 inferred_type: None,
33980 })));
33981 }
33982 let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
33983 all_args.extend(positional_args);
33984 // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
33985 let func_name = if matches!(target, DialectType::PostgreSQL) {
33986 "LEVENSHTEIN_LESS_EQUAL"
33987 } else {
33988 "LEVENSHTEIN"
33989 };
33990 return Ok(Expression::Function(Box::new(Function::new(
33991 func_name.to_string(),
33992 all_args,
33993 ))));
33994 }
33995 Ok(Expression::Levenshtein(Box::new(levenshtein)))
33996 } else {
33997 Ok(Expression::Function(Box::new(Function::new(
33998 "EDIT_DISTANCE".to_string(),
33999 positional_args,
34000 ))))
34001 }
34002 }
34003
34004 // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
34005 "TIMESTAMP_SECONDS" if args.len() == 1 => {
34006 let arg = args.remove(0);
34007 Ok(Expression::UnixToTime(Box::new(
34008 crate::expressions::UnixToTime {
34009 this: Box::new(arg),
34010 scale: Some(0),
34011 zone: None,
34012 hours: None,
34013 minutes: None,
34014 format: None,
34015 target_type: None,
34016 },
34017 )))
34018 }
34019
34020 // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
34021 "TIMESTAMP_MILLIS" if args.len() == 1 => {
34022 let arg = args.remove(0);
34023 Ok(Expression::UnixToTime(Box::new(
34024 crate::expressions::UnixToTime {
34025 this: Box::new(arg),
34026 scale: Some(3),
34027 zone: None,
34028 hours: None,
34029 minutes: None,
34030 format: None,
34031 target_type: None,
34032 },
34033 )))
34034 }
34035
34036 // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
34037 "TIMESTAMP_MICROS" if args.len() == 1 => {
34038 let arg = args.remove(0);
34039 Ok(Expression::UnixToTime(Box::new(
34040 crate::expressions::UnixToTime {
34041 this: Box::new(arg),
34042 scale: Some(6),
34043 zone: None,
34044 hours: None,
34045 minutes: None,
34046 format: None,
34047 target_type: None,
34048 },
34049 )))
34050 }
34051
34052 // DIV(x, y) -> IntDiv expression
34053 "DIV" if args.len() == 2 => {
34054 let x = args.remove(0);
34055 let y = args.remove(0);
34056 Ok(Expression::IntDiv(Box::new(
34057 crate::expressions::BinaryFunc {
34058 this: x,
34059 expression: y,
34060 original_name: None,
34061 inferred_type: None,
34062 },
34063 )))
34064 }
34065
34066 // TO_HEX(x) -> target-specific form
34067 "TO_HEX" if args.len() == 1 => {
34068 let arg = args.remove(0);
34069 // Check if inner function already returns hex string in certain targets
34070 let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
34071 if matches!(target, DialectType::BigQuery) {
34072 // BQ->BQ: keep as TO_HEX
34073 Ok(Expression::Function(Box::new(Function::new(
34074 "TO_HEX".to_string(),
34075 vec![arg],
34076 ))))
34077 } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
34078 // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
34079 Ok(arg)
34080 } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
34081 // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
34082 // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
34083 // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
34084 // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
34085 if let Expression::Function(ref inner_f) = arg {
34086 let inner_args = inner_f.args.clone();
34087 let binary_func = match inner_f.name.to_ascii_uppercase().as_str() {
34088 "SHA1" => Expression::Function(Box::new(Function::new(
34089 "SHA1_BINARY".to_string(),
34090 inner_args,
34091 ))),
34092 "MD5" => Expression::Function(Box::new(Function::new(
34093 "MD5_BINARY".to_string(),
34094 inner_args,
34095 ))),
34096 "SHA256" => {
34097 let mut a = inner_args;
34098 a.push(Expression::number(256));
34099 Expression::Function(Box::new(Function::new(
34100 "SHA2_BINARY".to_string(),
34101 a,
34102 )))
34103 }
34104 "SHA512" => {
34105 let mut a = inner_args;
34106 a.push(Expression::number(512));
34107 Expression::Function(Box::new(Function::new(
34108 "SHA2_BINARY".to_string(),
34109 a,
34110 )))
34111 }
34112 _ => arg.clone(),
34113 };
34114 Ok(Expression::Function(Box::new(Function::new(
34115 "TO_CHAR".to_string(),
34116 vec![binary_func],
34117 ))))
34118 } else {
34119 let inner = Expression::Function(Box::new(Function::new(
34120 "HEX".to_string(),
34121 vec![arg],
34122 )));
34123 Ok(Expression::Lower(Box::new(
34124 crate::expressions::UnaryFunc::new(inner),
34125 )))
34126 }
34127 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
34128 let inner = Expression::Function(Box::new(Function::new(
34129 "TO_HEX".to_string(),
34130 vec![arg],
34131 )));
34132 Ok(Expression::Lower(Box::new(
34133 crate::expressions::UnaryFunc::new(inner),
34134 )))
34135 } else {
34136 let inner =
34137 Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
34138 Ok(Expression::Lower(Box::new(
34139 crate::expressions::UnaryFunc::new(inner),
34140 )))
34141 }
34142 }
34143
34144 // LAST_DAY(date, unit) -> strip unit for most targets, or transform for PostgreSQL
34145 "LAST_DAY" if args.len() == 2 => {
34146 let date = args.remove(0);
34147 let _unit = args.remove(0); // Strip the unit (MONTH is default)
34148 Ok(Expression::Function(Box::new(Function::new(
34149 "LAST_DAY".to_string(),
34150 vec![date],
34151 ))))
34152 }
34153
34154 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
34155 "GENERATE_ARRAY" => {
34156 let start = args.get(0).cloned();
34157 let end = args.get(1).cloned();
34158 let step = args.get(2).cloned();
34159 Ok(Expression::GenerateSeries(Box::new(
34160 crate::expressions::GenerateSeries {
34161 start: start.map(Box::new),
34162 end: end.map(Box::new),
34163 step: step.map(Box::new),
34164 is_end_exclusive: None,
34165 },
34166 )))
34167 }
34168
34169 // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
34170 "GENERATE_TIMESTAMP_ARRAY" => {
34171 let start = args.get(0).cloned();
34172 let end = args.get(1).cloned();
34173 let step = args.get(2).cloned();
34174
34175 if matches!(target, DialectType::DuckDB) {
34176 // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
34177 // Only cast string literals - leave columns/expressions as-is
34178 let maybe_cast_ts = |expr: Expression| -> Expression {
34179 if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
34180 {
34181 Expression::Cast(Box::new(Cast {
34182 this: expr,
34183 to: DataType::Timestamp {
34184 precision: None,
34185 timezone: false,
34186 },
34187 trailing_comments: vec![],
34188 double_colon_syntax: false,
34189 format: None,
34190 default: None,
34191 inferred_type: None,
34192 }))
34193 } else {
34194 expr
34195 }
34196 };
34197 let cast_start = start.map(maybe_cast_ts);
34198 let cast_end = end.map(maybe_cast_ts);
34199 Ok(Expression::GenerateSeries(Box::new(
34200 crate::expressions::GenerateSeries {
34201 start: cast_start.map(Box::new),
34202 end: cast_end.map(Box::new),
34203 step: step.map(Box::new),
34204 is_end_exclusive: None,
34205 },
34206 )))
34207 } else {
34208 Ok(Expression::GenerateSeries(Box::new(
34209 crate::expressions::GenerateSeries {
34210 start: start.map(Box::new),
34211 end: end.map(Box::new),
34212 step: step.map(Box::new),
34213 is_end_exclusive: None,
34214 },
34215 )))
34216 }
34217 }
34218
34219 // TO_JSON(x) -> target-specific (from Spark/Hive)
34220 "TO_JSON" => {
34221 match target {
34222 DialectType::Presto | DialectType::Trino => {
34223 // JSON_FORMAT(CAST(x AS JSON))
34224 let arg = args
34225 .into_iter()
34226 .next()
34227 .unwrap_or(Expression::Null(crate::expressions::Null));
34228 let cast_json = Expression::Cast(Box::new(Cast {
34229 this: arg,
34230 to: DataType::Custom {
34231 name: "JSON".to_string(),
34232 },
34233 trailing_comments: vec![],
34234 double_colon_syntax: false,
34235 format: None,
34236 default: None,
34237 inferred_type: None,
34238 }));
34239 Ok(Expression::Function(Box::new(Function::new(
34240 "JSON_FORMAT".to_string(),
34241 vec![cast_json],
34242 ))))
34243 }
34244 DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
34245 "TO_JSON_STRING".to_string(),
34246 args,
34247 )))),
34248 DialectType::DuckDB => {
34249 // CAST(TO_JSON(x) AS TEXT)
34250 let arg = args
34251 .into_iter()
34252 .next()
34253 .unwrap_or(Expression::Null(crate::expressions::Null));
34254 let to_json = Expression::Function(Box::new(Function::new(
34255 "TO_JSON".to_string(),
34256 vec![arg],
34257 )));
34258 Ok(Expression::Cast(Box::new(Cast {
34259 this: to_json,
34260 to: DataType::Text,
34261 trailing_comments: vec![],
34262 double_colon_syntax: false,
34263 format: None,
34264 default: None,
34265 inferred_type: None,
34266 })))
34267 }
34268 _ => Ok(Expression::Function(Box::new(Function::new(
34269 "TO_JSON".to_string(),
34270 args,
34271 )))),
34272 }
34273 }
34274
34275 // TO_JSON_STRING(x) -> target-specific
34276 "TO_JSON_STRING" => {
34277 match target {
34278 DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
34279 Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
34280 ),
34281 DialectType::Presto | DialectType::Trino => {
34282 // JSON_FORMAT(CAST(x AS JSON))
34283 let arg = args
34284 .into_iter()
34285 .next()
34286 .unwrap_or(Expression::Null(crate::expressions::Null));
34287 let cast_json = Expression::Cast(Box::new(Cast {
34288 this: arg,
34289 to: DataType::Custom {
34290 name: "JSON".to_string(),
34291 },
34292 trailing_comments: vec![],
34293 double_colon_syntax: false,
34294 format: None,
34295 default: None,
34296 inferred_type: None,
34297 }));
34298 Ok(Expression::Function(Box::new(Function::new(
34299 "JSON_FORMAT".to_string(),
34300 vec![cast_json],
34301 ))))
34302 }
34303 DialectType::DuckDB => {
34304 // CAST(TO_JSON(x) AS TEXT)
34305 let arg = args
34306 .into_iter()
34307 .next()
34308 .unwrap_or(Expression::Null(crate::expressions::Null));
34309 let to_json = Expression::Function(Box::new(Function::new(
34310 "TO_JSON".to_string(),
34311 vec![arg],
34312 )));
34313 Ok(Expression::Cast(Box::new(Cast {
34314 this: to_json,
34315 to: DataType::Text,
34316 trailing_comments: vec![],
34317 double_colon_syntax: false,
34318 format: None,
34319 default: None,
34320 inferred_type: None,
34321 })))
34322 }
34323 DialectType::Snowflake => {
34324 // TO_JSON(x)
34325 Ok(Expression::Function(Box::new(Function::new(
34326 "TO_JSON".to_string(),
34327 args,
34328 ))))
34329 }
34330 _ => Ok(Expression::Function(Box::new(Function::new(
34331 "TO_JSON_STRING".to_string(),
34332 args,
34333 )))),
34334 }
34335 }
34336
34337 // SAFE_ADD(x, y) -> SafeAdd expression
34338 "SAFE_ADD" if args.len() == 2 => {
34339 let x = args.remove(0);
34340 let y = args.remove(0);
34341 Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
34342 this: Box::new(x),
34343 expression: Box::new(y),
34344 })))
34345 }
34346
34347 // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
34348 "SAFE_SUBTRACT" if args.len() == 2 => {
34349 let x = args.remove(0);
34350 let y = args.remove(0);
34351 Ok(Expression::SafeSubtract(Box::new(
34352 crate::expressions::SafeSubtract {
34353 this: Box::new(x),
34354 expression: Box::new(y),
34355 },
34356 )))
34357 }
34358
34359 // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
34360 "SAFE_MULTIPLY" if args.len() == 2 => {
34361 let x = args.remove(0);
34362 let y = args.remove(0);
34363 Ok(Expression::SafeMultiply(Box::new(
34364 crate::expressions::SafeMultiply {
34365 this: Box::new(x),
34366 expression: Box::new(y),
34367 },
34368 )))
34369 }
34370
34371 // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
34372 "REGEXP_CONTAINS" if args.len() == 2 => {
34373 let str_expr = args.remove(0);
34374 let pattern = args.remove(0);
34375 Ok(Expression::RegexpLike(Box::new(
34376 crate::expressions::RegexpFunc {
34377 this: str_expr,
34378 pattern,
34379 flags: None,
34380 },
34381 )))
34382 }
34383
34384 // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
34385 "CONTAINS_SUBSTR" if args.len() == 2 => {
34386 let a = args.remove(0);
34387 let b = args.remove(0);
34388 let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
34389 let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
34390 Ok(Expression::Function(Box::new(Function::new(
34391 "CONTAINS".to_string(),
34392 vec![lower_a, lower_b],
34393 ))))
34394 }
34395
34396 // INT64(x) -> CAST(x AS BIGINT)
34397 "INT64" if args.len() == 1 => {
34398 let arg = args.remove(0);
34399 Ok(Expression::Cast(Box::new(Cast {
34400 this: arg,
34401 to: DataType::BigInt { length: None },
34402 trailing_comments: vec![],
34403 double_colon_syntax: false,
34404 format: None,
34405 default: None,
34406 inferred_type: None,
34407 })))
34408 }
34409
34410 // INSTR(str, substr) -> target-specific
34411 "INSTR" if args.len() >= 2 => {
34412 let str_expr = args.remove(0);
34413 let substr = args.remove(0);
34414 if matches!(target, DialectType::Snowflake) {
34415 // CHARINDEX(substr, str)
34416 Ok(Expression::Function(Box::new(Function::new(
34417 "CHARINDEX".to_string(),
34418 vec![substr, str_expr],
34419 ))))
34420 } else if matches!(target, DialectType::BigQuery) {
34421 // Keep as INSTR
34422 Ok(Expression::Function(Box::new(Function::new(
34423 "INSTR".to_string(),
34424 vec![str_expr, substr],
34425 ))))
34426 } else {
34427 // Default: keep as INSTR
34428 Ok(Expression::Function(Box::new(Function::new(
34429 "INSTR".to_string(),
34430 vec![str_expr, substr],
34431 ))))
34432 }
34433 }
34434
34435 // BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL
34436 "DATE_TRUNC" if args.len() == 2 => {
34437 let expr = args.remove(0);
34438 let unit_expr = args.remove(0);
34439 let unit_str = get_unit_str(&unit_expr);
34440
34441 match target {
34442 DialectType::DuckDB
34443 | DialectType::Snowflake
34444 | DialectType::PostgreSQL
34445 | DialectType::Presto
34446 | DialectType::Trino
34447 | DialectType::Databricks
34448 | DialectType::Spark
34449 | DialectType::Redshift
34450 | DialectType::ClickHouse
34451 | DialectType::TSQL => {
34452 // Standard: DATE_TRUNC('UNIT', expr)
34453 Ok(Expression::Function(Box::new(Function::new(
34454 "DATE_TRUNC".to_string(),
34455 vec![
34456 Expression::Literal(Box::new(Literal::String(unit_str))),
34457 expr,
34458 ],
34459 ))))
34460 }
34461 _ => {
34462 // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
34463 Ok(Expression::Function(Box::new(Function::new(
34464 "DATE_TRUNC".to_string(),
34465 vec![expr, unit_expr],
34466 ))))
34467 }
34468 }
34469 }
34470
34471 // TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific
34472 "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
34473 // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
34474 let ts = args.remove(0);
34475 let unit_expr = args.remove(0);
34476 let tz = if !args.is_empty() {
34477 Some(args.remove(0))
34478 } else {
34479 None
34480 };
34481 let unit_str = get_unit_str(&unit_expr);
34482
34483 match target {
34484 DialectType::DuckDB => {
34485 // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
34486 // With timezone and a DAY/WEEK/MONTH/QUARTER/YEAR unit: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
34487 // With timezone and any finer unit (e.g. HOUR/MINUTE): plain DATE_TRUNC('UNIT', ts), no AT TIME ZONE wrapper
34488 let is_coarse = matches!(
34489 unit_str.as_str(),
34490 "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
34491 );
34492 // For DATETIME_TRUNC, cast string args to TIMESTAMP
34493 let cast_ts = if name == "DATETIME_TRUNC" {
34494 match ts {
34495 Expression::Literal(ref lit)
34496 if matches!(lit.as_ref(), Literal::String(ref _s)) =>
34497 {
34498 Expression::Cast(Box::new(Cast {
34499 this: ts,
34500 to: DataType::Timestamp {
34501 precision: None,
34502 timezone: false,
34503 },
34504 trailing_comments: vec![],
34505 double_colon_syntax: false,
34506 format: None,
34507 default: None,
34508 inferred_type: None,
34509 }))
34510 }
34511 _ => Self::maybe_cast_ts_to_tz(ts, &name),
34512 }
34513 } else {
34514 Self::maybe_cast_ts_to_tz(ts, &name)
34515 };
34516
34517 if let Some(tz_arg) = tz {
34518 if is_coarse {
34519 // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
34520 let at_tz = Expression::AtTimeZone(Box::new(
34521 crate::expressions::AtTimeZone {
34522 this: cast_ts,
34523 zone: tz_arg.clone(),
34524 },
34525 ));
34526 let date_trunc = Expression::Function(Box::new(Function::new(
34527 "DATE_TRUNC".to_string(),
34528 vec![
34529 Expression::Literal(Box::new(Literal::String(unit_str))),
34530 at_tz,
34531 ],
34532 )));
34533 Ok(Expression::AtTimeZone(Box::new(
34534 crate::expressions::AtTimeZone {
34535 this: date_trunc,
34536 zone: tz_arg,
34537 },
34538 )))
34539 } else {
34540 // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
34541 Ok(Expression::Function(Box::new(Function::new(
34542 "DATE_TRUNC".to_string(),
34543 vec![
34544 Expression::Literal(Box::new(Literal::String(unit_str))),
34545 cast_ts,
34546 ],
34547 ))))
34548 }
34549 } else {
34550 // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
34551 Ok(Expression::Function(Box::new(Function::new(
34552 "DATE_TRUNC".to_string(),
34553 vec![
34554 Expression::Literal(Box::new(Literal::String(unit_str))),
34555 cast_ts,
34556 ],
34557 ))))
34558 }
34559 }
34560 DialectType::Databricks | DialectType::Spark => {
34561 // Databricks/Spark: DATE_TRUNC('UNIT', ts)
34562 Ok(Expression::Function(Box::new(Function::new(
34563 "DATE_TRUNC".to_string(),
34564 vec![Expression::Literal(Box::new(Literal::String(unit_str))), ts],
34565 ))))
34566 }
34567 _ => {
34568 // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
34569 let unit = Expression::Literal(Box::new(Literal::String(unit_str)));
34570 let mut date_trunc_args = vec![unit, ts];
34571 if let Some(tz_arg) = tz {
34572 date_trunc_args.push(tz_arg);
34573 }
34574 Ok(Expression::Function(Box::new(Function::new(
34575 "TIMESTAMP_TRUNC".to_string(),
34576 date_trunc_args,
34577 ))))
34578 }
34579 }
34580 }
34581
34582 // TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME)
34583 "TIME" => {
34584 if args.len() == 3 {
34585 // TIME(h, m, s) constructor
34586 match target {
34587 DialectType::TSQL => {
34588 // TIMEFROMPARTS(h, m, s, 0, 0)
34589 args.push(Expression::number(0));
34590 args.push(Expression::number(0));
34591 Ok(Expression::Function(Box::new(Function::new(
34592 "TIMEFROMPARTS".to_string(),
34593 args,
34594 ))))
34595 }
34596 DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
34597 "MAKETIME".to_string(),
34598 args,
34599 )))),
34600 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
34601 Function::new("MAKE_TIME".to_string(), args),
34602 ))),
34603 _ => Ok(Expression::Function(Box::new(Function::new(
34604 "TIME".to_string(),
34605 args,
34606 )))),
34607 }
34608 } else if args.len() == 1 {
34609 let arg = args.remove(0);
34610 if matches!(target, DialectType::Spark) {
34611 // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
34612 Ok(Expression::Cast(Box::new(Cast {
34613 this: arg,
34614 to: DataType::Timestamp {
34615 timezone: false,
34616 precision: None,
34617 },
34618 trailing_comments: vec![],
34619 double_colon_syntax: false,
34620 format: None,
34621 default: None,
34622 inferred_type: None,
34623 })))
34624 } else {
34625 // Most targets: CAST(x AS TIME)
34626 Ok(Expression::Cast(Box::new(Cast {
34627 this: arg,
34628 to: DataType::Time {
34629 precision: None,
34630 timezone: false,
34631 },
34632 trailing_comments: vec![],
34633 double_colon_syntax: false,
34634 format: None,
34635 default: None,
34636 inferred_type: None,
34637 })))
34638 }
34639 } else if args.len() == 2 {
34640 // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
34641 let expr = args.remove(0);
34642 let tz = args.remove(0);
34643 let cast_tstz = Expression::Cast(Box::new(Cast {
34644 this: expr,
34645 to: DataType::Timestamp {
34646 timezone: true,
34647 precision: None,
34648 },
34649 trailing_comments: vec![],
34650 double_colon_syntax: false,
34651 format: None,
34652 default: None,
34653 inferred_type: None,
34654 }));
34655 let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
34656 this: cast_tstz,
34657 zone: tz,
34658 }));
34659 Ok(Expression::Cast(Box::new(Cast {
34660 this: at_tz,
34661 to: DataType::Time {
34662 precision: None,
34663 timezone: false,
34664 },
34665 trailing_comments: vec![],
34666 double_colon_syntax: false,
34667 format: None,
34668 default: None,
34669 inferred_type: None,
34670 })))
34671 } else {
34672 Ok(Expression::Function(Box::new(Function::new(
34673 "TIME".to_string(),
34674 args,
34675 ))))
34676 }
34677 }
34678
34679 // DATETIME('string') -> CAST('string' AS TIMESTAMP)
34680 // DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
34681 // DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
34682 // DATETIME(y, m, d, h, min, s) -> target-specific
34683 "DATETIME" => {
34684 // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
34685 if matches!(target, DialectType::BigQuery) {
34686 if args.len() == 2 {
34687 let has_time_literal = matches!(&args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
34688 if has_time_literal {
34689 let first = args.remove(0);
34690 let second = args.remove(0);
34691 let time_as_cast = match second {
34692 Expression::Literal(lit)
34693 if matches!(lit.as_ref(), Literal::Time(_)) =>
34694 {
34695 let Literal::Time(s) = lit.as_ref() else {
34696 unreachable!()
34697 };
34698 Expression::Cast(Box::new(Cast {
34699 this: Expression::Literal(Box::new(Literal::String(
34700 s.clone(),
34701 ))),
34702 to: DataType::Time {
34703 precision: None,
34704 timezone: false,
34705 },
34706 trailing_comments: vec![],
34707 double_colon_syntax: false,
34708 format: None,
34709 default: None,
34710 inferred_type: None,
34711 }))
34712 }
34713 other => other,
34714 };
34715 return Ok(Expression::Function(Box::new(Function::new(
34716 "DATETIME".to_string(),
34717 vec![first, time_as_cast],
34718 ))));
34719 }
34720 }
34721 return Ok(Expression::Function(Box::new(Function::new(
34722 "DATETIME".to_string(),
34723 args,
34724 ))));
34725 }
34726
34727 if args.len() == 1 {
34728 let arg = args.remove(0);
34729 Ok(Expression::Cast(Box::new(Cast {
34730 this: arg,
34731 to: DataType::Timestamp {
34732 timezone: false,
34733 precision: None,
34734 },
34735 trailing_comments: vec![],
34736 double_colon_syntax: false,
34737 format: None,
34738 default: None,
34739 inferred_type: None,
34740 })))
34741 } else if args.len() == 2 {
34742 let first = args.remove(0);
34743 let second = args.remove(0);
34744 // Check if second arg is a TIME literal
34745 let is_time_literal = matches!(&second, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
34746 if is_time_literal {
34747 // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
34748 let cast_date = Expression::Cast(Box::new(Cast {
34749 this: first,
34750 to: DataType::Date,
34751 trailing_comments: vec![],
34752 double_colon_syntax: false,
34753 format: None,
34754 default: None,
34755 inferred_type: None,
34756 }));
34757 // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
34758 let time_as_string = match second {
34759 Expression::Literal(lit)
34760 if matches!(lit.as_ref(), Literal::Time(_)) =>
34761 {
34762 let Literal::Time(s) = lit.as_ref() else {
34763 unreachable!()
34764 };
34765 Expression::Literal(Box::new(Literal::String(s.clone())))
34766 }
34767 other => other,
34768 };
34769 let cast_time = Expression::Cast(Box::new(Cast {
34770 this: time_as_string,
34771 to: DataType::Time {
34772 precision: None,
34773 timezone: false,
34774 },
34775 trailing_comments: vec![],
34776 double_colon_syntax: false,
34777 format: None,
34778 default: None,
34779 inferred_type: None,
34780 }));
34781 let add_expr =
34782 Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
34783 Ok(Expression::Cast(Box::new(Cast {
34784 this: add_expr,
34785 to: DataType::Timestamp {
34786 timezone: false,
34787 precision: None,
34788 },
34789 trailing_comments: vec![],
34790 double_colon_syntax: false,
34791 format: None,
34792 default: None,
34793 inferred_type: None,
34794 })))
34795 } else {
34796 // DATETIME('string', 'timezone')
34797 let cast_tstz = Expression::Cast(Box::new(Cast {
34798 this: first,
34799 to: DataType::Timestamp {
34800 timezone: true,
34801 precision: None,
34802 },
34803 trailing_comments: vec![],
34804 double_colon_syntax: false,
34805 format: None,
34806 default: None,
34807 inferred_type: None,
34808 }));
34809 let at_tz =
34810 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
34811 this: cast_tstz,
34812 zone: second,
34813 }));
34814 Ok(Expression::Cast(Box::new(Cast {
34815 this: at_tz,
34816 to: DataType::Timestamp {
34817 timezone: false,
34818 precision: None,
34819 },
34820 trailing_comments: vec![],
34821 double_colon_syntax: false,
34822 format: None,
34823 default: None,
34824 inferred_type: None,
34825 })))
34826 }
34827 } else if args.len() >= 3 {
34828 // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
34829 // For other targets, the DATETIME(...) call is kept unchanged
34830 if matches!(target, DialectType::Snowflake) {
34831 Ok(Expression::Function(Box::new(Function::new(
34832 "TIMESTAMP_FROM_PARTS".to_string(),
34833 args,
34834 ))))
34835 } else {
34836 Ok(Expression::Function(Box::new(Function::new(
34837 "DATETIME".to_string(),
34838 args,
34839 ))))
34840 }
34841 } else {
34842 Ok(Expression::Function(Box::new(Function::new(
34843 "DATETIME".to_string(),
34844 args,
34845 ))))
34846 }
34847 }
34848
34849 // TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
34850 // TIMESTAMP(x, tz) -> CONVERT_TIMEZONE(tz, CAST(x AS TIMESTAMP)) for Snowflake, CAST(x AS TIMESTAMP) AT TIME ZONE tz for other targets (e.g. DuckDB)
34851 "TIMESTAMP" => {
34852 if args.len() == 1 {
34853 let arg = args.remove(0);
34854 Ok(Expression::Cast(Box::new(Cast {
34855 this: arg,
34856 to: DataType::Timestamp {
34857 timezone: true,
34858 precision: None,
34859 },
34860 trailing_comments: vec![],
34861 double_colon_syntax: false,
34862 format: None,
34863 default: None,
34864 inferred_type: None,
34865 })))
34866 } else if args.len() == 2 {
34867 let arg = args.remove(0);
34868 let tz = args.remove(0);
34869 let cast_ts = Expression::Cast(Box::new(Cast {
34870 this: arg,
34871 to: DataType::Timestamp {
34872 timezone: false,
34873 precision: None,
34874 },
34875 trailing_comments: vec![],
34876 double_colon_syntax: false,
34877 format: None,
34878 default: None,
34879 inferred_type: None,
34880 }));
34881 if matches!(target, DialectType::Snowflake) {
34882 // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
34883 Ok(Expression::Function(Box::new(Function::new(
34884 "CONVERT_TIMEZONE".to_string(),
34885 vec![tz, cast_ts],
34886 ))))
34887 } else {
34888 Ok(Expression::AtTimeZone(Box::new(
34889 crate::expressions::AtTimeZone {
34890 this: cast_ts,
34891 zone: tz,
34892 },
34893 )))
34894 }
34895 } else {
34896 Ok(Expression::Function(Box::new(Function::new(
34897 "TIMESTAMP".to_string(),
34898 args,
34899 ))))
34900 }
34901 }
34902
34903 // STRING(x) -> CAST(x AS VARCHAR/TEXT)
34904 // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
34905 "STRING" => {
34906 if args.len() == 1 {
34907 let arg = args.remove(0);
34908 let cast_type = match target {
34909 DialectType::DuckDB => DataType::Text,
34910 _ => DataType::VarChar {
34911 length: None,
34912 parenthesized_length: false,
34913 },
34914 };
34915 Ok(Expression::Cast(Box::new(Cast {
34916 this: arg,
34917 to: cast_type,
34918 trailing_comments: vec![],
34919 double_colon_syntax: false,
34920 format: None,
34921 default: None,
34922 inferred_type: None,
34923 })))
34924 } else if args.len() == 2 {
34925 let arg = args.remove(0);
34926 let tz = args.remove(0);
34927 let cast_type = match target {
34928 DialectType::DuckDB => DataType::Text,
34929 _ => DataType::VarChar {
34930 length: None,
34931 parenthesized_length: false,
34932 },
34933 };
34934 if matches!(target, DialectType::Snowflake) {
34935 // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
34936 let convert_tz = Expression::Function(Box::new(Function::new(
34937 "CONVERT_TIMEZONE".to_string(),
34938 vec![
34939 Expression::Literal(Box::new(Literal::String("UTC".to_string()))),
34940 tz,
34941 arg,
34942 ],
34943 )));
34944 Ok(Expression::Cast(Box::new(Cast {
34945 this: convert_tz,
34946 to: cast_type,
34947 trailing_comments: vec![],
34948 double_colon_syntax: false,
34949 format: None,
34950 default: None,
34951 inferred_type: None,
34952 })))
34953 } else {
34954 // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
34955 let cast_ts = Expression::Cast(Box::new(Cast {
34956 this: arg,
34957 to: DataType::Timestamp {
34958 timezone: false,
34959 precision: None,
34960 },
34961 trailing_comments: vec![],
34962 double_colon_syntax: false,
34963 format: None,
34964 default: None,
34965 inferred_type: None,
34966 }));
34967 let at_utc =
34968 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
34969 this: cast_ts,
34970 zone: Expression::Literal(Box::new(Literal::String(
34971 "UTC".to_string(),
34972 ))),
34973 }));
34974 let at_tz =
34975 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
34976 this: at_utc,
34977 zone: tz,
34978 }));
34979 Ok(Expression::Cast(Box::new(Cast {
34980 this: at_tz,
34981 to: cast_type,
34982 trailing_comments: vec![],
34983 double_colon_syntax: false,
34984 format: None,
34985 default: None,
34986 inferred_type: None,
34987 })))
34988 }
34989 } else {
34990 Ok(Expression::Function(Box::new(Function::new(
34991 "STRING".to_string(),
34992 args,
34993 ))))
34994 }
34995 }
34996
34997 // UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
34998 "UNIX_SECONDS" if args.len() == 1 => {
34999 let ts = args.remove(0);
35000 match target {
35001 DialectType::DuckDB => {
35002 // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
35003 let cast_ts = Self::ensure_cast_timestamptz(ts);
35004 let epoch = Expression::Function(Box::new(Function::new(
35005 "EPOCH".to_string(),
35006 vec![cast_ts],
35007 )));
35008 Ok(Expression::Cast(Box::new(Cast {
35009 this: epoch,
35010 to: DataType::BigInt { length: None },
35011 trailing_comments: vec![],
35012 double_colon_syntax: false,
35013 format: None,
35014 default: None,
35015 inferred_type: None,
35016 })))
35017 }
35018 DialectType::Snowflake => {
35019 // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
35020 let epoch = Expression::Cast(Box::new(Cast {
35021 this: Expression::Literal(Box::new(Literal::String(
35022 "1970-01-01 00:00:00+00".to_string(),
35023 ))),
35024 to: DataType::Timestamp {
35025 timezone: true,
35026 precision: None,
35027 },
35028 trailing_comments: vec![],
35029 double_colon_syntax: false,
35030 format: None,
35031 default: None,
35032 inferred_type: None,
35033 }));
35034 Ok(Expression::TimestampDiff(Box::new(
35035 crate::expressions::TimestampDiff {
35036 this: Box::new(epoch),
35037 expression: Box::new(ts),
35038 unit: Some("SECONDS".to_string()),
35039 },
35040 )))
35041 }
35042 _ => Ok(Expression::Function(Box::new(Function::new(
35043 "UNIX_SECONDS".to_string(),
35044 vec![ts],
35045 )))),
35046 }
35047 }
35048
35049 "UNIX_MILLIS" if args.len() == 1 => {
35050 let ts = args.remove(0);
35051 match target {
35052 DialectType::DuckDB => {
35053 // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
35054 let cast_ts = Self::ensure_cast_timestamptz(ts);
35055 Ok(Expression::Function(Box::new(Function::new(
35056 "EPOCH_MS".to_string(),
35057 vec![cast_ts],
35058 ))))
35059 }
35060 _ => Ok(Expression::Function(Box::new(Function::new(
35061 "UNIX_MILLIS".to_string(),
35062 vec![ts],
35063 )))),
35064 }
35065 }
35066
35067 "UNIX_MICROS" if args.len() == 1 => {
35068 let ts = args.remove(0);
35069 match target {
35070 DialectType::DuckDB => {
35071 // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
35072 let cast_ts = Self::ensure_cast_timestamptz(ts);
35073 Ok(Expression::Function(Box::new(Function::new(
35074 "EPOCH_US".to_string(),
35075 vec![cast_ts],
35076 ))))
35077 }
35078 _ => Ok(Expression::Function(Box::new(Function::new(
35079 "UNIX_MICROS".to_string(),
35080 vec![ts],
35081 )))),
35082 }
35083 }
35084
35085 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
35086 "ARRAY_CONCAT" | "LIST_CONCAT" => {
35087 match target {
35088 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
35089 // CONCAT(arr1, arr2, ...)
35090 Ok(Expression::Function(Box::new(Function::new(
35091 "CONCAT".to_string(),
35092 args,
35093 ))))
35094 }
35095 DialectType::Presto | DialectType::Trino => {
35096 // CONCAT(arr1, arr2, ...)
35097 Ok(Expression::Function(Box::new(Function::new(
35098 "CONCAT".to_string(),
35099 args,
35100 ))))
35101 }
35102 DialectType::Snowflake => {
35103 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
35104 if args.len() == 1 {
35105 // ARRAY_CAT requires 2 args, add empty array as []
35106 let empty_arr = Expression::ArrayFunc(Box::new(
35107 crate::expressions::ArrayConstructor {
35108 expressions: vec![],
35109 bracket_notation: true,
35110 use_list_keyword: false,
35111 },
35112 ));
35113 let mut new_args = args;
35114 new_args.push(empty_arr);
35115 Ok(Expression::Function(Box::new(Function::new(
35116 "ARRAY_CAT".to_string(),
35117 new_args,
35118 ))))
35119 } else if args.is_empty() {
35120 Ok(Expression::Function(Box::new(Function::new(
35121 "ARRAY_CAT".to_string(),
35122 args,
35123 ))))
35124 } else {
35125 let mut it = args.into_iter().rev();
35126 let mut result = it.next().unwrap();
35127 for arr in it {
35128 result = Expression::Function(Box::new(Function::new(
35129 "ARRAY_CAT".to_string(),
35130 vec![arr, result],
35131 )));
35132 }
35133 Ok(result)
35134 }
35135 }
35136 DialectType::PostgreSQL => {
35137 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
35138 if args.len() <= 1 {
35139 Ok(Expression::Function(Box::new(Function::new(
35140 "ARRAY_CAT".to_string(),
35141 args,
35142 ))))
35143 } else {
35144 let mut it = args.into_iter().rev();
35145 let mut result = it.next().unwrap();
35146 for arr in it {
35147 result = Expression::Function(Box::new(Function::new(
35148 "ARRAY_CAT".to_string(),
35149 vec![arr, result],
35150 )));
35151 }
35152 Ok(result)
35153 }
35154 }
35155 DialectType::Redshift => {
35156 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
35157 if args.len() <= 2 {
35158 Ok(Expression::Function(Box::new(Function::new(
35159 "ARRAY_CONCAT".to_string(),
35160 args,
35161 ))))
35162 } else {
35163 let mut it = args.into_iter().rev();
35164 let mut result = it.next().unwrap();
35165 for arr in it {
35166 result = Expression::Function(Box::new(Function::new(
35167 "ARRAY_CONCAT".to_string(),
35168 vec![arr, result],
35169 )));
35170 }
35171 Ok(result)
35172 }
35173 }
35174 DialectType::DuckDB => {
35175 // LIST_CONCAT supports multiple args natively in DuckDB
35176 Ok(Expression::Function(Box::new(Function::new(
35177 "LIST_CONCAT".to_string(),
35178 args,
35179 ))))
35180 }
35181 _ => Ok(Expression::Function(Box::new(Function::new(
35182 "ARRAY_CONCAT".to_string(),
35183 args,
35184 )))),
35185 }
35186 }
35187
35188 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
35189 "ARRAY_CONCAT_AGG" if args.len() == 1 => {
35190 let arg = args.remove(0);
35191 match target {
35192 DialectType::Snowflake => {
35193 let array_agg =
35194 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
35195 this: arg,
35196 distinct: false,
35197 filter: None,
35198 order_by: vec![],
35199 name: None,
35200 ignore_nulls: None,
35201 having_max: None,
35202 limit: None,
35203 inferred_type: None,
35204 }));
35205 Ok(Expression::Function(Box::new(Function::new(
35206 "ARRAY_FLATTEN".to_string(),
35207 vec![array_agg],
35208 ))))
35209 }
35210 _ => Ok(Expression::Function(Box::new(Function::new(
35211 "ARRAY_CONCAT_AGG".to_string(),
35212 vec![arg],
35213 )))),
35214 }
35215 }
35216
35217 // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
35218 "MD5" if args.len() == 1 => {
35219 let arg = args.remove(0);
35220 match target {
35221 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
35222 // UNHEX(MD5(x))
35223 let md5 = Expression::Function(Box::new(Function::new(
35224 "MD5".to_string(),
35225 vec![arg],
35226 )));
35227 Ok(Expression::Function(Box::new(Function::new(
35228 "UNHEX".to_string(),
35229 vec![md5],
35230 ))))
35231 }
35232 DialectType::Snowflake => {
35233 // MD5_BINARY(x)
35234 Ok(Expression::Function(Box::new(Function::new(
35235 "MD5_BINARY".to_string(),
35236 vec![arg],
35237 ))))
35238 }
35239 _ => Ok(Expression::Function(Box::new(Function::new(
35240 "MD5".to_string(),
35241 vec![arg],
35242 )))),
35243 }
35244 }
35245
35246 "SHA1" if args.len() == 1 => {
35247 let arg = args.remove(0);
35248 match target {
35249 DialectType::DuckDB => {
35250 // UNHEX(SHA1(x))
35251 let sha1 = Expression::Function(Box::new(Function::new(
35252 "SHA1".to_string(),
35253 vec![arg],
35254 )));
35255 Ok(Expression::Function(Box::new(Function::new(
35256 "UNHEX".to_string(),
35257 vec![sha1],
35258 ))))
35259 }
35260 _ => Ok(Expression::Function(Box::new(Function::new(
35261 "SHA1".to_string(),
35262 vec![arg],
35263 )))),
35264 }
35265 }
35266
35267 "SHA256" if args.len() == 1 => {
35268 let arg = args.remove(0);
35269 match target {
35270 DialectType::DuckDB => {
35271 // UNHEX(SHA256(x))
35272 let sha = Expression::Function(Box::new(Function::new(
35273 "SHA256".to_string(),
35274 vec![arg],
35275 )));
35276 Ok(Expression::Function(Box::new(Function::new(
35277 "UNHEX".to_string(),
35278 vec![sha],
35279 ))))
35280 }
35281 DialectType::Snowflake => {
35282 // SHA2_BINARY(x, 256)
35283 Ok(Expression::Function(Box::new(Function::new(
35284 "SHA2_BINARY".to_string(),
35285 vec![arg, Expression::number(256)],
35286 ))))
35287 }
35288 DialectType::Redshift | DialectType::Spark => {
35289 // SHA2(x, 256)
35290 Ok(Expression::Function(Box::new(Function::new(
35291 "SHA2".to_string(),
35292 vec![arg, Expression::number(256)],
35293 ))))
35294 }
35295 _ => Ok(Expression::Function(Box::new(Function::new(
35296 "SHA256".to_string(),
35297 vec![arg],
35298 )))),
35299 }
35300 }
35301
35302 "SHA512" if args.len() == 1 => {
35303 let arg = args.remove(0);
35304 match target {
35305 DialectType::Snowflake => {
35306 // SHA2_BINARY(x, 512)
35307 Ok(Expression::Function(Box::new(Function::new(
35308 "SHA2_BINARY".to_string(),
35309 vec![arg, Expression::number(512)],
35310 ))))
35311 }
35312 DialectType::Redshift | DialectType::Spark => {
35313 // SHA2(x, 512)
35314 Ok(Expression::Function(Box::new(Function::new(
35315 "SHA2".to_string(),
35316 vec![arg, Expression::number(512)],
35317 ))))
35318 }
35319 _ => Ok(Expression::Function(Box::new(Function::new(
35320 "SHA512".to_string(),
35321 vec![arg],
35322 )))),
35323 }
35324 }
35325
35326 // REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg
35327 "REGEXP_EXTRACT_ALL" if args.len() == 2 => {
35328 let str_expr = args.remove(0);
35329 let pattern = args.remove(0);
35330
35331 // Check if pattern contains capturing groups (parentheses)
35332 let has_groups = match &pattern {
35333 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
35334 let Literal::String(s) = lit.as_ref() else {
35335 unreachable!()
35336 };
35337 s.contains('(') && s.contains(')')
35338 }
35339 _ => false,
35340 };
35341
35342 match target {
35343 DialectType::DuckDB => {
35344 let group = if has_groups {
35345 Expression::number(1)
35346 } else {
35347 Expression::number(0)
35348 };
35349 Ok(Expression::Function(Box::new(Function::new(
35350 "REGEXP_EXTRACT_ALL".to_string(),
35351 vec![str_expr, pattern, group],
35352 ))))
35353 }
35354 DialectType::Spark | DialectType::Databricks => {
35355 // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
35356 if has_groups {
35357 Ok(Expression::Function(Box::new(Function::new(
35358 "REGEXP_EXTRACT_ALL".to_string(),
35359 vec![str_expr, pattern],
35360 ))))
35361 } else {
35362 Ok(Expression::Function(Box::new(Function::new(
35363 "REGEXP_EXTRACT_ALL".to_string(),
35364 vec![str_expr, pattern, Expression::number(0)],
35365 ))))
35366 }
35367 }
35368 DialectType::Presto | DialectType::Trino => {
35369 if has_groups {
35370 Ok(Expression::Function(Box::new(Function::new(
35371 "REGEXP_EXTRACT_ALL".to_string(),
35372 vec![str_expr, pattern, Expression::number(1)],
35373 ))))
35374 } else {
35375 Ok(Expression::Function(Box::new(Function::new(
35376 "REGEXP_EXTRACT_ALL".to_string(),
35377 vec![str_expr, pattern],
35378 ))))
35379 }
35380 }
35381 DialectType::Snowflake => {
35382 if has_groups {
35383 // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
35384 Ok(Expression::Function(Box::new(Function::new(
35385 "REGEXP_EXTRACT_ALL".to_string(),
35386 vec![
35387 str_expr,
35388 pattern,
35389 Expression::number(1),
35390 Expression::number(1),
35391 Expression::Literal(Box::new(Literal::String("c".to_string()))),
35392 Expression::number(1),
35393 ],
35394 ))))
35395 } else {
35396 Ok(Expression::Function(Box::new(Function::new(
35397 "REGEXP_EXTRACT_ALL".to_string(),
35398 vec![str_expr, pattern],
35399 ))))
35400 }
35401 }
35402 _ => Ok(Expression::Function(Box::new(Function::new(
35403 "REGEXP_EXTRACT_ALL".to_string(),
35404 vec![str_expr, pattern],
35405 )))),
35406 }
35407 }
35408
35409 // MOD(x, y) -> x % y for PostgreSQL/DuckDB/Presto/Trino/Athena/Snowflake/Hive/Spark/Databricks; other targets keep MOD(x, y)
35410 "MOD" if args.len() == 2 => {
35411 match target {
35412 DialectType::PostgreSQL
35413 | DialectType::DuckDB
35414 | DialectType::Presto
35415 | DialectType::Trino
35416 | DialectType::Athena
35417 | DialectType::Snowflake => {
35418 let x = args.remove(0);
35419 let y = args.remove(0);
35420 // Wrap complex expressions in parens to preserve precedence
35421 let needs_paren = |e: &Expression| {
35422 matches!(
35423 e,
35424 Expression::Add(_)
35425 | Expression::Sub(_)
35426 | Expression::Mul(_)
35427 | Expression::Div(_)
35428 )
35429 };
35430 let x = if needs_paren(&x) {
35431 Expression::Paren(Box::new(crate::expressions::Paren {
35432 this: x,
35433 trailing_comments: vec![],
35434 }))
35435 } else {
35436 x
35437 };
35438 let y = if needs_paren(&y) {
35439 Expression::Paren(Box::new(crate::expressions::Paren {
35440 this: y,
35441 trailing_comments: vec![],
35442 }))
35443 } else {
35444 y
35445 };
35446 Ok(Expression::Mod(Box::new(
35447 crate::expressions::BinaryOp::new(x, y),
35448 )))
35449 }
35450 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
35451 // Hive/Spark: a % b
35452 let x = args.remove(0);
35453 let y = args.remove(0);
35454 let needs_paren = |e: &Expression| {
35455 matches!(
35456 e,
35457 Expression::Add(_)
35458 | Expression::Sub(_)
35459 | Expression::Mul(_)
35460 | Expression::Div(_)
35461 )
35462 };
35463 let x = if needs_paren(&x) {
35464 Expression::Paren(Box::new(crate::expressions::Paren {
35465 this: x,
35466 trailing_comments: vec![],
35467 }))
35468 } else {
35469 x
35470 };
35471 let y = if needs_paren(&y) {
35472 Expression::Paren(Box::new(crate::expressions::Paren {
35473 this: y,
35474 trailing_comments: vec![],
35475 }))
35476 } else {
35477 y
35478 };
35479 Ok(Expression::Mod(Box::new(
35480 crate::expressions::BinaryOp::new(x, y),
35481 )))
35482 }
35483 _ => Ok(Expression::Function(Box::new(Function::new(
35484 "MOD".to_string(),
35485 args,
35486 )))),
35487 }
35488 }
35489
35490 // ARRAY_FILTER(arr, lambda) -> LIST_FILTER for DuckDB, ARRAY_FILTER for StarRocks, FILTER for all other targets
35491 "ARRAY_FILTER" if args.len() == 2 => {
35492 let name = match target {
35493 DialectType::DuckDB => "LIST_FILTER",
35494 DialectType::StarRocks => "ARRAY_FILTER",
35495 _ => "FILTER",
35496 };
35497 Ok(Expression::Function(Box::new(Function::new(
35498 name.to_string(),
35499 args,
35500 ))))
35501 }
35502 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
35503 "FILTER" if args.len() == 2 => {
35504 let name = match target {
35505 DialectType::DuckDB => "LIST_FILTER",
35506 DialectType::StarRocks => "ARRAY_FILTER",
35507 _ => "FILTER",
35508 };
35509 Ok(Expression::Function(Box::new(Function::new(
35510 name.to_string(),
35511 args,
35512 ))))
35513 }
35514 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
35515 "REDUCE" if args.len() >= 3 => {
35516 let name = match target {
35517 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
35518 _ => "REDUCE",
35519 };
35520 Ok(Expression::Function(Box::new(Function::new(
35521 name.to_string(),
35522 args,
35523 ))))
35524 }
35525 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
35526 "ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
35527 Function::new("ARRAY_REVERSE".to_string(), args),
35528 ))),
35529
35530 // CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
35531 "CONCAT" if args.len() > 2 => match target {
35532 DialectType::DuckDB => {
35533 let mut it = args.into_iter();
35534 let mut result = it.next().unwrap();
35535 for arg in it {
35536 result = Expression::DPipe(Box::new(crate::expressions::DPipe {
35537 this: Box::new(result),
35538 expression: Box::new(arg),
35539 safe: None,
35540 }));
35541 }
35542 Ok(result)
35543 }
35544 _ => Ok(Expression::Function(Box::new(Function::new(
35545 "CONCAT".to_string(),
35546 args,
35547 )))),
35548 },
35549
35550 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
35551 "GENERATE_DATE_ARRAY" => {
35552 if matches!(target, DialectType::BigQuery) {
35553 // BQ->BQ: add default interval if not present
35554 if args.len() == 2 {
35555 let start = args.remove(0);
35556 let end = args.remove(0);
35557 let default_interval =
35558 Expression::Interval(Box::new(crate::expressions::Interval {
35559 this: Some(Expression::Literal(Box::new(Literal::String(
35560 "1".to_string(),
35561 )))),
35562 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35563 unit: crate::expressions::IntervalUnit::Day,
35564 use_plural: false,
35565 }),
35566 }));
35567 Ok(Expression::Function(Box::new(Function::new(
35568 "GENERATE_DATE_ARRAY".to_string(),
35569 vec![start, end, default_interval],
35570 ))))
35571 } else {
35572 Ok(Expression::Function(Box::new(Function::new(
35573 "GENERATE_DATE_ARRAY".to_string(),
35574 args,
35575 ))))
35576 }
35577 } else if matches!(target, DialectType::DuckDB) {
35578 // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
35579 let start = args.get(0).cloned();
35580 let end = args.get(1).cloned();
35581 let step = args.get(2).cloned().or_else(|| {
35582 Some(Expression::Interval(Box::new(
35583 crate::expressions::Interval {
35584 this: Some(Expression::Literal(Box::new(Literal::String(
35585 "1".to_string(),
35586 )))),
35587 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35588 unit: crate::expressions::IntervalUnit::Day,
35589 use_plural: false,
35590 }),
35591 },
35592 )))
35593 });
35594
35595 // Wrap start/end in CAST(... AS DATE) only for string literals
35596 let maybe_cast_date = |expr: Expression| -> Expression {
35597 if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
35598 {
35599 Expression::Cast(Box::new(Cast {
35600 this: expr,
35601 to: DataType::Date,
35602 trailing_comments: vec![],
35603 double_colon_syntax: false,
35604 format: None,
35605 default: None,
35606 inferred_type: None,
35607 }))
35608 } else {
35609 expr
35610 }
35611 };
35612 let cast_start = start.map(maybe_cast_date);
35613 let cast_end = end.map(maybe_cast_date);
35614
35615 let gen_series =
35616 Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
35617 start: cast_start.map(Box::new),
35618 end: cast_end.map(Box::new),
35619 step: step.map(Box::new),
35620 is_end_exclusive: None,
35621 }));
35622
35623 // Wrap in CAST(... AS DATE[])
35624 Ok(Expression::Cast(Box::new(Cast {
35625 this: gen_series,
35626 to: DataType::Array {
35627 element_type: Box::new(DataType::Date),
35628 dimension: None,
35629 },
35630 trailing_comments: vec![],
35631 double_colon_syntax: false,
35632 format: None,
35633 default: None,
35634 inferred_type: None,
35635 })))
35636 } else if matches!(target, DialectType::Snowflake) {
35637 // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
35638 // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
35639 if args.len() == 2 {
35640 let start = args.remove(0);
35641 let end = args.remove(0);
35642 let default_interval =
35643 Expression::Interval(Box::new(crate::expressions::Interval {
35644 this: Some(Expression::Literal(Box::new(Literal::String(
35645 "1".to_string(),
35646 )))),
35647 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35648 unit: crate::expressions::IntervalUnit::Day,
35649 use_plural: false,
35650 }),
35651 }));
35652 Ok(Expression::Function(Box::new(Function::new(
35653 "GENERATE_DATE_ARRAY".to_string(),
35654 vec![start, end, default_interval],
35655 ))))
35656 } else {
35657 Ok(Expression::Function(Box::new(Function::new(
35658 "GENERATE_DATE_ARRAY".to_string(),
35659 args,
35660 ))))
35661 }
35662 } else {
35663 // Convert to GenerateSeries for other targets
35664 let start = args.get(0).cloned();
35665 let end = args.get(1).cloned();
35666 let step = args.get(2).cloned().or_else(|| {
35667 Some(Expression::Interval(Box::new(
35668 crate::expressions::Interval {
35669 this: Some(Expression::Literal(Box::new(Literal::String(
35670 "1".to_string(),
35671 )))),
35672 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35673 unit: crate::expressions::IntervalUnit::Day,
35674 use_plural: false,
35675 }),
35676 },
35677 )))
35678 });
35679 Ok(Expression::GenerateSeries(Box::new(
35680 crate::expressions::GenerateSeries {
35681 start: start.map(Box::new),
35682 end: end.map(Box::new),
35683 step: step.map(Box::new),
35684 is_end_exclusive: None,
35685 },
35686 )))
35687 }
35688 }
35689
35690 // PARSE_DATE(format, str) -> target-specific
35691 "PARSE_DATE" if args.len() == 2 => {
35692 let format = args.remove(0);
35693 let str_expr = args.remove(0);
35694 match target {
35695 DialectType::DuckDB => {
35696 // CAST(STRPTIME(str, duck_format) AS DATE)
35697 let duck_format = Self::bq_format_to_duckdb(&format);
35698 let strptime = Expression::Function(Box::new(Function::new(
35699 "STRPTIME".to_string(),
35700 vec![str_expr, duck_format],
35701 )));
35702 Ok(Expression::Cast(Box::new(Cast {
35703 this: strptime,
35704 to: DataType::Date,
35705 trailing_comments: vec![],
35706 double_colon_syntax: false,
35707 format: None,
35708 default: None,
35709 inferred_type: None,
35710 })))
35711 }
35712 DialectType::Snowflake => {
35713 // _POLYGLOT_DATE(str, snowflake_format)
35714 // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
35715 let sf_format = Self::bq_format_to_snowflake(&format);
35716 Ok(Expression::Function(Box::new(Function::new(
35717 "_POLYGLOT_DATE".to_string(),
35718 vec![str_expr, sf_format],
35719 ))))
35720 }
35721 _ => Ok(Expression::Function(Box::new(Function::new(
35722 "PARSE_DATE".to_string(),
35723 vec![format, str_expr],
35724 )))),
35725 }
35726 }
35727
35728 // PARSE_TIMESTAMP(format, str) -> target-specific
35729 "PARSE_TIMESTAMP" if args.len() >= 2 => {
35730 let format = args.remove(0);
35731 let str_expr = args.remove(0);
35732 let tz = if !args.is_empty() {
35733 Some(args.remove(0))
35734 } else {
35735 None
35736 };
35737 match target {
35738 DialectType::DuckDB => {
35739 let duck_format = Self::bq_format_to_duckdb(&format);
35740 let strptime = Expression::Function(Box::new(Function::new(
35741 "STRPTIME".to_string(),
35742 vec![str_expr, duck_format],
35743 )));
35744 Ok(strptime)
35745 }
35746 _ => {
35747 let mut result_args = vec![format, str_expr];
35748 if let Some(tz_arg) = tz {
35749 result_args.push(tz_arg);
35750 }
35751 Ok(Expression::Function(Box::new(Function::new(
35752 "PARSE_TIMESTAMP".to_string(),
35753 result_args,
35754 ))))
35755 }
35756 }
35757 }
35758
35759 // FORMAT_DATE(format, date) -> target-specific
35760 "FORMAT_DATE" if args.len() == 2 => {
35761 let format = args.remove(0);
35762 let date_expr = args.remove(0);
35763 match target {
35764 DialectType::DuckDB => {
35765 // STRFTIME(CAST(date AS DATE), format)
35766 let cast_date = Expression::Cast(Box::new(Cast {
35767 this: date_expr,
35768 to: DataType::Date,
35769 trailing_comments: vec![],
35770 double_colon_syntax: false,
35771 format: None,
35772 default: None,
35773 inferred_type: None,
35774 }));
35775 Ok(Expression::Function(Box::new(Function::new(
35776 "STRFTIME".to_string(),
35777 vec![cast_date, format],
35778 ))))
35779 }
35780 _ => Ok(Expression::Function(Box::new(Function::new(
35781 "FORMAT_DATE".to_string(),
35782 vec![format, date_expr],
35783 )))),
35784 }
35785 }
35786
35787 // FORMAT_DATETIME(format, datetime) -> target-specific
35788 "FORMAT_DATETIME" if args.len() == 2 => {
35789 let format = args.remove(0);
35790 let dt_expr = args.remove(0);
35791
35792 if matches!(target, DialectType::BigQuery) {
35793 // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
35794 let norm_format = Self::bq_format_normalize_bq(&format);
35795 // Also strip DATETIME keyword from typed literals
35796 let norm_dt = match dt_expr {
35797 Expression::Literal(lit)
35798 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
35799 {
35800 let Literal::Timestamp(s) = lit.as_ref() else {
35801 unreachable!()
35802 };
35803 Expression::Cast(Box::new(Cast {
35804 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
35805 to: DataType::Custom {
35806 name: "DATETIME".to_string(),
35807 },
35808 trailing_comments: vec![],
35809 double_colon_syntax: false,
35810 format: None,
35811 default: None,
35812 inferred_type: None,
35813 }))
35814 }
35815 other => other,
35816 };
35817 return Ok(Expression::Function(Box::new(Function::new(
35818 "FORMAT_DATETIME".to_string(),
35819 vec![norm_format, norm_dt],
35820 ))));
35821 }
35822
35823 match target {
35824 DialectType::DuckDB => {
35825 // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
35826 let cast_dt = Self::ensure_cast_timestamp(dt_expr);
35827 let duck_format = Self::bq_format_to_duckdb(&format);
35828 Ok(Expression::Function(Box::new(Function::new(
35829 "STRFTIME".to_string(),
35830 vec![cast_dt, duck_format],
35831 ))))
35832 }
35833 _ => Ok(Expression::Function(Box::new(Function::new(
35834 "FORMAT_DATETIME".to_string(),
35835 vec![format, dt_expr],
35836 )))),
35837 }
35838 }
35839
35840 // FORMAT_TIMESTAMP(format, ts) -> target-specific
35841 "FORMAT_TIMESTAMP" if args.len() == 2 => {
35842 let format = args.remove(0);
35843 let ts_expr = args.remove(0);
35844 match target {
35845 DialectType::DuckDB => {
35846 // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
35847 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
35848 let cast_ts = Expression::Cast(Box::new(Cast {
35849 this: cast_tstz,
35850 to: DataType::Timestamp {
35851 timezone: false,
35852 precision: None,
35853 },
35854 trailing_comments: vec![],
35855 double_colon_syntax: false,
35856 format: None,
35857 default: None,
35858 inferred_type: None,
35859 }));
35860 Ok(Expression::Function(Box::new(Function::new(
35861 "STRFTIME".to_string(),
35862 vec![cast_ts, format],
35863 ))))
35864 }
35865 DialectType::Snowflake => {
35866 // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
35867 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
35868 let cast_ts = Expression::Cast(Box::new(Cast {
35869 this: cast_tstz,
35870 to: DataType::Timestamp {
35871 timezone: false,
35872 precision: None,
35873 },
35874 trailing_comments: vec![],
35875 double_colon_syntax: false,
35876 format: None,
35877 default: None,
35878 inferred_type: None,
35879 }));
35880 let sf_format = Self::bq_format_to_snowflake(&format);
35881 Ok(Expression::Function(Box::new(Function::new(
35882 "TO_CHAR".to_string(),
35883 vec![cast_ts, sf_format],
35884 ))))
35885 }
35886 _ => Ok(Expression::Function(Box::new(Function::new(
35887 "FORMAT_TIMESTAMP".to_string(),
35888 vec![format, ts_expr],
35889 )))),
35890 }
35891 }
35892
35893 // UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB
35894 "UNIX_DATE" if args.len() == 1 => {
35895 let date = args.remove(0);
35896 match target {
35897 DialectType::DuckDB => {
35898 let epoch = Expression::Cast(Box::new(Cast {
35899 this: Expression::Literal(Box::new(Literal::String(
35900 "1970-01-01".to_string(),
35901 ))),
35902 to: DataType::Date,
35903 trailing_comments: vec![],
35904 double_colon_syntax: false,
35905 format: None,
35906 default: None,
35907 inferred_type: None,
35908 }));
35909 // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
35910 // Need to convert DATE literal to CAST
35911 let norm_date = Self::date_literal_to_cast(date);
35912 Ok(Expression::Function(Box::new(Function::new(
35913 "DATE_DIFF".to_string(),
35914 vec![
35915 Expression::Literal(Box::new(Literal::String("DAY".to_string()))),
35916 epoch,
35917 norm_date,
35918 ],
35919 ))))
35920 }
35921 _ => Ok(Expression::Function(Box::new(Function::new(
35922 "UNIX_DATE".to_string(),
35923 vec![date],
35924 )))),
35925 }
35926 }
35927
35928 // UNIX_SECONDS(ts) -> target-specific
35929 "UNIX_SECONDS" if args.len() == 1 => {
35930 let ts = args.remove(0);
35931 match target {
35932 DialectType::DuckDB => {
35933 // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
35934 let norm_ts = Self::ts_literal_to_cast_tz(ts);
35935 let epoch = Expression::Function(Box::new(Function::new(
35936 "EPOCH".to_string(),
35937 vec![norm_ts],
35938 )));
35939 Ok(Expression::Cast(Box::new(Cast {
35940 this: epoch,
35941 to: DataType::BigInt { length: None },
35942 trailing_comments: vec![],
35943 double_colon_syntax: false,
35944 format: None,
35945 default: None,
35946 inferred_type: None,
35947 })))
35948 }
35949 DialectType::Snowflake => {
35950 // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
35951 let epoch = Expression::Cast(Box::new(Cast {
35952 this: Expression::Literal(Box::new(Literal::String(
35953 "1970-01-01 00:00:00+00".to_string(),
35954 ))),
35955 to: DataType::Timestamp {
35956 timezone: true,
35957 precision: None,
35958 },
35959 trailing_comments: vec![],
35960 double_colon_syntax: false,
35961 format: None,
35962 default: None,
35963 inferred_type: None,
35964 }));
35965 Ok(Expression::Function(Box::new(Function::new(
35966 "TIMESTAMPDIFF".to_string(),
35967 vec![
35968 Expression::Identifier(Identifier::new("SECONDS".to_string())),
35969 epoch,
35970 ts,
35971 ],
35972 ))))
35973 }
35974 _ => Ok(Expression::Function(Box::new(Function::new(
35975 "UNIX_SECONDS".to_string(),
35976 vec![ts],
35977 )))),
35978 }
35979 }
35980
35981 // UNIX_MILLIS(ts) -> target-specific
35982 "UNIX_MILLIS" if args.len() == 1 => {
35983 let ts = args.remove(0);
35984 match target {
35985 DialectType::DuckDB => {
35986 let norm_ts = Self::ts_literal_to_cast_tz(ts);
35987 Ok(Expression::Function(Box::new(Function::new(
35988 "EPOCH_MS".to_string(),
35989 vec![norm_ts],
35990 ))))
35991 }
35992 _ => Ok(Expression::Function(Box::new(Function::new(
35993 "UNIX_MILLIS".to_string(),
35994 vec![ts],
35995 )))),
35996 }
35997 }
35998
35999 // UNIX_MICROS(ts) -> target-specific
36000 "UNIX_MICROS" if args.len() == 1 => {
36001 let ts = args.remove(0);
36002 match target {
36003 DialectType::DuckDB => {
36004 let norm_ts = Self::ts_literal_to_cast_tz(ts);
36005 Ok(Expression::Function(Box::new(Function::new(
36006 "EPOCH_US".to_string(),
36007 vec![norm_ts],
36008 ))))
36009 }
36010 _ => Ok(Expression::Function(Box::new(Function::new(
36011 "UNIX_MICROS".to_string(),
36012 vec![ts],
36013 )))),
36014 }
36015 }
36016
36017 // INSTR(str, substr) -> target-specific
36018 "INSTR" => {
36019 if matches!(target, DialectType::BigQuery) {
36020 // BQ->BQ: keep as INSTR
36021 Ok(Expression::Function(Box::new(Function::new(
36022 "INSTR".to_string(),
36023 args,
36024 ))))
36025 } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
36026 // Snowflake: CHARINDEX(substr, str) - swap args
36027 let str_expr = args.remove(0);
36028 let substr = args.remove(0);
36029 Ok(Expression::Function(Box::new(Function::new(
36030 "CHARINDEX".to_string(),
36031 vec![substr, str_expr],
36032 ))))
36033 } else {
36034 // Keep as INSTR for other targets
36035 Ok(Expression::Function(Box::new(Function::new(
36036 "INSTR".to_string(),
36037 args,
36038 ))))
36039 }
36040 }
36041
36042 // CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone
36043 "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
36044 if matches!(target, DialectType::BigQuery) {
36045 // BQ->BQ: always output with parens (function form), keep any timezone arg
36046 Ok(Expression::Function(Box::new(Function::new(name, args))))
36047 } else if name == "CURRENT_DATE" && args.len() == 1 {
36048 // CURRENT_DATE('UTC') - has timezone arg
36049 let tz_arg = args.remove(0);
36050 match target {
36051 DialectType::DuckDB => {
36052 // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
36053 let ct = Expression::CurrentTimestamp(
36054 crate::expressions::CurrentTimestamp {
36055 precision: None,
36056 sysdate: false,
36057 },
36058 );
36059 let at_tz =
36060 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
36061 this: ct,
36062 zone: tz_arg,
36063 }));
36064 Ok(Expression::Cast(Box::new(Cast {
36065 this: at_tz,
36066 to: DataType::Date,
36067 trailing_comments: vec![],
36068 double_colon_syntax: false,
36069 format: None,
36070 default: None,
36071 inferred_type: None,
36072 })))
36073 }
36074 DialectType::Snowflake => {
36075 // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
36076 let ct = Expression::Function(Box::new(Function::new(
36077 "CURRENT_TIMESTAMP".to_string(),
36078 vec![],
36079 )));
36080 let convert = Expression::Function(Box::new(Function::new(
36081 "CONVERT_TIMEZONE".to_string(),
36082 vec![tz_arg, ct],
36083 )));
36084 Ok(Expression::Cast(Box::new(Cast {
36085 this: convert,
36086 to: DataType::Date,
36087 trailing_comments: vec![],
36088 double_colon_syntax: false,
36089 format: None,
36090 default: None,
36091 inferred_type: None,
36092 })))
36093 }
36094 _ => {
36095 // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
36096 let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
36097 Ok(Expression::AtTimeZone(Box::new(
36098 crate::expressions::AtTimeZone {
36099 this: cd,
36100 zone: tz_arg,
36101 },
36102 )))
36103 }
36104 }
36105 } else if (name == "CURRENT_TIMESTAMP"
36106 || name == "CURRENT_TIME"
36107 || name == "CURRENT_DATE")
36108 && args.is_empty()
36109 && matches!(
36110 target,
36111 DialectType::PostgreSQL
36112 | DialectType::DuckDB
36113 | DialectType::Presto
36114 | DialectType::Trino
36115 )
36116 {
36117 // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME
36118 if name == "CURRENT_TIMESTAMP" {
36119 Ok(Expression::CurrentTimestamp(
36120 crate::expressions::CurrentTimestamp {
36121 precision: None,
36122 sysdate: false,
36123 },
36124 ))
36125 } else if name == "CURRENT_DATE" {
36126 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
36127 } else {
36128 // CURRENT_TIME
36129 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
36130 precision: None,
36131 }))
36132 }
36133 } else {
36134 // All other targets: keep as function (with parens)
36135 Ok(Expression::Function(Box::new(Function::new(name, args))))
36136 }
36137 }
36138
36139 // JSON_QUERY(json, path) -> target-specific
36140 "JSON_QUERY" if args.len() == 2 => {
36141 match target {
36142 DialectType::DuckDB | DialectType::SQLite => {
36143 // json -> path syntax
36144 let json_expr = args.remove(0);
36145 let path = args.remove(0);
36146 Ok(Expression::JsonExtract(Box::new(
36147 crate::expressions::JsonExtractFunc {
36148 this: json_expr,
36149 path,
36150 returning: None,
36151 arrow_syntax: true,
36152 hash_arrow_syntax: false,
36153 wrapper_option: None,
36154 quotes_option: None,
36155 on_scalar_string: false,
36156 on_error: None,
36157 },
36158 )))
36159 }
36160 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
36161 Ok(Expression::Function(Box::new(Function::new(
36162 "GET_JSON_OBJECT".to_string(),
36163 args,
36164 ))))
36165 }
36166 DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
36167 Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
36168 )),
36169 _ => Ok(Expression::Function(Box::new(Function::new(
36170 "JSON_QUERY".to_string(),
36171 args,
36172 )))),
36173 }
36174 }
36175
36176 // JSON_VALUE_ARRAY(json, path) -> target-specific
36177 "JSON_VALUE_ARRAY" if args.len() == 2 => {
36178 match target {
36179 DialectType::DuckDB => {
36180 // CAST(json -> path AS TEXT[])
36181 let json_expr = args.remove(0);
36182 let path = args.remove(0);
36183 let arrow = Expression::JsonExtract(Box::new(
36184 crate::expressions::JsonExtractFunc {
36185 this: json_expr,
36186 path,
36187 returning: None,
36188 arrow_syntax: true,
36189 hash_arrow_syntax: false,
36190 wrapper_option: None,
36191 quotes_option: None,
36192 on_scalar_string: false,
36193 on_error: None,
36194 },
36195 ));
36196 Ok(Expression::Cast(Box::new(Cast {
36197 this: arrow,
36198 to: DataType::Array {
36199 element_type: Box::new(DataType::Text),
36200 dimension: None,
36201 },
36202 trailing_comments: vec![],
36203 double_colon_syntax: false,
36204 format: None,
36205 default: None,
36206 inferred_type: None,
36207 })))
36208 }
36209 DialectType::Snowflake => {
36210 let json_expr = args.remove(0);
36211 let path_expr = args.remove(0);
36212 // Convert JSON path from $.path to just path
36213 let sf_path = if let Expression::Literal(ref lit) = path_expr {
36214 if let Literal::String(ref s) = lit.as_ref() {
36215 let trimmed = s.trim_start_matches('$').trim_start_matches('.');
36216 Expression::Literal(Box::new(Literal::String(trimmed.to_string())))
36217 } else {
36218 path_expr.clone()
36219 }
36220 } else {
36221 path_expr
36222 };
36223 let parse_json = Expression::Function(Box::new(Function::new(
36224 "PARSE_JSON".to_string(),
36225 vec![json_expr],
36226 )));
36227 let get_path = Expression::Function(Box::new(Function::new(
36228 "GET_PATH".to_string(),
36229 vec![parse_json, sf_path],
36230 )));
36231 // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
36232 let cast_expr = Expression::Cast(Box::new(Cast {
36233 this: Expression::Identifier(Identifier::new("x")),
36234 to: DataType::VarChar {
36235 length: None,
36236 parenthesized_length: false,
36237 },
36238 trailing_comments: vec![],
36239 double_colon_syntax: false,
36240 format: None,
36241 default: None,
36242 inferred_type: None,
36243 }));
36244 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
36245 parameters: vec![Identifier::new("x")],
36246 body: cast_expr,
36247 colon: false,
36248 parameter_types: vec![],
36249 }));
36250 Ok(Expression::Function(Box::new(Function::new(
36251 "TRANSFORM".to_string(),
36252 vec![get_path, lambda],
36253 ))))
36254 }
36255 _ => Ok(Expression::Function(Box::new(Function::new(
36256 "JSON_VALUE_ARRAY".to_string(),
36257 args,
36258 )))),
36259 }
36260 }
36261
36262 // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
36263 // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
36264 // This is different from Hive/Spark where 3rd arg is "group_index"
36265 "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
36266 match target {
36267 DialectType::DuckDB
36268 | DialectType::Presto
36269 | DialectType::Trino
36270 | DialectType::Athena => {
36271 if args.len() == 2 {
36272 // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
36273 args.push(Expression::number(1));
36274 Ok(Expression::Function(Box::new(Function::new(
36275 "REGEXP_EXTRACT".to_string(),
36276 args,
36277 ))))
36278 } else if args.len() == 3 {
36279 let val = args.remove(0);
36280 let regex = args.remove(0);
36281 let position = args.remove(0);
36282 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
36283 if is_pos_1 {
36284 Ok(Expression::Function(Box::new(Function::new(
36285 "REGEXP_EXTRACT".to_string(),
36286 vec![val, regex, Expression::number(1)],
36287 ))))
36288 } else {
36289 let substring_expr = Expression::Function(Box::new(Function::new(
36290 "SUBSTRING".to_string(),
36291 vec![val, position],
36292 )));
36293 let nullif_expr = Expression::Function(Box::new(Function::new(
36294 "NULLIF".to_string(),
36295 vec![
36296 substring_expr,
36297 Expression::Literal(Box::new(Literal::String(
36298 String::new(),
36299 ))),
36300 ],
36301 )));
36302 Ok(Expression::Function(Box::new(Function::new(
36303 "REGEXP_EXTRACT".to_string(),
36304 vec![nullif_expr, regex, Expression::number(1)],
36305 ))))
36306 }
36307 } else if args.len() == 4 {
36308 let val = args.remove(0);
36309 let regex = args.remove(0);
36310 let position = args.remove(0);
36311 let occurrence = args.remove(0);
36312 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
36313 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
36314 if is_pos_1 && is_occ_1 {
36315 Ok(Expression::Function(Box::new(Function::new(
36316 "REGEXP_EXTRACT".to_string(),
36317 vec![val, regex, Expression::number(1)],
36318 ))))
36319 } else {
36320 let subject = if is_pos_1 {
36321 val
36322 } else {
36323 let substring_expr = Expression::Function(Box::new(
36324 Function::new("SUBSTRING".to_string(), vec![val, position]),
36325 ));
36326 Expression::Function(Box::new(Function::new(
36327 "NULLIF".to_string(),
36328 vec![
36329 substring_expr,
36330 Expression::Literal(Box::new(Literal::String(
36331 String::new(),
36332 ))),
36333 ],
36334 )))
36335 };
36336 let extract_all = Expression::Function(Box::new(Function::new(
36337 "REGEXP_EXTRACT_ALL".to_string(),
36338 vec![subject, regex, Expression::number(1)],
36339 )));
36340 Ok(Expression::Function(Box::new(Function::new(
36341 "ARRAY_EXTRACT".to_string(),
36342 vec![extract_all, occurrence],
36343 ))))
36344 }
36345 } else {
36346 Ok(Expression::Function(Box::new(Function {
36347 name: f.name,
36348 args,
36349 distinct: f.distinct,
36350 trailing_comments: f.trailing_comments,
36351 use_bracket_syntax: f.use_bracket_syntax,
36352 no_parens: f.no_parens,
36353 quoted: f.quoted,
36354 span: None,
36355 inferred_type: None,
36356 })))
36357 }
36358 }
36359 DialectType::Snowflake => {
36360 // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
36361 Ok(Expression::Function(Box::new(Function::new(
36362 "REGEXP_SUBSTR".to_string(),
36363 args,
36364 ))))
36365 }
36366 _ => {
36367 // For other targets (Hive/Spark/BigQuery): pass through as-is
36368 // BigQuery's default group behavior matches Hive/Spark for 2-arg case
36369 Ok(Expression::Function(Box::new(Function {
36370 name: f.name,
36371 args,
36372 distinct: f.distinct,
36373 trailing_comments: f.trailing_comments,
36374 use_bracket_syntax: f.use_bracket_syntax,
36375 no_parens: f.no_parens,
36376 quoted: f.quoted,
36377 span: None,
36378 inferred_type: None,
36379 })))
36380 }
36381 }
36382 }
36383
36384 // BigQuery STRUCT(args) -> target-specific struct expression
36385 "STRUCT" => {
36386 // Convert Function args to Struct fields
36387 let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
36388 for (i, arg) in args.into_iter().enumerate() {
36389 match arg {
36390 Expression::Alias(a) => {
36391 // Named field: expr AS name
36392 fields.push((Some(a.alias.name.clone()), a.this));
36393 }
36394 other => {
36395 // Unnamed field: for Spark/Hive, keep as None
36396 // For Snowflake, auto-name as _N
36397 // For DuckDB, use column name for column refs, _N for others
36398 if matches!(target, DialectType::Snowflake) {
36399 fields.push((Some(format!("_{}", i)), other));
36400 } else if matches!(target, DialectType::DuckDB) {
36401 let auto_name = match &other {
36402 Expression::Column(col) => col.name.name.clone(),
36403 _ => format!("_{}", i),
36404 };
36405 fields.push((Some(auto_name), other));
36406 } else {
36407 fields.push((None, other));
36408 }
36409 }
36410 }
36411 }
36412
36413 match target {
36414 DialectType::Snowflake => {
36415 // OBJECT_CONSTRUCT('name', value, ...)
36416 let mut oc_args = Vec::new();
36417 for (name, val) in &fields {
36418 if let Some(n) = name {
36419 oc_args.push(Expression::Literal(Box::new(Literal::String(
36420 n.clone(),
36421 ))));
36422 oc_args.push(val.clone());
36423 } else {
36424 oc_args.push(val.clone());
36425 }
36426 }
36427 Ok(Expression::Function(Box::new(Function::new(
36428 "OBJECT_CONSTRUCT".to_string(),
36429 oc_args,
36430 ))))
36431 }
36432 DialectType::DuckDB => {
36433 // {'name': value, ...}
36434 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
36435 fields,
36436 })))
36437 }
36438 DialectType::Hive => {
36439 // STRUCT(val1, val2, ...) - strip aliases
36440 let hive_fields: Vec<(Option<String>, Expression)> =
36441 fields.into_iter().map(|(_, v)| (None, v)).collect();
36442 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
36443 fields: hive_fields,
36444 })))
36445 }
36446 DialectType::Spark | DialectType::Databricks => {
36447 // Use Expression::Struct to bypass Spark target transform auto-naming
36448 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
36449 fields,
36450 })))
36451 }
36452 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
36453 // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
36454 let all_named =
36455 !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
36456 let all_types_inferable = all_named
36457 && fields
36458 .iter()
36459 .all(|(_, val)| Self::can_infer_presto_type(val));
36460 let row_args: Vec<Expression> =
36461 fields.iter().map(|(_, v)| v.clone()).collect();
36462 let row_expr = Expression::Function(Box::new(Function::new(
36463 "ROW".to_string(),
36464 row_args,
36465 )));
36466 if all_named && all_types_inferable {
36467 // Build ROW type with inferred types
36468 let mut row_type_fields = Vec::new();
36469 for (name, val) in &fields {
36470 if let Some(n) = name {
36471 let type_str = Self::infer_sql_type_for_presto(val);
36472 row_type_fields.push(crate::expressions::StructField::new(
36473 n.clone(),
36474 crate::expressions::DataType::Custom { name: type_str },
36475 ));
36476 }
36477 }
36478 let row_type = crate::expressions::DataType::Struct {
36479 fields: row_type_fields,
36480 nested: true,
36481 };
36482 Ok(Expression::Cast(Box::new(Cast {
36483 this: row_expr,
36484 to: row_type,
36485 trailing_comments: Vec::new(),
36486 double_colon_syntax: false,
36487 format: None,
36488 default: None,
36489 inferred_type: None,
36490 })))
36491 } else {
36492 Ok(row_expr)
36493 }
36494 }
36495 _ => {
36496 // Default: keep as STRUCT function with original args
36497 let mut new_args = Vec::new();
36498 for (name, val) in fields {
36499 if let Some(n) = name {
36500 new_args.push(Expression::Alias(Box::new(
36501 crate::expressions::Alias::new(val, Identifier::new(n)),
36502 )));
36503 } else {
36504 new_args.push(val);
36505 }
36506 }
36507 Ok(Expression::Function(Box::new(Function::new(
36508 "STRUCT".to_string(),
36509 new_args,
36510 ))))
36511 }
36512 }
36513 }
36514
36515 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
36516 "ROUND" if args.len() == 3 => {
36517 let x = args.remove(0);
36518 let n = args.remove(0);
36519 let mode = args.remove(0);
36520 // Check if mode is 'ROUND_HALF_EVEN'
36521 let is_half_even = matches!(&mode, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN")));
36522 if is_half_even && matches!(target, DialectType::DuckDB) {
36523 Ok(Expression::Function(Box::new(Function::new(
36524 "ROUND_EVEN".to_string(),
36525 vec![x, n],
36526 ))))
36527 } else {
36528 // Pass through with all args
36529 Ok(Expression::Function(Box::new(Function::new(
36530 "ROUND".to_string(),
36531 vec![x, n, mode],
36532 ))))
36533 }
36534 }
36535
36536 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
36537 "MAKE_INTERVAL" => {
36538 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
36539 // The positional args are: year, month
36540 // Named args are: day =>, minute =>, etc.
36541 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
36542 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
36543 // For BigQuery->BigQuery: reorder named args (day before minute)
36544 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
36545 let mut parts: Vec<(String, String)> = Vec::new();
36546 let mut pos_idx = 0;
36547 let pos_units = ["year", "month"];
36548 for arg in &args {
36549 if let Expression::NamedArgument(na) = arg {
36550 // Named arg like minute => 5
36551 let unit = na.name.name.clone();
36552 if let Expression::Literal(lit) = &na.value {
36553 if let Literal::Number(n) = lit.as_ref() {
36554 parts.push((unit, n.clone()));
36555 }
36556 }
36557 } else if pos_idx < pos_units.len() {
36558 if let Expression::Literal(lit) = arg {
36559 if let Literal::Number(n) = lit.as_ref() {
36560 parts.push((pos_units[pos_idx].to_string(), n.clone()));
36561 }
36562 }
36563 pos_idx += 1;
36564 }
36565 }
36566 // Don't sort - preserve original argument order
36567 let separator = if matches!(target, DialectType::Snowflake) {
36568 ", "
36569 } else {
36570 " "
36571 };
36572 let interval_str = parts
36573 .iter()
36574 .map(|(u, v)| format!("{} {}", v, u))
36575 .collect::<Vec<_>>()
36576 .join(separator);
36577 Ok(Expression::Interval(Box::new(
36578 crate::expressions::Interval {
36579 this: Some(Expression::Literal(Box::new(Literal::String(
36580 interval_str,
36581 )))),
36582 unit: None,
36583 },
36584 )))
36585 } else if matches!(target, DialectType::BigQuery) {
36586 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
36587 let mut positional = Vec::new();
36588 let mut named: Vec<(
36589 String,
36590 Expression,
36591 crate::expressions::NamedArgSeparator,
36592 )> = Vec::new();
36593 let _pos_units = ["year", "month"];
36594 let mut _pos_idx = 0;
36595 for arg in args {
36596 if let Expression::NamedArgument(na) = arg {
36597 named.push((na.name.name.clone(), na.value, na.separator));
36598 } else {
36599 positional.push(arg);
36600 _pos_idx += 1;
36601 }
36602 }
36603 // Sort named args by: day, hour, minute, second
36604 let unit_order = |u: &str| -> usize {
36605 match u.to_ascii_lowercase().as_str() {
36606 "day" => 0,
36607 "hour" => 1,
36608 "minute" => 2,
36609 "second" => 3,
36610 _ => 4,
36611 }
36612 };
36613 named.sort_by_key(|(u, _, _)| unit_order(u));
36614 let mut result_args = positional;
36615 for (name, value, sep) in named {
36616 result_args.push(Expression::NamedArgument(Box::new(
36617 crate::expressions::NamedArgument {
36618 name: Identifier::new(&name),
36619 value,
36620 separator: sep,
36621 },
36622 )));
36623 }
36624 Ok(Expression::Function(Box::new(Function::new(
36625 "MAKE_INTERVAL".to_string(),
36626 result_args,
36627 ))))
36628 } else {
36629 Ok(Expression::Function(Box::new(Function::new(
36630 "MAKE_INTERVAL".to_string(),
36631 args,
36632 ))))
36633 }
36634 }
36635
36636 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
36637 "ARRAY_TO_STRING" if args.len() == 3 => {
36638 let arr = args.remove(0);
36639 let sep = args.remove(0);
36640 let null_text = args.remove(0);
36641 match target {
36642 DialectType::DuckDB => {
36643 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
36644 let _lambda_param =
36645 Expression::Identifier(crate::expressions::Identifier::new("x"));
36646 let coalesce =
36647 Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
36648 original_name: None,
36649 expressions: vec![
36650 Expression::Identifier(crate::expressions::Identifier::new(
36651 "x",
36652 )),
36653 null_text,
36654 ],
36655 inferred_type: None,
36656 }));
36657 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
36658 parameters: vec![crate::expressions::Identifier::new("x")],
36659 body: coalesce,
36660 colon: false,
36661 parameter_types: vec![],
36662 }));
36663 let list_transform = Expression::Function(Box::new(Function::new(
36664 "LIST_TRANSFORM".to_string(),
36665 vec![arr, lambda],
36666 )));
36667 Ok(Expression::Function(Box::new(Function::new(
36668 "ARRAY_TO_STRING".to_string(),
36669 vec![list_transform, sep],
36670 ))))
36671 }
36672 _ => Ok(Expression::Function(Box::new(Function::new(
36673 "ARRAY_TO_STRING".to_string(),
36674 vec![arr, sep, null_text],
36675 )))),
36676 }
36677 }
36678
36679 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
36680 "LENGTH" if args.len() == 1 => {
36681 let arg = args.remove(0);
36682 match target {
36683 DialectType::DuckDB => {
36684 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
36685 let typeof_func = Expression::Function(Box::new(Function::new(
36686 "TYPEOF".to_string(),
36687 vec![arg.clone()],
36688 )));
36689 let blob_cast = Expression::Cast(Box::new(Cast {
36690 this: arg.clone(),
36691 to: DataType::VarBinary { length: None },
36692 trailing_comments: vec![],
36693 double_colon_syntax: false,
36694 format: None,
36695 default: None,
36696 inferred_type: None,
36697 }));
36698 let octet_length = Expression::Function(Box::new(Function::new(
36699 "OCTET_LENGTH".to_string(),
36700 vec![blob_cast],
36701 )));
36702 let text_cast = Expression::Cast(Box::new(Cast {
36703 this: arg,
36704 to: DataType::Text,
36705 trailing_comments: vec![],
36706 double_colon_syntax: false,
36707 format: None,
36708 default: None,
36709 inferred_type: None,
36710 }));
36711 let length_text = Expression::Function(Box::new(Function::new(
36712 "LENGTH".to_string(),
36713 vec![text_cast],
36714 )));
36715 Ok(Expression::Case(Box::new(crate::expressions::Case {
36716 operand: Some(typeof_func),
36717 whens: vec![(
36718 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
36719 octet_length,
36720 )],
36721 else_: Some(length_text),
36722 comments: Vec::new(),
36723 inferred_type: None,
36724 })))
36725 }
36726 _ => Ok(Expression::Function(Box::new(Function::new(
36727 "LENGTH".to_string(),
36728 vec![arg],
36729 )))),
36730 }
36731 }
36732
36733 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
36734 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
36735 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
36736 // The args should be [x, fraction] with the null handling stripped
36737 // For DuckDB: QUANTILE_CONT(x, fraction)
36738 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
36739 match target {
36740 DialectType::DuckDB => {
36741 // Strip down to just 2 args, rename to QUANTILE_CONT
36742 let x = args[0].clone();
36743 let frac = args[1].clone();
36744 Ok(Expression::Function(Box::new(Function::new(
36745 "QUANTILE_CONT".to_string(),
36746 vec![x, frac],
36747 ))))
36748 }
36749 _ => Ok(Expression::Function(Box::new(Function::new(
36750 "PERCENTILE_CONT".to_string(),
36751 args,
36752 )))),
36753 }
36754 }
36755
36756 // All others: pass through
36757 _ => Ok(Expression::Function(Box::new(Function {
36758 name: f.name,
36759 args,
36760 distinct: f.distinct,
36761 trailing_comments: f.trailing_comments,
36762 use_bracket_syntax: f.use_bracket_syntax,
36763 no_parens: f.no_parens,
36764 quoted: f.quoted,
36765 span: None,
36766 inferred_type: None,
36767 }))),
36768 }
36769 }
36770
36771 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
36772 /// Returns false for column references and other non-literal expressions where the type is unknown.
36773 fn can_infer_presto_type(expr: &Expression) -> bool {
36774 match expr {
36775 Expression::Literal(_) => true,
36776 Expression::Boolean(_) => true,
36777 Expression::Array(_) | Expression::ArrayFunc(_) => true,
36778 Expression::Struct(_) | Expression::StructFunc(_) => true,
36779 Expression::Function(f) => {
36780 f.name.eq_ignore_ascii_case("STRUCT")
36781 || f.name.eq_ignore_ascii_case("ROW")
36782 || f.name.eq_ignore_ascii_case("CURRENT_DATE")
36783 || f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
36784 || f.name.eq_ignore_ascii_case("NOW")
36785 }
36786 Expression::Cast(_) => true,
36787 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
36788 _ => false,
36789 }
36790 }
36791
36792 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
36793 fn infer_sql_type_for_presto(expr: &Expression) -> String {
36794 use crate::expressions::Literal;
36795 match expr {
36796 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36797 "VARCHAR".to_string()
36798 }
36799 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
36800 let Literal::Number(n) = lit.as_ref() else {
36801 unreachable!()
36802 };
36803 if n.contains('.') {
36804 "DOUBLE".to_string()
36805 } else {
36806 "INTEGER".to_string()
36807 }
36808 }
36809 Expression::Boolean(_) => "BOOLEAN".to_string(),
36810 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
36811 "DATE".to_string()
36812 }
36813 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
36814 "TIMESTAMP".to_string()
36815 }
36816 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
36817 "TIMESTAMP".to_string()
36818 }
36819 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
36820 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
36821 Expression::Function(f) => {
36822 if f.name.eq_ignore_ascii_case("STRUCT") || f.name.eq_ignore_ascii_case("ROW") {
36823 "ROW".to_string()
36824 } else if f.name.eq_ignore_ascii_case("CURRENT_DATE") {
36825 "DATE".to_string()
36826 } else if f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
36827 || f.name.eq_ignore_ascii_case("NOW")
36828 {
36829 "TIMESTAMP".to_string()
36830 } else {
36831 "VARCHAR".to_string()
36832 }
36833 }
36834 Expression::Cast(c) => {
36835 // If already cast, use the target type
36836 Self::data_type_to_presto_string(&c.to)
36837 }
36838 _ => "VARCHAR".to_string(),
36839 }
36840 }
36841
36842 /// Convert a DataType to its Presto/Trino string representation for ROW type
36843 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
36844 use crate::expressions::DataType;
36845 match dt {
36846 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
36847 "VARCHAR".to_string()
36848 }
36849 DataType::Int { .. }
36850 | DataType::BigInt { .. }
36851 | DataType::SmallInt { .. }
36852 | DataType::TinyInt { .. } => "INTEGER".to_string(),
36853 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
36854 DataType::Boolean => "BOOLEAN".to_string(),
36855 DataType::Date => "DATE".to_string(),
36856 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
36857 DataType::Struct { fields, .. } => {
36858 let field_strs: Vec<String> = fields
36859 .iter()
36860 .map(|f| {
36861 format!(
36862 "{} {}",
36863 f.name,
36864 Self::data_type_to_presto_string(&f.data_type)
36865 )
36866 })
36867 .collect();
36868 format!("ROW({})", field_strs.join(", "))
36869 }
36870 DataType::Array { element_type, .. } => {
36871 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
36872 }
36873 DataType::Custom { name } => {
36874 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
36875 name.clone()
36876 }
36877 _ => "VARCHAR".to_string(),
36878 }
36879 }
36880
36881 /// Convert IntervalUnit to string
36882 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> &'static str {
36883 match unit {
36884 crate::expressions::IntervalUnit::Year => "YEAR",
36885 crate::expressions::IntervalUnit::Quarter => "QUARTER",
36886 crate::expressions::IntervalUnit::Month => "MONTH",
36887 crate::expressions::IntervalUnit::Week => "WEEK",
36888 crate::expressions::IntervalUnit::Day => "DAY",
36889 crate::expressions::IntervalUnit::Hour => "HOUR",
36890 crate::expressions::IntervalUnit::Minute => "MINUTE",
36891 crate::expressions::IntervalUnit::Second => "SECOND",
36892 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
36893 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
36894 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND",
36895 }
36896 }
36897
36898 /// Extract unit string from an expression (uppercased)
36899 fn get_unit_str_static(expr: &Expression) -> String {
36900 use crate::expressions::Literal;
36901 match expr {
36902 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
36903 Expression::Var(v) => v.this.to_ascii_uppercase(),
36904 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36905 let Literal::String(s) = lit.as_ref() else {
36906 unreachable!()
36907 };
36908 s.to_ascii_uppercase()
36909 }
36910 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
36911 Expression::Function(f) => {
36912 let base = f.name.to_ascii_uppercase();
36913 if !f.args.is_empty() {
36914 let inner = Self::get_unit_str_static(&f.args[0]);
36915 format!("{}({})", base, inner)
36916 } else {
36917 base
36918 }
36919 }
36920 _ => "DAY".to_string(),
36921 }
36922 }
36923
36924 /// Parse unit string to IntervalUnit
36925 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
36926 match s {
36927 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
36928 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
36929 "MONTH" | "MONTHS" | "MON" | "MONS" | "MM" | "M" => {
36930 crate::expressions::IntervalUnit::Month
36931 }
36932 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
36933 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
36934 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
36935 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
36936 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
36937 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
36938 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
36939 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
36940 _ => crate::expressions::IntervalUnit::Day,
36941 }
36942 }
36943
36944 /// Convert expression to simple string for interval building
36945 fn expr_to_string_static(expr: &Expression) -> String {
36946 use crate::expressions::Literal;
36947 match expr {
36948 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
36949 let Literal::Number(s) = lit.as_ref() else {
36950 unreachable!()
36951 };
36952 s.clone()
36953 }
36954 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36955 let Literal::String(s) = lit.as_ref() else {
36956 unreachable!()
36957 };
36958 s.clone()
36959 }
36960 Expression::Identifier(id) => id.name.clone(),
36961 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
36962 _ => "1".to_string(),
36963 }
36964 }
36965
36966 /// Extract a simple string representation from a literal expression
36967 fn expr_to_string(expr: &Expression) -> String {
36968 use crate::expressions::Literal;
36969 match expr {
36970 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
36971 let Literal::Number(s) = lit.as_ref() else {
36972 unreachable!()
36973 };
36974 s.clone()
36975 }
36976 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36977 let Literal::String(s) = lit.as_ref() else {
36978 unreachable!()
36979 };
36980 s.clone()
36981 }
36982 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
36983 Expression::Identifier(id) => id.name.clone(),
36984 _ => "1".to_string(),
36985 }
36986 }
36987
36988 /// Quote an interval value expression as a string literal if it's a number (or negated number)
36989 fn quote_interval_val(expr: &Expression) -> Expression {
36990 use crate::expressions::Literal;
36991 match expr {
36992 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
36993 let Literal::Number(n) = lit.as_ref() else {
36994 unreachable!()
36995 };
36996 Expression::Literal(Box::new(Literal::String(n.clone())))
36997 }
36998 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => expr.clone(),
36999 Expression::Neg(inner) => {
37000 if let Expression::Literal(lit) = &inner.this {
37001 if let Literal::Number(n) = lit.as_ref() {
37002 Expression::Literal(Box::new(Literal::String(format!("-{}", n))))
37003 } else {
37004 inner.this.clone()
37005 }
37006 } else {
37007 expr.clone()
37008 }
37009 }
37010 _ => expr.clone(),
37011 }
37012 }
37013
/// Detect timezone information at the end of a timestamp string.
///
/// Returns `true` when, after trimming, either
/// - the text following the last space is `+` or `-` followed by at least one
///   character, all of which are ASCII digits or `:` (e.g. `+02:00`, `-8`), or
/// - the text ends (case-insensitively) with one of the space-prefixed
///   abbreviations `utc`, `gmt`, `cet`, `est`, `pst`, `cst`, `mst`.
fn timestamp_string_has_timezone(ts: &str) -> bool {
    const TZ_SUFFIXES: [&str; 7] = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];

    let text = ts.trim();

    // Numeric offset: only the token after the last space is considered.
    let has_numeric_offset = text
        .rsplit_once(' ')
        .map(|(_, tail)| tail)
        .and_then(|tail| tail.strip_prefix('+').or_else(|| tail.strip_prefix('-')))
        .map_or(false, |digits| {
            !digits.is_empty() && digits.chars().all(|c| c.is_ascii_digit() || c == ':')
        });
    if has_numeric_offset {
        return true;
    }

    // Named timezone abbreviation at the very end.
    let lowered = text.to_ascii_lowercase();
    TZ_SUFFIXES.iter().any(|&suffix| lowered.ends_with(suffix))
}
37037
37038 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
37039 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
37040 use crate::expressions::{Cast, DataType, Literal};
37041 match expr {
37042 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
37043 let Literal::Timestamp(s) = lit.as_ref() else {
37044 unreachable!()
37045 };
37046 let tz = func_name.starts_with("TIMESTAMP");
37047 Expression::Cast(Box::new(Cast {
37048 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37049 to: if tz {
37050 DataType::Timestamp {
37051 timezone: true,
37052 precision: None,
37053 }
37054 } else {
37055 DataType::Timestamp {
37056 timezone: false,
37057 precision: None,
37058 }
37059 },
37060 trailing_comments: vec![],
37061 double_colon_syntax: false,
37062 format: None,
37063 default: None,
37064 inferred_type: None,
37065 }))
37066 }
37067 other => other,
37068 }
37069 }
37070
37071 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
37072 fn maybe_cast_ts(expr: Expression) -> Expression {
37073 use crate::expressions::{Cast, DataType, Literal};
37074 match expr {
37075 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
37076 let Literal::Timestamp(s) = lit.as_ref() else {
37077 unreachable!()
37078 };
37079 Expression::Cast(Box::new(Cast {
37080 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37081 to: DataType::Timestamp {
37082 timezone: false,
37083 precision: None,
37084 },
37085 trailing_comments: vec![],
37086 double_colon_syntax: false,
37087 format: None,
37088 default: None,
37089 inferred_type: None,
37090 }))
37091 }
37092 other => other,
37093 }
37094 }
37095
37096 /// Convert DATE 'x' literal to CAST('x' AS DATE)
37097 fn date_literal_to_cast(expr: Expression) -> Expression {
37098 use crate::expressions::{Cast, DataType, Literal};
37099 match expr {
37100 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
37101 let Literal::Date(s) = lit.as_ref() else {
37102 unreachable!()
37103 };
37104 Expression::Cast(Box::new(Cast {
37105 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37106 to: DataType::Date,
37107 trailing_comments: vec![],
37108 double_colon_syntax: false,
37109 format: None,
37110 default: None,
37111 inferred_type: None,
37112 }))
37113 }
37114 other => other,
37115 }
37116 }
37117
37118 /// Ensure an expression that should be a date is CAST(... AS DATE).
37119 /// Handles both DATE literals and string literals that look like dates.
37120 fn ensure_cast_date(expr: Expression) -> Expression {
37121 use crate::expressions::{Cast, DataType, Literal};
37122 match expr {
37123 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
37124 let Literal::Date(s) = lit.as_ref() else {
37125 unreachable!()
37126 };
37127 Expression::Cast(Box::new(Cast {
37128 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37129 to: DataType::Date,
37130 trailing_comments: vec![],
37131 double_colon_syntax: false,
37132 format: None,
37133 default: None,
37134 inferred_type: None,
37135 }))
37136 }
37137 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37138 // String literal that should be a date -> CAST('s' AS DATE)
37139 Expression::Cast(Box::new(Cast {
37140 this: expr,
37141 to: DataType::Date,
37142 trailing_comments: vec![],
37143 double_colon_syntax: false,
37144 format: None,
37145 default: None,
37146 inferred_type: None,
37147 }))
37148 }
37149 // Already a CAST or other expression -> leave as-is
37150 other => other,
37151 }
37152 }
37153
37154 /// Force CAST(expr AS DATE) for any expression (not just literals)
37155 /// Skips if the expression is already a CAST to DATE
37156 fn force_cast_date(expr: Expression) -> Expression {
37157 use crate::expressions::{Cast, DataType};
37158 // If it's already a CAST to DATE, don't double-wrap
37159 if let Expression::Cast(ref c) = expr {
37160 if matches!(c.to, DataType::Date) {
37161 return expr;
37162 }
37163 }
37164 Expression::Cast(Box::new(Cast {
37165 this: expr,
37166 to: DataType::Date,
37167 trailing_comments: vec![],
37168 double_colon_syntax: false,
37169 format: None,
37170 default: None,
37171 inferred_type: None,
37172 }))
37173 }
37174
/// Placeholder function name standing in for `TO_DATE` inside the AST.
///
/// `ensure_to_date_preserved` wraps string literals in a call with this name.
/// Because the name is `_POLYGLOT_TO_DATE` rather than `TO_DATE`, it is not
/// recognized by the TO_DATE -> CAST logic of the Snowflake handler.
/// The Snowflake DATEDIFF handler converts these back to TO_DATE.
/// NOTE(review): both handlers live outside this block — confirm they still
/// match this name if it is ever changed.
const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
37179
37180 fn ensure_to_date_preserved(expr: Expression) -> Expression {
37181 use crate::expressions::{Function, Literal};
37182 if matches!(expr, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)))
37183 {
37184 Expression::Function(Box::new(Function::new(
37185 Self::PRESERVED_TO_DATE.to_string(),
37186 vec![expr],
37187 )))
37188 } else {
37189 expr
37190 }
37191 }
37192
37193 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
37194 fn try_cast_date(expr: Expression) -> Expression {
37195 use crate::expressions::{Cast, DataType};
37196 Expression::TryCast(Box::new(Cast {
37197 this: expr,
37198 to: DataType::Date,
37199 trailing_comments: vec![],
37200 double_colon_syntax: false,
37201 format: None,
37202 default: None,
37203 inferred_type: None,
37204 }))
37205 }
37206
37207 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
37208 fn double_cast_timestamp_date(expr: Expression) -> Expression {
37209 use crate::expressions::{Cast, DataType};
37210 let inner = Expression::Cast(Box::new(Cast {
37211 this: expr,
37212 to: DataType::Timestamp {
37213 timezone: false,
37214 precision: None,
37215 },
37216 trailing_comments: vec![],
37217 double_colon_syntax: false,
37218 format: None,
37219 default: None,
37220 inferred_type: None,
37221 }));
37222 Expression::Cast(Box::new(Cast {
37223 this: inner,
37224 to: DataType::Date,
37225 trailing_comments: vec![],
37226 double_colon_syntax: false,
37227 format: None,
37228 default: None,
37229 inferred_type: None,
37230 }))
37231 }
37232
37233 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
37234 fn double_cast_datetime_date(expr: Expression) -> Expression {
37235 use crate::expressions::{Cast, DataType};
37236 let inner = Expression::Cast(Box::new(Cast {
37237 this: expr,
37238 to: DataType::Custom {
37239 name: "DATETIME".to_string(),
37240 },
37241 trailing_comments: vec![],
37242 double_colon_syntax: false,
37243 format: None,
37244 default: None,
37245 inferred_type: None,
37246 }));
37247 Expression::Cast(Box::new(Cast {
37248 this: inner,
37249 to: DataType::Date,
37250 trailing_comments: vec![],
37251 double_colon_syntax: false,
37252 format: None,
37253 default: None,
37254 inferred_type: None,
37255 }))
37256 }
37257
37258 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
37259 fn double_cast_datetime2_date(expr: Expression) -> Expression {
37260 use crate::expressions::{Cast, DataType};
37261 let inner = Expression::Cast(Box::new(Cast {
37262 this: expr,
37263 to: DataType::Custom {
37264 name: "DATETIME2".to_string(),
37265 },
37266 trailing_comments: vec![],
37267 double_colon_syntax: false,
37268 format: None,
37269 default: None,
37270 inferred_type: None,
37271 }));
37272 Expression::Cast(Box::new(Cast {
37273 this: inner,
37274 to: DataType::Date,
37275 trailing_comments: vec![],
37276 double_colon_syntax: false,
37277 format: None,
37278 default: None,
37279 inferred_type: None,
37280 }))
37281 }
37282
/// Convert a Hive/Java-style date format string to a C-style (strftime) format,
/// e.g. `yyyy-MM-dd'T'HH` -> `%Y-%m-%d'T'%H`.
///
/// Runs of the same pattern letter are collapsed into one directive:
///
/// | pattern        | output |
/// |----------------|--------|
/// | `yy`           | `%y`   |
/// | `y` (other)    | `%Y`   |
/// | `M`, `MM`      | `%m`   |
/// | `MMM`          | `%b`   |
/// | `MMMM`+        | `%B`   |
/// | `d`+           | `%d`   |
/// | `H`+ / `h`+    | `%H` / `%I` |
/// | `m`+ / `s`+    | `%M` / `%S` |
/// | `S`+           | `%f`   |
/// | `a`+           | `%p`   |
/// | `E`..`EEE`     | `%a`   |
/// | `EEEE`+        | `%A`   |
///
/// Single-quoted literal text is copied through verbatim, quotes included; an
/// unterminated quote copies the remainder of the input. Every other character
/// is copied unchanged.
fn hive_format_to_c_format(fmt: &str) -> String {
    let mut result = String::with_capacity(fmt.len() + 8);
    let mut chars = fmt.chars().peekable();

    while let Some(c) = chars.next() {
        if c == '\'' {
            // Quoted literal text: pass through the quotes and their content.
            result.push('\'');
            for quoted in chars.by_ref() {
                result.push(quoted);
                if quoted == '\'' {
                    break;
                }
            }
            continue;
        }

        if !matches!(c, 'y' | 'M' | 'd' | 'H' | 'h' | 'm' | 's' | 'S' | 'a' | 'E') {
            result.push(c);
            continue;
        }

        // Length of the run of identical pattern letters starting at `c`.
        let mut run = 1usize;
        while chars.next_if_eq(&c).is_some() {
            run += 1;
        }

        let directive = match (c, run) {
            ('y', 2) => "%y",
            ('y', _) => "%Y",
            ('M', 1 | 2) => "%m",
            ('M', 3) => "%b",
            // Four or more letters mean the full month name, mirroring the
            // `EEEE` -> `%A` handling for weekday names.
            ('M', _) => "%B",
            ('d', _) => "%d",
            ('H', _) => "%H",
            ('h', _) => "%I",
            ('m', _) => "%M",
            ('s', _) => "%S",
            // Fractional seconds, regardless of the number of digits requested.
            ('S', _) => "%f",
            // AM/PM marker.
            ('a', _) => "%p",
            ('E', 1..=3) => "%a",
            ('E', _) => "%A",
            _ => unreachable!("non-pattern characters are handled before run counting"),
        };
        result.push_str(directive);
    }
    result
}
37406
37407 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
37408 fn hive_format_to_presto_format(fmt: &str) -> String {
37409 let c_fmt = Self::hive_format_to_c_format(fmt);
37410 // Presto uses %T for HH:MM:SS
37411 c_fmt.replace("%H:%M:%S", "%T")
37412 }
37413
37414 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
37415 fn ensure_cast_timestamp(expr: Expression) -> Expression {
37416 use crate::expressions::{Cast, DataType, Literal};
37417 match expr {
37418 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
37419 let Literal::Timestamp(s) = lit.as_ref() else {
37420 unreachable!()
37421 };
37422 Expression::Cast(Box::new(Cast {
37423 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37424 to: DataType::Timestamp {
37425 timezone: false,
37426 precision: None,
37427 },
37428 trailing_comments: vec![],
37429 double_colon_syntax: false,
37430 format: None,
37431 default: None,
37432 inferred_type: None,
37433 }))
37434 }
37435 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37436 Expression::Cast(Box::new(Cast {
37437 this: expr,
37438 to: DataType::Timestamp {
37439 timezone: false,
37440 precision: None,
37441 },
37442 trailing_comments: vec![],
37443 double_colon_syntax: false,
37444 format: None,
37445 default: None,
37446 inferred_type: None,
37447 }))
37448 }
37449 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
37450 let Literal::Datetime(s) = lit.as_ref() else {
37451 unreachable!()
37452 };
37453 Expression::Cast(Box::new(Cast {
37454 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37455 to: DataType::Timestamp {
37456 timezone: false,
37457 precision: None,
37458 },
37459 trailing_comments: vec![],
37460 double_colon_syntax: false,
37461 format: None,
37462 default: None,
37463 inferred_type: None,
37464 }))
37465 }
37466 other => other,
37467 }
37468 }
37469
37470 /// Force CAST to TIMESTAMP for any expression (not just literals)
37471 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
37472 fn force_cast_timestamp(expr: Expression) -> Expression {
37473 use crate::expressions::{Cast, DataType};
37474 // Don't double-wrap if already a CAST to TIMESTAMP
37475 if let Expression::Cast(ref c) = expr {
37476 if matches!(c.to, DataType::Timestamp { .. }) {
37477 return expr;
37478 }
37479 }
37480 Expression::Cast(Box::new(Cast {
37481 this: expr,
37482 to: DataType::Timestamp {
37483 timezone: false,
37484 precision: None,
37485 },
37486 trailing_comments: vec![],
37487 double_colon_syntax: false,
37488 format: None,
37489 default: None,
37490 inferred_type: None,
37491 }))
37492 }
37493
37494 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
37495 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
37496 use crate::expressions::{Cast, DataType, Literal};
37497 match expr {
37498 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
37499 let Literal::Timestamp(s) = lit.as_ref() else {
37500 unreachable!()
37501 };
37502 Expression::Cast(Box::new(Cast {
37503 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37504 to: DataType::Timestamp {
37505 timezone: true,
37506 precision: None,
37507 },
37508 trailing_comments: vec![],
37509 double_colon_syntax: false,
37510 format: None,
37511 default: None,
37512 inferred_type: None,
37513 }))
37514 }
37515 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37516 Expression::Cast(Box::new(Cast {
37517 this: expr,
37518 to: DataType::Timestamp {
37519 timezone: true,
37520 precision: None,
37521 },
37522 trailing_comments: vec![],
37523 double_colon_syntax: false,
37524 format: None,
37525 default: None,
37526 inferred_type: None,
37527 }))
37528 }
37529 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
37530 let Literal::Datetime(s) = lit.as_ref() else {
37531 unreachable!()
37532 };
37533 Expression::Cast(Box::new(Cast {
37534 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37535 to: DataType::Timestamp {
37536 timezone: true,
37537 precision: None,
37538 },
37539 trailing_comments: vec![],
37540 double_colon_syntax: false,
37541 format: None,
37542 default: None,
37543 inferred_type: None,
37544 }))
37545 }
37546 other => other,
37547 }
37548 }
37549
37550 /// Ensure expression is CAST to DATETIME (for BigQuery)
37551 fn ensure_cast_datetime(expr: Expression) -> Expression {
37552 use crate::expressions::{Cast, DataType, Literal};
37553 match expr {
37554 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37555 Expression::Cast(Box::new(Cast {
37556 this: expr,
37557 to: DataType::Custom {
37558 name: "DATETIME".to_string(),
37559 },
37560 trailing_comments: vec![],
37561 double_colon_syntax: false,
37562 format: None,
37563 default: None,
37564 inferred_type: None,
37565 }))
37566 }
37567 other => other,
37568 }
37569 }
37570
37571 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
37572 fn force_cast_datetime(expr: Expression) -> Expression {
37573 use crate::expressions::{Cast, DataType};
37574 if let Expression::Cast(ref c) = expr {
37575 if let DataType::Custom { ref name } = c.to {
37576 if name.eq_ignore_ascii_case("DATETIME") {
37577 return expr;
37578 }
37579 }
37580 }
37581 Expression::Cast(Box::new(Cast {
37582 this: expr,
37583 to: DataType::Custom {
37584 name: "DATETIME".to_string(),
37585 },
37586 trailing_comments: vec![],
37587 double_colon_syntax: false,
37588 format: None,
37589 default: None,
37590 inferred_type: None,
37591 }))
37592 }
37593
37594 /// Ensure expression is CAST to DATETIME2 (for TSQL)
37595 fn ensure_cast_datetime2(expr: Expression) -> Expression {
37596 use crate::expressions::{Cast, DataType, Literal};
37597 match expr {
37598 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37599 Expression::Cast(Box::new(Cast {
37600 this: expr,
37601 to: DataType::Custom {
37602 name: "DATETIME2".to_string(),
37603 },
37604 trailing_comments: vec![],
37605 double_colon_syntax: false,
37606 format: None,
37607 default: None,
37608 inferred_type: None,
37609 }))
37610 }
37611 other => other,
37612 }
37613 }
37614
37615 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
37616 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
37617 use crate::expressions::{Cast, DataType, Literal};
37618 match expr {
37619 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
37620 let Literal::Timestamp(s) = lit.as_ref() else {
37621 unreachable!()
37622 };
37623 Expression::Cast(Box::new(Cast {
37624 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37625 to: DataType::Timestamp {
37626 timezone: true,
37627 precision: None,
37628 },
37629 trailing_comments: vec![],
37630 double_colon_syntax: false,
37631 format: None,
37632 default: None,
37633 inferred_type: None,
37634 }))
37635 }
37636 other => other,
37637 }
37638 }
37639
37640 /// Convert BigQuery format string to Snowflake format string
37641 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
37642 use crate::expressions::Literal;
37643 if let Expression::Literal(lit) = format_expr {
37644 if let Literal::String(s) = lit.as_ref() {
37645 let sf = s
37646 .replace("%Y", "yyyy")
37647 .replace("%m", "mm")
37648 .replace("%d", "DD")
37649 .replace("%H", "HH24")
37650 .replace("%M", "MI")
37651 .replace("%S", "SS")
37652 .replace("%b", "mon")
37653 .replace("%B", "Month")
37654 .replace("%e", "FMDD");
37655 Expression::Literal(Box::new(Literal::String(sf)))
37656 } else {
37657 format_expr.clone()
37658 }
37659 } else {
37660 format_expr.clone()
37661 }
37662 }
37663
37664 /// Convert BigQuery format string to DuckDB format string
37665 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
37666 use crate::expressions::Literal;
37667 if let Expression::Literal(lit) = format_expr {
37668 if let Literal::String(s) = lit.as_ref() {
37669 let duck = s
37670 .replace("%T", "%H:%M:%S")
37671 .replace("%F", "%Y-%m-%d")
37672 .replace("%D", "%m/%d/%y")
37673 .replace("%x", "%m/%d/%y")
37674 .replace("%c", "%a %b %-d %H:%M:%S %Y")
37675 .replace("%e", "%-d")
37676 .replace("%E6S", "%S.%f");
37677 Expression::Literal(Box::new(Literal::String(duck)))
37678 } else {
37679 format_expr.clone()
37680 }
37681 } else {
37682 format_expr.clone()
37683 }
37684 }
37685
37686 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
37687 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
37688 use crate::expressions::Literal;
37689 if let Expression::Literal(lit) = format_expr {
37690 if let Literal::String(s) = lit.as_ref() {
37691 // Replace format elements from longest to shortest to avoid partial matches
37692 let result = s
37693 .replace("YYYYMMDD", "%Y%m%d")
37694 .replace("YYYY", "%Y")
37695 .replace("YY", "%y")
37696 .replace("MONTH", "%B")
37697 .replace("MON", "%b")
37698 .replace("MM", "%m")
37699 .replace("DD", "%d")
37700 .replace("HH24", "%H")
37701 .replace("HH12", "%I")
37702 .replace("HH", "%I")
37703 .replace("MI", "%M")
37704 .replace("SSTZH", "%S%z")
37705 .replace("SS", "%S")
37706 .replace("TZH", "%z");
37707 Expression::Literal(Box::new(Literal::String(result)))
37708 } else {
37709 format_expr.clone()
37710 }
37711 } else {
37712 format_expr.clone()
37713 }
37714 }
37715
37716 /// Normalize BigQuery format strings for BQ->BQ output
37717 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
37718 use crate::expressions::Literal;
37719 if let Expression::Literal(lit) = format_expr {
37720 if let Literal::String(s) = lit.as_ref() {
37721 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
37722 Expression::Literal(Box::new(Literal::String(norm)))
37723 } else {
37724 format_expr.clone()
37725 }
37726 } else {
37727 format_expr.clone()
37728 }
37729 }
37730}
37731
37732#[cfg(test)]
37733mod tests {
37734 use super::*;
37735
#[test]
fn test_dialect_type_from_str() {
    // Each accepted spelling paired with the dialect it must parse to.
    let cases = [
        ("postgres", DialectType::PostgreSQL),
        ("postgresql", DialectType::PostgreSQL),
        ("mysql", DialectType::MySQL),
        ("bigquery", DialectType::BigQuery),
    ];
    for (name, expected) in cases {
        assert_eq!(name.parse::<DialectType>().unwrap(), expected);
    }
}
37752
#[test]
fn test_basic_transpile() {
    // A trivial statement must yield exactly one output, rendered unchanged.
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic
        .transpile("SELECT 1", DialectType::PostgreSQL)
        .unwrap();
    assert_eq!(statements.len(), 1);
    assert_eq!(statements[0], "SELECT 1");
}
37762
#[test]
fn test_function_transformation_mysql() {
    // Generic NVL is expected to be rendered as IFNULL when targeting MySQL.
    let generic = Dialect::get(DialectType::Generic);
    let sql = "SELECT NVL(a, b)";
    let rendered = generic.transpile(sql, DialectType::MySQL).unwrap();
    assert_eq!(rendered[0], "SELECT IFNULL(a, b)");
}
37772
#[test]
fn test_get_path_duckdb() {
    // Diagnostic test: prints each rendering to stderr; it fails only if a transpile errors.
    let sf = Dialect::get(DialectType::Snowflake);
    let colon_sql = "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit";
    let get_path_sql = "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')";

    // 1. Colon path access, Snowflake round trip.
    let round_trip = sf.transpile(colon_sql, DialectType::Snowflake).unwrap();
    eprintln!("Snowflake->Snowflake colon: {}", round_trip[0]);

    // 2. Same colon path access, DuckDB target.
    let colon_duck = sf.transpile(colon_sql, DialectType::DuckDB).unwrap();
    eprintln!("Snowflake->DuckDB colon: {}", colon_duck[0]);

    // 3. Explicit GET_PATH call, DuckDB target.
    let explicit_duck = sf.transpile(get_path_sql, DialectType::DuckDB).unwrap();
    eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", explicit_duck[0]);
}
37805
#[test]
fn test_function_transformation_postgres() {
    // IFNULL first, then NVL: both are expected to come out as COALESCE for PostgreSQL.
    let generic = Dialect::get(DialectType::Generic);
    for sql in ["SELECT IFNULL(a, b)", "SELECT NVL(a, b)"] {
        let rendered = generic.transpile(sql, DialectType::PostgreSQL).unwrap();
        assert_eq!(rendered[0], "SELECT COALESCE(a, b)");
    }
}
37821
#[test]
fn test_hive_cast_to_trycast() {
    // A Hive CAST is expected to become TRY_CAST on targets that support it.
    let hive = Dialect::get(DialectType::Hive);
    let cases = [
        (DialectType::DuckDB, "TRY_CAST(1 AS INT)"),
        (DialectType::Presto, "TRY_CAST(1 AS INTEGER)"),
    ];
    for (target, expected) in cases {
        let rendered = hive.transpile("CAST(1 AS INT)", target).unwrap();
        assert_eq!(rendered[0], expected);
    }
}
37836
#[test]
fn test_hive_array_identity() {
    // Hive ARRAY<DATE> must keep its angle-bracket syntax on both code paths below.
    let hive = Dialect::get(DialectType::Hive);
    let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";

    // Path 1: the one-shot transpile entry point.
    let transpiled = hive.transpile(sql, DialectType::Hive).unwrap();
    eprintln!("Hive ARRAY via transpile: {}", transpiled[0]);
    assert!(
        transpiled[0].contains("ARRAY<DATE>"),
        "transpile: Expected ARRAY<DATE>, got: {}",
        transpiled[0]
    );

    // Path 2: parse -> transform -> generate (the identity test path).
    let statements = hive.parse(sql).unwrap();
    let first_statement = statements[0].clone();
    let rewritten = hive.transform(first_statement).unwrap();
    let regenerated = hive.generate(&rewritten).unwrap();
    eprintln!("Hive ARRAY via identity path: {}", regenerated);
    assert!(
        regenerated.contains("ARRAY<DATE>"),
        "identity path: Expected ARRAY<DATE>, got: {}",
        regenerated
    );
}
37863
#[test]
fn test_starrocks_delete_between_expansion() {
    // StarRocks doesn't support BETWEEN in DELETE statements, so it must be expanded there.
    let generic = Dialect::get(DialectType::Generic);

    // DELETE + BETWEEN -> `>= AND <=`.
    let delete_between = "DELETE FROM t WHERE a BETWEEN b AND c";
    let rendered = generic
        .transpile(delete_between, DialectType::StarRocks)
        .unwrap();
    assert_eq!(rendered[0], "DELETE FROM t WHERE a >= b AND a <= c");

    // DELETE + NOT BETWEEN -> `< OR >`.
    let delete_not_between = "DELETE FROM t WHERE a NOT BETWEEN b AND c";
    let rendered = generic
        .transpile(delete_not_between, DialectType::StarRocks)
        .unwrap();
    assert_eq!(rendered[0], "DELETE FROM t WHERE a < b OR a > c");

    // SELECT keeps BETWEEN untouched (StarRocks supports it there).
    let select_between = "SELECT * FROM t WHERE a BETWEEN b AND c";
    let rendered = generic
        .transpile(select_between, DialectType::StarRocks)
        .unwrap();
    assert!(
        rendered[0].contains("BETWEEN"),
        "BETWEEN should be preserved in SELECT"
    );
}
37899
#[test]
fn test_snowflake_ltrim_rtrim_parse() {
    // Nested LTRIM(RTRIM(..)) must transpile from Snowflake to DuckDB without error.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let outcome = snowflake.transpile("SELECT LTRIM(RTRIM(col)) FROM t1", DialectType::DuckDB);

    // Print whichever side we got before asserting, to ease debugging.
    match outcome.as_ref() {
        Ok(statements) => eprintln!("LTRIM/RTRIM result: {}", statements[0]),
        Err(error) => eprintln!("LTRIM/RTRIM error: {}", error),
    }
    assert!(
        outcome.is_ok(),
        "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
        outcome.err()
    );
}
37915
#[test]
fn test_duckdb_count_if_parse() {
    // COUNT_IF(x) must round-trip through DuckDB without error.
    let duckdb = Dialect::get(DialectType::DuckDB);
    let outcome = duckdb.transpile("COUNT_IF(x)", DialectType::DuckDB);

    // Print whichever side we got before asserting, to ease debugging.
    match outcome.as_ref() {
        Ok(statements) => eprintln!("COUNT_IF result: {}", statements[0]),
        Err(error) => eprintln!("COUNT_IF error: {}", error),
    }
    assert!(
        outcome.is_ok(),
        "Expected successful parse of COUNT_IF(x), got error: {:?}",
        outcome.err()
    );
}
37931
#[test]
fn test_tsql_cast_tinyint_parse() {
    // A TSQL CAST to TINYINT must transpile to DuckDB without error.
    let tsql_dialect = Dialect::get(DialectType::TSQL);
    let outcome = tsql_dialect.transpile("CAST(X AS TINYINT)", DialectType::DuckDB);

    // Print whichever side we got before asserting, to ease debugging.
    match outcome.as_ref() {
        Ok(statements) => eprintln!("TSQL CAST TINYINT result: {}", statements[0]),
        Err(error) => eprintln!("TSQL CAST TINYINT error: {}", error),
    }
    assert!(
        outcome.is_ok(),
        "Expected successful transpile, got error: {:?}",
        outcome.err()
    );
}
37947
#[test]
fn test_pg_hash_bitwise_xor() {
    // The PostgreSQL `#` operator must survive a PostgreSQL round trip unchanged.
    let postgres = Dialect::get(DialectType::PostgreSQL);
    let sql = "x # y";
    let rendered = postgres.transpile(sql, DialectType::PostgreSQL).unwrap();
    assert_eq!(rendered[0], "x # y");
}
37954
#[test]
fn test_pg_array_to_duckdb() {
    // ARRAY[...] literals are expected to be rendered as bracket lists for DuckDB.
    let postgres = Dialect::get(DialectType::PostgreSQL);
    let sql = "SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]";
    let rendered = postgres.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], "SELECT [1, 2, 3] @> [1, 2]");
}
37963
#[test]
fn test_array_remove_bigquery() {
    // ARRAY_REMOVE is expected to be rewritten as a filtered UNNEST subquery for BigQuery.
    let generic = Dialect::get(DialectType::Generic);
    let expected = "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)";
    let rendered = generic
        .transpile("ARRAY_REMOVE(the_array, target)", DialectType::BigQuery)
        .unwrap();
    assert_eq!(rendered[0], expected);
}
37975
#[test]
fn test_map_clickhouse_case() {
    // Diagnostic test: prints the parsed AST and the ClickHouse rendering; it fails only
    // if parsing or transpiling errors.
    let generic = Dialect::get(DialectType::Generic);
    let sql = "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))";

    let ast = generic.parse(sql).unwrap();
    eprintln!("MAP parsed: {:?}", ast);

    let rendered = generic.transpile(sql, DialectType::ClickHouse).unwrap();
    eprintln!("MAP result: {}", rendered[0]);
}
37991
#[test]
fn test_generate_date_array_presto() {
    // GENERATE_DATE_ARRAY with a weekly step is expected to become SEQUENCE with a 7-day interval.
    let generic = Dialect::get(DialectType::Generic);
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let expected = "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))";

    let rendered = generic.transpile(sql, DialectType::Presto).unwrap();
    eprintln!("GDA -> Presto: {}", rendered[0]);
    assert_eq!(rendered[0], expected);
}
38002
#[test]
fn test_generate_date_array_postgres() {
    // Diagnostic test: prints the PostgreSQL rendering; it fails only if transpiling errors.
    let generic = Dialect::get(DialectType::Generic);
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let rendered = generic.transpile(sql, DialectType::PostgreSQL).unwrap();
    eprintln!("GDA -> PostgreSQL: {}", rendered[0]);
}
38012
#[test]
fn test_generate_date_array_snowflake() {
    // Diagnostic test: prints the Snowflake rendering; it fails only if transpiling errors.
    let generic = Dialect::get(DialectType::Generic);
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let rendered = generic.transpile(sql, DialectType::Snowflake).unwrap();
    eprintln!("GDA -> Snowflake: {}", rendered[0]);
}
38024
#[test]
fn test_array_length_generate_date_array_snowflake() {
    // Diagnostic test: prints the Snowflake rendering; it fails only if transpiling errors.
    let generic = Dialect::get(DialectType::Generic);
    let sql = "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let rendered = generic.transpile(sql, DialectType::Snowflake).unwrap();
    eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", rendered[0]);
}
38034
#[test]
fn test_generate_date_array_mysql() {
    // Diagnostic test: prints the MySQL rendering; it fails only if transpiling errors.
    let generic = Dialect::get(DialectType::Generic);
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let rendered = generic.transpile(sql, DialectType::MySQL).unwrap();
    eprintln!("GDA -> MySQL: {}", rendered[0]);
}
38044
#[test]
fn test_generate_date_array_redshift() {
    // Diagnostic test: prints the Redshift rendering; it fails only if transpiling errors.
    let generic = Dialect::get(DialectType::Generic);
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let rendered = generic.transpile(sql, DialectType::Redshift).unwrap();
    eprintln!("GDA -> Redshift: {}", rendered[0]);
}
38054
#[test]
fn test_generate_date_array_tsql() {
    // Diagnostic test: prints the TSQL rendering; it fails only if transpiling errors.
    let generic = Dialect::get(DialectType::Generic);
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let rendered = generic.transpile(sql, DialectType::TSQL).unwrap();
    eprintln!("GDA -> TSQL: {}", rendered[0]);
}
38064
#[test]
fn test_struct_colon_syntax() {
    // Diagnostic test: prints the outcome for both STRUCT field spellings. Errors are
    // printed rather than unwrapped, so a failed transpile does not fail the test.
    let generic = Dialect::get(DialectType::Generic);
    let without_colon = "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)";
    let with_colon = "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)";

    // Space-separated field types first.
    match generic.transpile(without_colon, DialectType::ClickHouse) {
        Ok(statements) => eprintln!("STRUCT no colon -> ClickHouse: {}", statements[0]),
        Err(error) => eprintln!("STRUCT no colon error: {}", error),
    }

    // Then the colon-separated spelling.
    match generic.transpile(with_colon, DialectType::ClickHouse) {
        Ok(statements) => eprintln!("STRUCT colon -> ClickHouse: {}", statements[0]),
        Err(error) => eprintln!("STRUCT colon error: {}", error),
    }
}
38087
#[test]
fn test_generate_date_array_cte_wrapped_mysql() {
    // Diagnostic test: prints the MySQL rendering of a CTE-wrapped GENERATE_DATE_ARRAY;
    // it fails only if transpiling errors.
    let generic = Dialect::get(DialectType::Generic);
    let sql = "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates";
    let rendered = generic.transpile(sql, DialectType::MySQL).unwrap();
    eprintln!("GDA CTE -> MySQL: {}", rendered[0]);
}
38097
#[test]
fn test_generate_date_array_cte_wrapped_tsql() {
    // Diagnostic test: prints the TSQL rendering of a CTE-wrapped GENERATE_DATE_ARRAY;
    // it fails only if transpiling errors.
    let generic = Dialect::get(DialectType::Generic);
    let sql = "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates";
    let rendered = generic.transpile(sql, DialectType::TSQL).unwrap();
    eprintln!("GDA CTE -> TSQL: {}", rendered[0]);
}
38107
#[test]
fn test_decode_literal_no_null_check() {
    // An all-literal Oracle DECODE is expected to become plain equality, with no IS NULL.
    let oracle = Dialect::get(DialectType::Oracle);
    let sql = "SELECT decode(1,2,3,4)";
    let rendered = oracle.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(
        rendered[0], "SELECT CASE WHEN 1 = 2 THEN 3 ELSE 4 END",
        "Literal DECODE should not have IS NULL checks"
    );
}
38120
#[test]
fn test_decode_column_vs_literal_no_null_check() {
    // Column-vs-literal Oracle DECODE is expected to use simple equality (like sqlglot).
    let oracle = Dialect::get(DialectType::Oracle);
    let sql = "SELECT decode(col, 2, 3, 4) FROM t";
    let rendered = oracle.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(
        rendered[0], "SELECT CASE WHEN col = 2 THEN 3 ELSE 4 END FROM t",
        "Column vs literal DECODE should not have IS NULL checks"
    );
}
38133
#[test]
fn test_decode_column_vs_column_keeps_null_check() {
    // Column-vs-column Oracle DECODE is expected to keep the null-safe comparison.
    let oracle = Dialect::get(DialectType::Oracle);
    let sql = "SELECT decode(col, col2, 3, 4) FROM t";
    let rendered = oracle.transpile(sql, DialectType::DuckDB).unwrap();
    assert!(
        rendered[0].contains("IS NULL"),
        "Column vs column DECODE should have IS NULL checks, got: {}",
        rendered[0]
    );
}
38147
#[test]
fn test_decode_null_search() {
    // A NULL search value in Oracle DECODE is expected to be rendered with IS NULL.
    let oracle = Dialect::get(DialectType::Oracle);
    let sql = "SELECT decode(col, NULL, 3, 4) FROM t";
    let expected = "SELECT CASE WHEN col IS NULL THEN 3 ELSE 4 END FROM t";
    let rendered = oracle.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], expected);
}
38160
38161 // =========================================================================
38162 // REGEXP function transpilation tests
38163 // =========================================================================
38164
#[test]
fn test_regexp_substr_snowflake_to_duckdb_2arg() {
    // Two-argument REGEXP_SUBSTR is expected to map directly onto REGEXP_EXTRACT.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern')";
    let rendered = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
}
38173
#[test]
fn test_regexp_substr_snowflake_to_duckdb_3arg_pos1() {
    // A start position of 1 is expected to be dropped from the DuckDB output.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1)";
    let rendered = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
}
38182
#[test]
fn test_regexp_substr_snowflake_to_duckdb_3arg_pos_gt1() {
    // A start position above 1 is expected to be emulated with NULLIF(SUBSTRING(..), '').
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 3)";
    let expected = "SELECT REGEXP_EXTRACT(NULLIF(SUBSTRING(s, 3), ''), 'pattern')";
    let rendered = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], expected);
}
38194
#[test]
fn test_regexp_substr_snowflake_to_duckdb_4arg_occ_gt1() {
    // An occurrence above 1 is expected to index into REGEXP_EXTRACT_ALL via ARRAY_EXTRACT.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 3)";
    let expected = "SELECT ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, 'pattern'), 3)";
    let rendered = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], expected);
}
38209
#[test]
fn test_regexp_substr_snowflake_to_duckdb_5arg_e_flag() {
    // With position 1, occurrence 1 and the 'e' flag, the plain REGEXP_EXTRACT is expected.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')";
    let rendered = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
}
38221
#[test]
fn test_regexp_substr_snowflake_to_duckdb_6arg_group0() {
    // An explicit group of 0 is expected to produce the plain REGEXP_EXTRACT form.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)";
    let rendered = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
}
38233
#[test]
fn test_regexp_substr_snowflake_identity_strip_group0() {
    // On a Snowflake round trip the trailing group argument 0 is expected to be stripped.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)";
    let rendered = snowflake.transpile(sql, DialectType::Snowflake).unwrap();
    assert_eq!(rendered[0], "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')");
}
38245
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_2arg() {
    // Two-argument REGEXP_SUBSTR_ALL is expected to map directly onto REGEXP_EXTRACT_ALL.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')";
    let rendered = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
}
38257
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_3arg_pos_gt1() {
    // A start position above 1 is expected to be emulated with SUBSTRING on the subject.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 3)";
    let expected = "SELECT REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')";
    let rendered = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], expected);
}
38272
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_5arg_e_flag() {
    // With position 1, occurrence 1 and the 'e' flag, plain REGEXP_EXTRACT_ALL is expected.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')";
    let rendered = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
}
38284
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_6arg_group0() {
    // An explicit group of 0 is expected to produce the plain REGEXP_EXTRACT_ALL form.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)";
    let rendered = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
}
38296
#[test]
fn test_regexp_substr_all_snowflake_identity_strip_group0() {
    // On a Snowflake round trip the trailing group argument 0 is expected to be stripped.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)";
    let expected = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')";
    let rendered = snowflake.transpile(sql, DialectType::Snowflake).unwrap();
    assert_eq!(rendered[0], expected);
}
38311
#[test]
fn test_regexp_count_snowflake_to_duckdb_2arg() {
    // REGEXP_COUNT is expected to become LENGTH(REGEXP_EXTRACT_ALL(..)) guarded against
    // an empty pattern.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_COUNT(s, 'pattern')";
    let expected = "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, 'pattern')) END";
    let rendered = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], expected);
}
38323
#[test]
fn test_regexp_count_snowflake_to_duckdb_3arg() {
    // The start position is expected to be applied through SUBSTRING on the subject.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_COUNT(s, 'pattern', 3)";
    let expected = "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')) END";
    let rendered = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], expected);
}
38335
#[test]
fn test_regexp_count_snowflake_to_duckdb_4arg_flags() {
    // Regex flags are expected to be prepended to the pattern as an inline `(?i)` group.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_COUNT(s, 'pattern', 1, 'i')";
    let expected = "SELECT CASE WHEN '(?i)' || 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 1), '(?i)' || 'pattern')) END";
    let rendered = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], expected);
}
38350
#[test]
fn test_regexp_count_snowflake_to_duckdb_4arg_flags_literal_string() {
    // Same flag handling as above, with a literal subject and a multi-letter flag string.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_COUNT('Hello World', 'L', 1, 'im')";
    let expected = "SELECT CASE WHEN '(?im)' || 'L' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING('Hello World', 1), '(?im)' || 'L')) END";
    let rendered = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], expected);
}
38365
#[test]
fn test_regexp_replace_snowflake_to_duckdb_5arg_pos1_occ1() {
    // Position 1 with occurrence 1 is expected to reduce to the three-argument form.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 1, 1)";
    let rendered = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], "SELECT REGEXP_REPLACE(s, 'pattern', 'repl')");
}
38377
#[test]
fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ0() {
    // Position 3, occurrence 0: the prefix is expected to be kept verbatim and the rest
    // replaced with the 'g' flag.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 0)";
    let expected = "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl', 'g')";
    let rendered = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], expected);
}
38392
#[test]
fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ1() {
    // Position 3, occurrence 1: the prefix is expected to be kept verbatim and the rest
    // replaced without the 'g' flag.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 1)";
    let expected = "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl')";
    let rendered = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], expected);
}
38407
#[test]
fn test_rlike_snowflake_to_duckdb_2arg() {
    // Function-style RLIKE is expected to become REGEXP_FULL_MATCH in DuckDB.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT RLIKE(a, b)";
    let rendered = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], "SELECT REGEXP_FULL_MATCH(a, b)");
}
38416
#[test]
fn test_rlike_snowflake_to_duckdb_3arg_flags() {
    // The flags argument is expected to be passed through to REGEXP_FULL_MATCH.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let sql = "SELECT RLIKE(a, b, 'i')";
    let rendered = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(rendered[0], "SELECT REGEXP_FULL_MATCH(a, b, 'i')");
}
38425
#[test]
fn test_regexp_extract_all_bigquery_to_snowflake_no_capture() {
    // Without a capture group, REGEXP_EXTRACT_ALL is expected to become a two-argument
    // REGEXP_SUBSTR_ALL.
    let bigquery = Dialect::get(DialectType::BigQuery);
    let sql = "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')";
    let rendered = bigquery.transpile(sql, DialectType::Snowflake).unwrap();
    assert_eq!(rendered[0], "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')");
}
38437
#[test]
fn test_regexp_extract_all_bigquery_to_snowflake_with_capture() {
    // With a capture group in the pattern, the Snowflake call is expected to select group 1.
    let bigquery = Dialect::get(DialectType::BigQuery);
    let sql = "SELECT REGEXP_EXTRACT_ALL(s, '(a)[0-9]')";
    let expected = "SELECT REGEXP_SUBSTR_ALL(s, '(a)[0-9]', 1, 1, 'c', 1)";
    let rendered = bigquery.transpile(sql, DialectType::Snowflake).unwrap();
    assert_eq!(rendered[0], expected);
}
38452
#[test]
fn test_regexp_instr_snowflake_to_duckdb_2arg() {
    // Only the building blocks of the DuckDB emulation are checked, not the full text.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake
        .transpile("SELECT REGEXP_INSTR(s, 'pattern')", DialectType::DuckDB)
        .unwrap();
    let rendered = &statements[0];
    assert!(
        rendered.contains("CASE WHEN"),
        "Expected CASE WHEN in result: {}",
        rendered
    );
    assert!(
        rendered.contains("LIST_SUM"),
        "Expected LIST_SUM in result: {}",
        rendered
    );
}
38470
#[test]
fn test_array_except_generic_to_duckdb() {
    // Only the building blocks of the DuckDB emulation are checked, not the full text.
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic
        .transpile(
            "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))",
            DialectType::DuckDB,
        )
        .unwrap();
    let rendered = &statements[0];
    eprintln!("ARRAY_EXCEPT Generic->DuckDB: {}", rendered);

    assert!(
        rendered.contains("CASE WHEN"),
        "Expected CASE WHEN: {}",
        rendered
    );
    assert!(
        rendered.contains("LIST_FILTER"),
        "Expected LIST_FILTER: {}",
        rendered
    );
    assert!(
        rendered.contains("LIST_DISTINCT"),
        "Expected LIST_DISTINCT: {}",
        rendered
    );
    assert!(
        rendered.contains("IS NOT DISTINCT FROM"),
        "Expected IS NOT DISTINCT FROM: {}",
        rendered
    );
    assert!(
        rendered.contains("= 0"),
        "Expected = 0 filter: {}",
        rendered
    );
}
38507
#[test]
fn test_array_except_generic_to_snowflake() {
    // ARRAY_EXCEPT is expected to be kept as-is, with its arguments rendered as bracket lists.
    let generic = Dialect::get(DialectType::Generic);
    let sql = "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))";
    let rendered = generic.transpile(sql, DialectType::Snowflake).unwrap();
    eprintln!("ARRAY_EXCEPT Generic->Snowflake: {}", rendered[0]);
    assert_eq!(rendered[0], "SELECT ARRAY_EXCEPT([1, 2, 3], [2])");
}
38520
/// Transpiles a generic `ARRAY_EXCEPT` call to Presto and checks the exact
/// output: the function name is kept and the array arguments are rendered as
/// `ARRAY[...]` literals.
#[test]
fn test_array_except_generic_to_presto() {
    let input = "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))";
    let expected = "SELECT ARRAY_EXCEPT(ARRAY[1, 2, 3], ARRAY[2])";

    let generic = Dialect::get(DialectType::Generic);
    let output = generic.transpile(input, DialectType::Presto).unwrap();

    eprintln!("ARRAY_EXCEPT Generic->Presto: {}", output[0]);
    assert_eq!(output[0], expected);
}
38533
/// Transpiles a Snowflake `ARRAY_EXCEPT` over bracket literals to DuckDB and
/// checks that the generated SQL contains `CASE WHEN` and `LIST_TRANSFORM`.
#[test]
fn test_array_except_snowflake_to_duckdb() {
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake
        .transpile("SELECT ARRAY_EXCEPT([1, 2, 3], [2])", DialectType::DuckDB)
        .unwrap();

    // Only the first generated statement is inspected.
    let sql = &statements[0];
    eprintln!("ARRAY_EXCEPT Snowflake->DuckDB: {}", sql);

    for fragment in ["CASE WHEN", "LIST_TRANSFORM"] {
        assert!(sql.contains(fragment), "Expected {}: {}", fragment, sql);
    }
}
38552
/// Round-trips a Snowflake `ARRAY_CONTAINS` call through the Snowflake dialect
/// and checks that the statement comes back unchanged.
#[test]
fn test_array_contains_snowflake_to_snowflake() {
    // The expected output is the input itself, so one literal serves both roles.
    let query = "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])";

    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(query, DialectType::Snowflake).unwrap();

    eprintln!("ARRAY_CONTAINS Snowflake->Snowflake: {}", output[0]);
    assert_eq!(output[0], query);
}
38565
/// Transpiles a Snowflake `ARRAY_CONTAINS` call to DuckDB and checks that the
/// generated SQL contains `CASE WHEN`, `NULLIF` and `ARRAY_CONTAINS`.
#[test]
fn test_array_contains_snowflake_to_duckdb() {
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake
        .transpile(
            "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])",
            DialectType::DuckDB,
        )
        .unwrap();

    // Only the first generated statement is inspected.
    let sql = &statements[0];
    eprintln!("ARRAY_CONTAINS Snowflake->DuckDB: {}", sql);

    for fragment in ["CASE WHEN", "NULLIF", "ARRAY_CONTAINS"] {
        assert!(sql.contains(fragment), "Expected {}: {}", fragment, sql);
    }
}
38592
/// Transpiles a Snowflake `ARRAY_DISTINCT` call to DuckDB and checks that the
/// generated SQL contains `CASE WHEN`, `LIST_DISTINCT`, `LIST_APPEND` and
/// `LIST_FILTER`.
#[test]
fn test_array_distinct_snowflake_to_duckdb() {
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake
        .transpile(
            "SELECT ARRAY_DISTINCT([1, 2, 2, 3, 1])",
            DialectType::DuckDB,
        )
        .unwrap();

    // Only the first generated statement is inspected.
    let sql = &statements[0];
    eprintln!("ARRAY_DISTINCT Snowflake->DuckDB: {}", sql);

    for fragment in ["CASE WHEN", "LIST_DISTINCT", "LIST_APPEND", "LIST_FILTER"] {
        assert!(sql.contains(fragment), "Expected {}: {}", fragment, sql);
    }
}
38624}